47 #include <Bpp/Utils/MapTools.h> 48 #include <Bpp/Numeric/NumTools.h> 49 #include <Bpp/Numeric/VectorTools.h> 64 throw AlphabetException(
"CodonSiteTools::hasGapOrStop: alphabet is not CodonAlphabet", site.getAlphabet());
65 for (
size_t i = 0; i < site.size(); i++)
79 throw AlphabetException(
"CodonSiteTools::hasStop: alphabet is not CodonAlphabet", site.getAlphabet());
80 for (
size_t i = 0; i < site.size(); i++)
82 if (gCode.isStop(site[i]))
94 throw AlphabetException(
"CodonSiteTools::isMonoSitePolymorphic: alphabet is not CodonAlphabet", site.getAlphabet());
97 throw EmptySiteException(
"CodonSiteTools::isMonoSitePolymorphic: Incorrect specified site", &site);
103 vector<int> pos1, pos2, pos3;
105 for (
size_t i = 0; i < site.size(); i++)
112 Site s1(pos1, na), s2(pos2, na), s3(pos3, na);
133 throw AlphabetException(
"CodonSiteTools::isSynonymousPolymorphic: alphabet is not CodonAlphabet", site.getAlphabet());
134 if (!site.getAlphabet()->equals(*gCode.getSourceAlphabet()))
135 throw AlphabetMismatchException(
"CodonSiteTools::isSynonymousPolymorphic: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getSourceAlphabet());
137 if (site.size() == 0)
138 throw EmptySiteException(
"CodonSiteTools::isSynonymousPolymorphic: Incorrect specified site", &site);
146 int first_aa = gCode.translate(site[0]);
147 for (
size_t i = 1; i < site.size(); i++)
149 int aa = gCode.translate(site[i]);
163 throw AlphabetException(
"CodonSiteTools::generateCodonSiteWithoutRareVariant: alphabet is not CodonAlphabet", site.getAlphabet());
165 if (site.size() == 0)
166 throw EmptySiteException(
"CodonSiteTools::generateCodonSiteWithoutRareVariant: Incorrect specified site", &site);
170 Site* noRareVariant =
new Site(site);
171 return noRareVariant;
176 map<int, double> freqcodon;
181 for (map<int, double>::iterator it = freqcodon.begin(); it != freqcodon.end(); it++)
183 if (it->second > freqmin && !gCode.isStop(it->first))
185 newcodon = it->first;
189 vector<int> pos1, pos2, pos3;
190 for (
size_t i = 0; i < site.size(); i++)
196 Site s1(pos1, na), s2(pos2, na), s3(pos3, na);
197 map<int, double> freq1;
199 map<int, double> freq2;
201 map<int, double> freq3;
204 for (
size_t i = 0; i < site.size(); i++)
206 if (freq1[s1.getValue(i)] > freqmin && freq2[s2.getValue(i)] > freqmin && freq3[s3.
getValue(i)] > freqmin)
208 codon.push_back(site.getValue(i));
211 codon.push_back(newcodon);
213 Site* noRareVariant =
new Site(codon, ca);
214 return noRareVariant;
240 switch (numberOfDifferences(i, j, *ca))
253 vector<double> path(2, 0);
254 vector<double> weight(2, 1);
257 int trans1 = ca->
getCodon(ci[0], cj[1], ci[2]);
258 int trans2 = ca->
getCodon(ci[0], ci[1], cj[2]);
259 if (!gCode.
isStop(trans1))
268 if (!gCode.
isStop(trans2))
280 int trans1 = ca->
getCodon(cj[0], ci[1], ci[2]);
281 int trans2 = ca->
getCodon(ci[0], ci[1], cj[2]);
282 if (!gCode.
isStop(trans1))
291 if (!gCode.
isStop(trans2))
303 int trans1 = ca->
getCodon(cj[0], ci[1], ci[2]);
304 int trans2 = ca->
getCodon(ci[0], cj[1], ci[2]);
305 if (!gCode.
isStop(trans1))
314 if (!gCode.
isStop(trans2))
325 return VectorTools::max(path);
328 for (
size_t k = 0; k < 2; k++)
330 nbdif += path[k] * weight[k];
333 return nbdif / VectorTools::sum(weight);
337 vector<double> path(6, 0);
338 vector<double> weight(6, 1);
340 int trans100 = ca->
getCodon(cj[0], ci[1], ci[2]);
341 int trans010 = ca->
getCodon(ci[0], cj[1], ci[2]);
342 int trans001 = ca->
getCodon(ci[0], ci[1], cj[2]);
344 int trans110 = ca->
getCodon(cj[0], cj[1], ci[2]);
345 int trans101 = ca->
getCodon(cj[0], ci[1], cj[2]);
346 int trans011 = ca->
getCodon(ci[0], cj[1], cj[2]);
348 if (!gCode.
isStop(trans100))
352 path[0]++; path[1]++;
354 if (!gCode.
isStop(trans110))
363 if (!gCode.
isStop(trans101))
375 weight[0] = 0; weight[1] = 0;
377 if (!gCode.
isStop(trans010))
381 path[2]++; path[3]++;
383 if (!gCode.
isStop(trans110))
392 if (!gCode.
isStop(trans011))
404 weight[2] = 0; weight[3] = 0;
406 if (!gCode.
isStop(trans001))
410 path[4]++; path[5]++;
412 if (!gCode.
isStop(trans101))
421 if (!gCode.
isStop(trans011))
433 weight[4] = 0; weight[5] = 0;
436 return VectorTools::max(path);
439 for (
size_t k = 0; k < 6; k++)
441 nbdif += path[k] * weight[k];
444 return nbdif / VectorTools::sum(weight);
458 throw AlphabetException(
"CodonSiteTools::piSynonymous: alphabet is not CodonAlphabet", site.getAlphabet());
459 if (!site.getAlphabet()->equals(*gCode.getSourceAlphabet()))
460 throw AlphabetMismatchException(
"CodonSiteTools::piSynonymous: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getSourceAlphabet());
462 if (site.size() == 0)
463 throw EmptySiteException(
"CodonSiteTools::piSynonymous: Incorrect specified site", &site);
469 map<int, double> freq;
472 for (map<int, double>::iterator it1 = freq.begin(); it1 != freq.end(); it1++)
474 for (map<int, double>::iterator it2 = freq.begin(); it2 != freq.end(); it2++)
476 pi += (it1->second) * (it2->second) * (numberOfSynonymousDifferences(it1->first, it2->first, gCode, minchange));
479 size_t n = site.size();
480 return pi *
static_cast<double>(n / (n - 1));
490 throw AlphabetException(
"CodonSiteTools::piNonSynonymous: alphabet is not CodonAlphabet", site.getAlphabet());
491 if (!site.getAlphabet()->equals(*gCode.getSourceAlphabet()))
492 throw AlphabetMismatchException(
"CodonSiteTools::piNonSynonymous: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getSourceAlphabet());
494 if (site.size() == 0)
495 throw EmptySiteException(
"CodonSiteTools::piSynonymous: Incorrect specified site", &site);
500 if (isSynonymousPolymorphic(site, gCode))
503 map<int, double> freq;
507 for (map<int, double>::iterator it1 = freq.begin(); it1 != freq.end(); it1++)
509 for (map<int, double>::iterator it2 = freq.begin(); it2 != freq.end(); it2++)
511 double nbtot =
static_cast<double>(numberOfDifferences(it1->first, it2->first, *ca));
512 double nbsyn = numberOfSynonymousDifferences(it1->first, it2->first, gCode, minchange);
513 pi += (it1->second) * (it2->second) * (nbtot - nbsyn);
516 size_t n = site.size();
517 return pi *
static_cast<double>(n / (n - 1));
529 double nbsynpos = 0.0;
531 int acid = gCode.translate(i);
532 for (
size_t pos = 0; pos < 3; ++pos)
534 for (
int an = 0; an < 4; ++an)
536 if (an == codon[pos])
538 vector<int> mutcodon = codon;
540 int intcodon = ca->
getCodon(mutcodon[0], mutcodon[1], mutcodon[2]);
541 if (gCode.isStop(intcodon))
543 int altacid = gCode.translate(intcodon);
546 if (((codon[pos] == 0 || codon[pos] == 2) && (mutcodon[pos] == 1 || mutcodon[pos] == 3)) ||
547 ((codon[pos] == 1 || codon[pos] == 3) && (mutcodon[pos] == 0 || mutcodon[pos] == 2)))
549 nbsynpos = nbsynpos + 1 / (ratio + 2);
553 nbsynpos = nbsynpos + ratio / (ratio + 2);
568 throw AlphabetException(
"CodonSiteTools::meanNumberOfSynonymousPositions: alphabet is not CodonAlphabet", site.getAlphabet());
569 if (!site.getAlphabet()->equals(*gCode.getSourceAlphabet()))
570 throw AlphabetMismatchException(
"CodonSiteTools::meanNumberOfSynonymousPositions: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getSourceAlphabet());
572 if (site.size() == 0)
573 throw EmptySiteException(
"CodonSiteTools::meanNumberOfSynonymousPositions: Incorrect specified site", &site);
577 map<int, double> freq;
579 for (map<int, double>::iterator it = freq.begin(); it != freq.end(); it++)
581 NbSyn += (it->second) * numberOfSynonymousPositions(it->first, gCode, ratio);
593 throw AlphabetException(
"CodonSiteTools::numberOfSubsitutions: alphabet is not CodonAlphabet", site.getAlphabet());
595 if (site.size() == 0)
596 throw EmptySiteException(
"CodonSiteTools::numberOfSubsitutions: Incorrect specified site", &site);
601 if (freqmin > 1. / static_cast<double>(site.size()))
604 newsite =
new Site(site);
608 vector<int> pos1, pos2, pos3;
612 for (
size_t i = 0; i < newsite->
size(); i++)
621 Site s1(pos1, na), s2(pos2, na), s3(pos3, na);
638 throw AlphabetException(
"CodonSiteTools::numberOfNonSynonymousSubstitutions: alphabet is not CodonAlphabet", site.getAlphabet());
639 if (!site.getAlphabet()->equals(*gCode.getSourceAlphabet()))
640 throw AlphabetMismatchException(
"CodonSiteTools::numberOfNonSynonymousSubstitutions: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getSourceAlphabet());
642 if (site.size() == 0)
643 throw EmptySiteException(
"CodonSiteTools::numberOfNonSynonymousSubstitutions: Incorrect specified site", &site);
648 if (freqmin > 1. / static_cast<double>(site.size()))
649 newsite = generateCodonSiteWithoutRareVariant(site, gCode, freqmin);
651 newsite =
new Site(site);
655 map<int, size_t> count;
662 for (map<int, size_t>::iterator it1 = count.begin(); it1 != count.end(); it1++)
665 for (map<int, size_t>::iterator it2 = count.begin(); it2 != count.end(); it2++)
667 size_t Ntot = numberOfDifferences(it1->first, it2->first, *ca);
668 size_t Ns = (size_t)numberOfSynonymousDifferences(it1->first, it2->first, gCode,
true);
669 if (Nmin > Ntot - Ns && it1->first != it2->first)
677 return NaSup - Nminmin;
687 throw AlphabetException(
"CodonSiteTools::fixedDifferences: alphabet is not CodonAlphabet (siteIn)", siteIn.getAlphabet());
689 throw AlphabetException(
"CodonSiteTools::fixedDifferences: alphabet is not CodonAlphabet (siteOut)", siteOut.getAlphabet());
690 if (!siteIn.getAlphabet()->equals(*gCode.getSourceAlphabet()))
691 throw AlphabetMismatchException(
"CodonSiteTools::fixedDifferences: siteIn and genetic code have not the same codon alphabet.", siteIn.getAlphabet(), gCode.getSourceAlphabet());
692 if (!siteOut.getAlphabet()->equals(*gCode.getSourceAlphabet()))
693 throw AlphabetMismatchException(
"CodonSiteTools::fixedDifferences: siteOut and genetic code have not the same codon alphabet.", siteOut.getAlphabet(), gCode.getSourceAlphabet());
695 if (siteIn.size() == 0)
696 throw EmptySiteException(
"CodonSiteTools::getFixedDifferences Incorrect specified site", &siteIn);
697 if (siteOut.size() == 0)
698 throw EmptySiteException(
"CodonSiteTools::getFixedDifferences Incorrect specified site", &siteOut);
702 size_t Ntot = numberOfDifferences(i, j, *ca);
703 size_t Ns = (size_t) numberOfSynonymousDifferences(i, j, gCode,
true);
704 size_t Na = Ntot - Ns;
706 vector<int> pos1in, pos2in, pos3in, pos1out, pos2out, pos3out;
708 for (
size_t k = 0; k < siteIn.size(); k++)
719 Site s1in(pos1in, na), s2in(pos2in, na), s3in(pos3in, na);
720 Site s1out(pos1out, na), s2out(pos2out, na), s3out(pos3out, na);
815 for (
size_t i = 0; i < site.
size(); i++)
825 for (
size_t i = 0; i < site.
size(); i++)
bool isFourFoldDegenerated(int codon) const
This alphabet is used to deal NumericAlphabet.
const CodonAlphabet * getSourceAlphabet() const
Get the source alphabet.
virtual int getCodon(int pos1, int pos2, int pos3) const
Get the int code for a codon given the int code of the three underlying positions.
virtual int getFirstPosition(int codon) const
Get the int code of the first position of a codon given its int description.
std::vector< int > getPositions(int word) const
Get the int codes of each position of a word given its int description.
bool areSynonymous(int i, int j) const
Tell if two codons are synonymous, that is, if they encode the same amino-acid.
Exception sent when a empty site is found.
virtual int getValue(size_t pos) const
Get the element at position 'pos' as an int.
The alphabet exception base class.
virtual size_t size() const
Get the number of elements in the list.
virtual int getSecondPosition(int codon) const
Get the int code of the second position of a codon given its int description.
virtual bool isStop(int state) const =0
Tells is a particular codon is a stop codon.
virtual const NucleicAlphabet *const getNucleicAlphabet() const
bool isUnresolved(int state) const
Exception thrown when two alphabets do not match.
Partial implementation of the Transliterator interface for genetic code object.
The abstract base class for nucleic alphabets.
virtual int getThirdPosition(int codon) const
Get the int code of the third position of a codon given its int description.