bpp-seq  2.2.0
CodonSiteTools.cpp
Go to the documentation of this file.
1 //
2 // File CodonSiteTools.cpp
3 // Author : Sylvain Glémin
4 // Last modification : October 2004
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for sequences analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
40 #include "CodonSiteTools.h"
41 #include "Alphabet/CodonAlphabet.h"
42 #include "Alphabet/DNA.h"
43 #include "Alphabet/AlphabetTools.h"
44 #include "SiteTools.h"
47 #include <Bpp/Utils/MapTools.h>
48 #include <Bpp/Numeric/NumTools.h>
49 #include <Bpp/Numeric/VectorTools.h>
50 
51 using namespace bpp;
52 
53 // From the STL:
54 #include <cmath>
55 
56 using namespace std;
57 
58 /******************************************************************************/
59 
60 bool CodonSiteTools::hasGapOrStop(const Site& site, const GeneticCode& gCode) throw (AlphabetException)
61 {
62  // Alphabet checking
63  if (!AlphabetTools::isCodonAlphabet(site.getAlphabet()))
64  throw AlphabetException("CodonSiteTools::hasGapOrStop: alphabet is not CodonAlphabet", site.getAlphabet());
65  for (size_t i = 0; i < site.size(); i++)
66  {
67  if (site[i] < 0)
68  return true;
69  }
70  return false;
71 }
72 
73 /******************************************************************************/
74 
75 bool CodonSiteTools::hasStop(const Site& site, const GeneticCode& gCode) throw (AlphabetException)
76 {
77  // Alphabet checking
78  if (!AlphabetTools::isCodonAlphabet(site.getAlphabet()))
79  throw AlphabetException("CodonSiteTools::hasStop: alphabet is not CodonAlphabet", site.getAlphabet());
80  for (size_t i = 0; i < site.size(); i++)
81  {
82  if (gCode.isStop(site[i]))
83  return true;
84  }
85  return false;
86 }
87 
88 /******************************************************************************/
89 
91 {
92  // Alphabet checking
93  if (!AlphabetTools::isCodonAlphabet(site.getAlphabet()))
94  throw AlphabetException("CodonSiteTools::isMonoSitePolymorphic: alphabet is not CodonAlphabet", site.getAlphabet());
95  // Empty site checking
96  if (site.size() == 0)
97  throw EmptySiteException("CodonSiteTools::isMonoSitePolymorphic: Incorrect specified site", &site);
98 
99  // Global polymorphism checking
100  if (SiteTools::isConstant(site))
101  return false;
102  // initialisation of the 3 sub-sites ot the codon
103  vector<int> pos1, pos2, pos3;
104  const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(site.getAlphabet());
105  for (size_t i = 0; i < site.size(); i++)
106  {
107  pos1.push_back(ca->getFirstPosition(site[i]));
108  pos2.push_back(ca->getSecondPosition(site[i]));
109  pos3.push_back(ca->getThirdPosition(site[i]));
110  }
111  const NucleicAlphabet* na = ca->getNucleicAlphabet();
112  Site s1(pos1, na), s2(pos2, na), s3(pos3, na);
113  // polymorphism checking for each sub-sites
114  size_t nbpol = 0;
115  if (!SiteTools::isConstant(s1))
116  nbpol++;
117  if (!SiteTools::isConstant(s2))
118  nbpol++;
119  if (!SiteTools::isConstant(s3))
120  nbpol++;
121  if (nbpol > 1)
122  return false;
123  return true;
124 }
125 
126 /******************************************************************************/
127 
130 {
131  // Alphabet checking
132  if (!AlphabetTools::isCodonAlphabet(site.getAlphabet()))
133  throw AlphabetException("CodonSiteTools::isSynonymousPolymorphic: alphabet is not CodonAlphabet", site.getAlphabet());
134  if (!site.getAlphabet()->equals(*gCode.getSourceAlphabet()))
135  throw AlphabetMismatchException("CodonSiteTools::isSynonymousPolymorphic: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getSourceAlphabet());
136  // Empty site checking
137  if (site.size() == 0)
138  throw EmptySiteException("CodonSiteTools::isSynonymousPolymorphic: Incorrect specified site", &site);
139 
140  // Global polymorphism checking
141  if (SiteTools::isConstant(site))
142  return false;
143 
144  // Synonymous polymorphism checking
145  vector<int> prot;
146  int first_aa = gCode.translate(site[0]);
147  for (size_t i = 1; i < site.size(); i++)
148  {
149  int aa = gCode.translate(site[i]);
150  if (aa != first_aa)
151  return false;
152  }
153  return true;
154 }
155 
156 /******************************************************************************/
157 
160 {
161  // Alphabet checking
162  if (!AlphabetTools::isCodonAlphabet(site.getAlphabet()))
163  throw AlphabetException("CodonSiteTools::generateCodonSiteWithoutRareVariant: alphabet is not CodonAlphabet", site.getAlphabet());
164  // Empty site checking
165  if (site.size() == 0)
166  throw EmptySiteException("CodonSiteTools::generateCodonSiteWithoutRareVariant: Incorrect specified site", &site);
167 
168  if (SiteTools::isConstant(site))
169  {
170  Site* noRareVariant = new Site(site);
171  return noRareVariant;
172  }
173  else
174  {
175  // Computation
176  map<int, double> freqcodon;
177  SiteTools::getFrequencies(site, freqcodon);
178  const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(site.getAlphabet());
179  const NucleicAlphabet* na = ca->getNucleicAlphabet();
180  int newcodon = -1;
181  for (map<int, double>::iterator it = freqcodon.begin(); it != freqcodon.end(); it++)
182  {
183  if (it->second > freqmin && !gCode.isStop(it->first))
184  {
185  newcodon = it->first;
186  break;
187  }
188  }
189  vector<int> pos1, pos2, pos3;
190  for (size_t i = 0; i < site.size(); i++)
191  {
192  pos1.push_back(ca->getFirstPosition(site[i]));
193  pos2.push_back(ca->getSecondPosition(site[i]));
194  pos3.push_back(ca->getThirdPosition(site[i]));
195  }
196  Site s1(pos1, na), s2(pos2, na), s3(pos3, na);
197  map<int, double> freq1;
198  SiteTools::getFrequencies(s1, freq1);
199  map<int, double> freq2;
200  SiteTools::getFrequencies(s2, freq2);
201  map<int, double> freq3;
202  SiteTools::getFrequencies(s3, freq3);
203  vector<int> codon;
204  for (size_t i = 0; i < site.size(); i++)
205  {
206  if (freq1[s1.getValue(i)] > freqmin && freq2[s2.getValue(i)] > freqmin && freq3[s3.getValue(i)] > freqmin)
207  {
208  codon.push_back(site.getValue(i));
209  }
210  else
211  codon.push_back(newcodon);
212  }
213  Site* noRareVariant = new Site(codon, ca);
214  return noRareVariant;
215  }
216 }
217 
218 /******************************************************************************/
219 
220 size_t CodonSiteTools::numberOfDifferences(int i, int j, const CodonAlphabet& ca)
221 {
222  size_t nbdif = 0;
223  if (ca.getFirstPosition(i) != ca.getFirstPosition(j))
224  nbdif++;
225  if (ca.getSecondPosition(i) != ca.getSecondPosition(j))
226  nbdif++;
227  if (ca.getThirdPosition(i) != ca.getThirdPosition(j))
228  nbdif++;
229  return nbdif;
230 }
231 
232 /******************************************************************************/
233 
234 double CodonSiteTools::numberOfSynonymousDifferences(int i, int j, const GeneticCode& gCode, bool minchange)
235 {
236  const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(gCode.getSourceAlphabet());
237  vector<int> ci = ca->getPositions(i);
238  vector<int> cj = ca->getPositions(j);
239 
240  switch (numberOfDifferences(i, j, *ca))
241  {
242  case 0: return 0;
243  case 1:
244  {
245  if (gCode.areSynonymous(i, j))
246  return 1;
247  return 0;
248  }
249  case 2:
250  {
251  if (gCode.areSynonymous(i, j))
252  return 2;
253  vector<double> path(2, 0); // Vector of number of synonymous changes per path (2 here)
254  vector<double> weight(2, 1); // Weight to exclude path through stop codon
255  if (ci[0] == cj[0])
256  {
257  int trans1 = ca->getCodon(ci[0], cj[1], ci[2]); // transitory codon between NcNiNi et NcNjNj: NcNjNi, Nc = identical site
258  int trans2 = ca->getCodon(ci[0], ci[1], cj[2]); // transitory codon between NcNiNi et NcNjNj: NcNiNj, Nc = identical site
259  if (!gCode.isStop(trans1))
260  {
261  if (gCode.areSynonymous(i, trans1))
262  path[0]++;
263  if (gCode.areSynonymous(trans1, j))
264  path[0]++;
265  }
266  else
267  weight[0] = 0;
268  if (!gCode.isStop(trans2))
269  {
270  if (gCode.areSynonymous(i, trans2))
271  path[1]++;
272  if (gCode.areSynonymous(trans2, j))
273  path[1]++;
274  }
275  else
276  weight[1] = 0;
277  }
278  if (ci[1] == cj[1])
279  {
280  int trans1 = ca->getCodon(cj[0], ci[1], ci[2]); // transitory codon between NiNcNi et NjNcNj: NjNcNi, Nc = identical site
281  int trans2 = ca->getCodon(ci[0], ci[1], cj[2]); // transitory codon between NiNcNi et NjNcNj: NiNcNj, Nc = identical site
282  if (!gCode.isStop(trans1))
283  {
284  if (gCode.areSynonymous(i, trans1))
285  path[0]++;
286  if (gCode.areSynonymous(trans1, j))
287  path[0]++;
288  }
289  else
290  weight[0] = 0;
291  if (!gCode.isStop(trans2))
292  {
293  if (gCode.areSynonymous(i, trans2))
294  path[1]++;
295  if (gCode.areSynonymous(trans2, j))
296  path[1]++;
297  }
298  else
299  weight[1] = 0;
300  }
301  if (ci[2] == cj[2])
302  {
303  int trans1 = ca->getCodon(cj[0], ci[1], ci[2]); // transitory codon between NiNiNc et NjNjNc: NjNiNc, Nc = identical site
304  int trans2 = ca->getCodon(ci[0], cj[1], ci[2]); // transitory codon between NiNiNc et NjNjNc: NiNjNc, Nc = identical site
305  if (!gCode.isStop(trans1))
306  {
307  if (gCode.areSynonymous(i, trans1))
308  path[0]++;
309  if (gCode.areSynonymous(trans1, j))
310  path[0]++;
311  }
312  else
313  weight[0] = 0;
314  if (!gCode.isStop(trans2))
315  {
316  if (gCode.areSynonymous(i, trans2))
317  path[1]++;
318  if (gCode.areSynonymous(trans2, j))
319  path[1]++;
320  }
321  else
322  weight[1] = 0;
323  }
324  if (minchange)
325  return VectorTools::max(path);
326 
327  double nbdif = 0;
328  for (size_t k = 0; k < 2; k++)
329  {
330  nbdif += path[k] * weight[k];
331  }
332 
333  return nbdif / VectorTools::sum(weight);
334  }
335  case 3:
336  {
337  vector<double> path(6, 0);
338  vector<double> weight(6, 1);
339  // First transitory codons
340  int trans100 = ca->getCodon(cj[0], ci[1], ci[2]);
341  int trans010 = ca->getCodon(ci[0], cj[1], ci[2]);
342  int trans001 = ca->getCodon(ci[0], ci[1], cj[2]);
343  // Second transitory codons
344  int trans110 = ca->getCodon(cj[0], cj[1], ci[2]);
345  int trans101 = ca->getCodon(cj[0], ci[1], cj[2]);
346  int trans011 = ca->getCodon(ci[0], cj[1], cj[2]);
347  // Paths
348  if (!gCode.isStop(trans100))
349  {
350  if (gCode.areSynonymous(i, trans100))
351  {
352  path[0]++; path[1]++;
353  }
354  if (!gCode.isStop(trans110))
355  {
356  if (gCode.areSynonymous(trans100, trans110))
357  path[0]++;
358  if (gCode.areSynonymous(trans110, j))
359  path[0]++;
360  }
361  else
362  weight[0] = 0;
363  if (!gCode.isStop(trans101))
364  {
365  if (gCode.areSynonymous(trans100, trans101))
366  path[1]++;
367  if (gCode.areSynonymous(trans101, j))
368  path[1]++;
369  }
370  else
371  weight[1] = 0;
372  }
373  else
374  {
375  weight[0] = 0; weight[1] = 0;
376  }
377  if (!gCode.isStop(trans010))
378  {
379  if (gCode.areSynonymous(i, trans010))
380  {
381  path[2]++; path[3]++;
382  }
383  if (!gCode.isStop(trans110))
384  {
385  if (gCode.areSynonymous(trans010, trans110))
386  path[2]++;
387  if (gCode.areSynonymous(trans110, j))
388  path[2]++;
389  }
390  else
391  weight[2] = 0;
392  if (!gCode.isStop(trans011))
393  {
394  if (gCode.areSynonymous(trans010, trans011))
395  path[3]++;
396  if (gCode.areSynonymous(trans011, j))
397  path[3]++;
398  }
399  else
400  weight[3] = 0;
401  }
402  else
403  {
404  weight[2] = 0; weight[3] = 0;
405  }
406  if (!gCode.isStop(trans001))
407  {
408  if (gCode.areSynonymous(i, trans001))
409  {
410  path[4]++; path[5]++;
411  }
412  if (!gCode.isStop(trans101))
413  {
414  if (gCode.areSynonymous(trans001, trans101))
415  path[4]++;
416  if (gCode.areSynonymous(trans101, j))
417  path[4]++;
418  }
419  else
420  weight[4] = 0;
421  if (!gCode.isStop(trans011))
422  {
423  if (gCode.areSynonymous(trans001, trans011))
424  path[5]++;
425  if (gCode.areSynonymous(trans011, j))
426  path[5]++;
427  }
428  else
429  weight[5] = 0;
430  }
431  else
432  {
433  weight[4] = 0; weight[5] = 0;
434  }
435  if (minchange)
436  return VectorTools::max(path);
437 
438  double nbdif = 0;
439  for (size_t k = 0; k < 6; k++)
440  {
441  nbdif += path[k] * weight[k];
442  }
443 
444  return nbdif / VectorTools::sum(weight);
445  }
446  }
447  // This line is never reached but sends a warning if not there:
448  return 0.;
449 }
450 
451 /******************************************************************************/
452 
453 double CodonSiteTools::piSynonymous(const Site& site, const GeneticCode& gCode, bool minchange)
455 {
456  // Alphabet checking
457  if (!AlphabetTools::isCodonAlphabet(site.getAlphabet()))
458  throw AlphabetException("CodonSiteTools::piSynonymous: alphabet is not CodonAlphabet", site.getAlphabet());
459  if (!site.getAlphabet()->equals(*gCode.getSourceAlphabet()))
460  throw AlphabetMismatchException("CodonSiteTools::piSynonymous: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getSourceAlphabet());
461  // Empty site checking
462  if (site.size() == 0)
463  throw EmptySiteException("CodonSiteTools::piSynonymous: Incorrect specified site", &site);
464 
465  // General polymorphism checking
466  if (SiteTools::isConstant(site))
467  return 0;
468  // Computation
469  map<int, double> freq;
470  SiteTools::getFrequencies(site, freq);
471  double pi = 0;
472  for (map<int, double>::iterator it1 = freq.begin(); it1 != freq.end(); it1++)
473  {
474  for (map<int, double>::iterator it2 = freq.begin(); it2 != freq.end(); it2++)
475  {
476  pi += (it1->second) * (it2->second) * (numberOfSynonymousDifferences(it1->first, it2->first, gCode, minchange));
477  }
478  }
479  size_t n = site.size();
480  return pi * static_cast<double>(n / (n - 1));
481 }
482 
483 /******************************************************************************/
484 
485 double CodonSiteTools::piNonSynonymous(const Site& site, const GeneticCode& gCode, bool minchange)
487 {
488  // Alphabet checking
489  if (!AlphabetTools::isCodonAlphabet(site.getAlphabet()))
490  throw AlphabetException("CodonSiteTools::piNonSynonymous: alphabet is not CodonAlphabet", site.getAlphabet());
491  if (!site.getAlphabet()->equals(*gCode.getSourceAlphabet()))
492  throw AlphabetMismatchException("CodonSiteTools::piNonSynonymous: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getSourceAlphabet());
493  // Empty site checking
494  if (site.size() == 0)
495  throw EmptySiteException("CodonSiteTools::piSynonymous: Incorrect specified site", &site);
496 
497  // General polymorphism checking
498  if (SiteTools::isConstant(site))
499  return 0;
500  if (isSynonymousPolymorphic(site, gCode))
501  return 0;
502  // Computation
503  map<int, double> freq;
504  SiteTools::getFrequencies(site, freq);
505  const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(site.getAlphabet());
506  double pi = 0;
507  for (map<int, double>::iterator it1 = freq.begin(); it1 != freq.end(); it1++)
508  {
509  for (map<int, double>::iterator it2 = freq.begin(); it2 != freq.end(); it2++)
510  {
511  double nbtot = static_cast<double>(numberOfDifferences(it1->first, it2->first, *ca));
512  double nbsyn = numberOfSynonymousDifferences(it1->first, it2->first, gCode, minchange);
513  pi += (it1->second) * (it2->second) * (nbtot - nbsyn);
514  }
515  }
516  size_t n = site.size();
517  return pi * static_cast<double>(n / (n - 1));
518 }
519 
520 /******************************************************************************/
521 
522 double CodonSiteTools::numberOfSynonymousPositions(int i, const GeneticCode& gCode, double ratio) throw (Exception)
523 {
524  const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(gCode.getSourceAlphabet());
525  if (gCode.isStop(i))
526  return 0;
527  if (ca->isUnresolved(i))
528  return 0;
529  double nbsynpos = 0.0;
530  vector<int> codon = ca->getPositions(i);
531  int acid = gCode.translate(i);
532  for (size_t pos = 0; pos < 3; ++pos)
533  {
534  for (int an = 0; an < 4; ++an)
535  {
536  if (an == codon[pos])
537  continue;
538  vector<int> mutcodon = codon;
539  mutcodon[pos] = an;
540  int intcodon = ca->getCodon(mutcodon[0], mutcodon[1], mutcodon[2]);
541  if (gCode.isStop(intcodon))
542  continue;
543  int altacid = gCode.translate(intcodon);
544  if (altacid == acid) // if synonymous
545  {
546  if (((codon[pos] == 0 || codon[pos] == 2) && (mutcodon[pos] == 1 || mutcodon[pos] == 3)) ||
547  ((codon[pos] == 1 || codon[pos] == 3) && (mutcodon[pos] == 0 || mutcodon[pos] == 2))) // if it is a transversion
548  {
549  nbsynpos = nbsynpos + 1 / (ratio + 2);
550  }
551  else // if transition
552  {
553  nbsynpos = nbsynpos + ratio / (ratio + 2);
554  }
555  }
556  }
557  }
558  return nbsynpos;
559 }
560 
561 /******************************************************************************/
562 
563 double CodonSiteTools::meanNumberOfSynonymousPositions(const Site& site, const GeneticCode& gCode, double ratio)
565 {
566  // Alphabet checking
567  if (!AlphabetTools::isCodonAlphabet(site.getAlphabet()))
568  throw AlphabetException("CodonSiteTools::meanNumberOfSynonymousPositions: alphabet is not CodonAlphabet", site.getAlphabet());
569  if (!site.getAlphabet()->equals(*gCode.getSourceAlphabet()))
570  throw AlphabetMismatchException("CodonSiteTools::meanNumberOfSynonymousPositions: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getSourceAlphabet());
571  // Empty site checking
572  if (site.size() == 0)
573  throw EmptySiteException("CodonSiteTools::meanNumberOfSynonymousPositions: Incorrect specified site", &site);
574 
575  // Computation
576  double NbSyn = 0;
577  map<int, double> freq;
578  SiteTools::getFrequencies(site, freq);
579  for (map<int, double>::iterator it = freq.begin(); it != freq.end(); it++)
580  {
581  NbSyn += (it->second) * numberOfSynonymousPositions(it->first, gCode, ratio);
582  }
583  return NbSyn;
584 }
585 
586 /******************************************************************************/
587 
588 size_t CodonSiteTools::numberOfSubsitutions(const Site& site, const GeneticCode& gCode, double freqmin)
590 {
591  // Alphabet checking
592  if (!AlphabetTools::isCodonAlphabet(site.getAlphabet()))
593  throw AlphabetException("CodonSiteTools::numberOfSubsitutions: alphabet is not CodonAlphabet", site.getAlphabet());
594  // Empty site checking
595  if (site.size() == 0)
596  throw EmptySiteException("CodonSiteTools::numberOfSubsitutions: Incorrect specified site", &site);
597 
598  if (SiteTools::isConstant(site))
599  return 0;
600  Site* newsite;
601  if (freqmin > 1. / static_cast<double>(site.size()))
602  newsite = CodonSiteTools::generateCodonSiteWithoutRareVariant(site, gCode, freqmin);
603  else
604  newsite = new Site(site);
605  // Computation
606  if (SiteTools::hasGap(*newsite))
607  return 0;
608  vector<int> pos1, pos2, pos3;
609 
610  const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(site.getAlphabet());
611 
612  for (size_t i = 0; i < newsite->size(); i++)
613  {
614  pos1.push_back(ca->getFirstPosition(newsite->getValue(i)));
615  pos2.push_back(ca->getSecondPosition(newsite->getValue(i)));
616  pos3.push_back(ca->getThirdPosition(newsite->getValue(i)));
617  }
618 
619  const NucleicAlphabet* na = ca->getNucleicAlphabet();
620 
621  Site s1(pos1, na), s2(pos2, na), s3(pos3, na);
622  size_t Scodon = SiteTools::getNumberOfDistinctCharacters(*newsite) - 1;
624  delete newsite;
625  if (Scodon >= Sbase)
626  return Scodon;
627  else
628  return Sbase;
629 }
630 
631 /******************************************************************************/
632 
633 size_t CodonSiteTools::numberOfNonSynonymousSubstitutions(const Site& site, const GeneticCode& gCode, double freqmin)
635 {
636  // Alphabet checking
637  if (!AlphabetTools::isCodonAlphabet(site.getAlphabet()))
638  throw AlphabetException("CodonSiteTools::numberOfNonSynonymousSubstitutions: alphabet is not CodonAlphabet", site.getAlphabet());
639  if (!site.getAlphabet()->equals(*gCode.getSourceAlphabet()))
640  throw AlphabetMismatchException("CodonSiteTools::numberOfNonSynonymousSubstitutions: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getSourceAlphabet());
641  // Empty site checking
642  if (site.size() == 0)
643  throw EmptySiteException("CodonSiteTools::numberOfNonSynonymousSubstitutions: Incorrect specified site", &site);
644 
645  if (SiteTools::isConstant(site))
646  return 0;
647  Site* newsite;
648  if (freqmin > 1. / static_cast<double>(site.size()))
649  newsite = generateCodonSiteWithoutRareVariant(site, gCode, freqmin);
650  else
651  newsite = new Site(site);
652  if (SiteTools::hasGap(*newsite))
653  return 0;
654  // computation
655  map<int, size_t> count;
656  SiteTools::getCounts(*newsite, count);
657  size_t NaSup = 0;
658  size_t Nminmin = 10;
659 
660  const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(site.getAlphabet());
661 
662  for (map<int, size_t>::iterator it1 = count.begin(); it1 != count.end(); it1++)
663  {
664  size_t Nmin = 10;
665  for (map<int, size_t>::iterator it2 = count.begin(); it2 != count.end(); it2++)
666  {
667  size_t Ntot = numberOfDifferences(it1->first, it2->first, *ca);
668  size_t Ns = (size_t)numberOfSynonymousDifferences(it1->first, it2->first, gCode, true);
669  if (Nmin > Ntot - Ns && it1->first != it2->first)
670  Nmin = Ntot - Ns;
671  }
672  NaSup += Nmin;
673  if (Nmin < Nminmin)
674  Nminmin = Nmin;
675  }
676  delete newsite;
677  return NaSup - Nminmin;
678 }
679 
680 /******************************************************************************/
681 
682 vector<size_t> CodonSiteTools::fixedDifferences(const Site& siteIn, const Site& siteOut, int i, int j, const GeneticCode& gCode)
684 {
685  // Alphabet checking
686  if (!AlphabetTools::isCodonAlphabet(siteIn.getAlphabet()))
687  throw AlphabetException("CodonSiteTools::fixedDifferences: alphabet is not CodonAlphabet (siteIn)", siteIn.getAlphabet());
688  if (!AlphabetTools::isCodonAlphabet(siteOut.getAlphabet()))
689  throw AlphabetException("CodonSiteTools::fixedDifferences: alphabet is not CodonAlphabet (siteOut)", siteOut.getAlphabet());
690  if (!siteIn.getAlphabet()->equals(*gCode.getSourceAlphabet()))
691  throw AlphabetMismatchException("CodonSiteTools::fixedDifferences: siteIn and genetic code have not the same codon alphabet.", siteIn.getAlphabet(), gCode.getSourceAlphabet());
692  if (!siteOut.getAlphabet()->equals(*gCode.getSourceAlphabet()))
693  throw AlphabetMismatchException("CodonSiteTools::fixedDifferences: siteOut and genetic code have not the same codon alphabet.", siteOut.getAlphabet(), gCode.getSourceAlphabet());
694  // Empty site checking
695  if (siteIn.size() == 0)
696  throw EmptySiteException("CodonSiteTools::getFixedDifferences Incorrect specified site", &siteIn);
697  if (siteOut.size() == 0)
698  throw EmptySiteException("CodonSiteTools::getFixedDifferences Incorrect specified site", &siteOut);
699 
700  const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(gCode.getSourceAlphabet());
701 
702  size_t Ntot = numberOfDifferences(i, j, *ca);
703  size_t Ns = (size_t) numberOfSynonymousDifferences(i, j, gCode, true);
704  size_t Na = Ntot - Ns;
705  size_t Nfix = Ntot;
706  vector<int> pos1in, pos2in, pos3in, pos1out, pos2out, pos3out;
707 
708  for (size_t k = 0; k < siteIn.size(); k++)
709  {
710  pos1in.push_back(ca->getFirstPosition(siteIn[k]));
711  pos2in.push_back(ca->getSecondPosition(siteIn[k]));
712  pos3in.push_back(ca->getThirdPosition(siteIn[k]));
713  pos1out.push_back(ca->getFirstPosition(siteOut[k]));
714  pos2out.push_back(ca->getSecondPosition(siteOut[k]));
715  pos3out.push_back(ca->getThirdPosition(siteOut[k]));
716  }
717  const NucleicAlphabet* na = ca->getNucleicAlphabet();
718 
719  Site s1in(pos1in, na), s2in(pos2in, na), s3in(pos3in, na);
720  Site s1out(pos1out, na), s2out(pos2out, na), s3out(pos3out, na);
721  bool test1 = false;
722  bool test2 = false;
723  bool test3 = false;
724  if ( (!SiteTools::isConstant(s1in) || !SiteTools::isConstant(s1out)) && ca->getFirstPosition(i) != ca->getFirstPosition(j) )
725  {
726  test1 = true;
727  Nfix--;
728  }
729  if ( (!SiteTools::isConstant(s2in) || !SiteTools::isConstant(s2out)) && ca->getSecondPosition(i) != ca->getSecondPosition(j) )
730  {
731  test2 = true;
732  Nfix--;
733  }
734  if ( (!SiteTools::isConstant(s3in) || !SiteTools::isConstant(s3out)) && ca->getThirdPosition(i) != ca->getThirdPosition(j) )
735  {
736  test3 = true;
737  Nfix--;
738  }
739  // Suppression of differences when not fixed
740  vector<size_t> v(2);
741  if (Nfix == 0)
742  {
743  v[0] = 0;
744  v[1] = 0;
745  return v;
746  }
747  if (Nfix < Ntot)
748  {
749  if (Na == 0)
750  Ns = Nfix;
751  if (Ns == 0)
752  Na = Nfix;
753  else
754  {
755  if (Ntot == 3)
756  {
757  if (Nfix == 1)
758  {
759  if (test1 && test2)
760  {
761  Na = 0; Ns = 1;
762  }
763  if (test1 && test3)
764  {
765  Na = 1; Ns = 0;
766  }
767  if (test2 && test3)
768  {
769  Na--; Ns--;
770  }
771  }
772  }
773  if (Nfix == 2)
774  {
775  if (test1)
776  {
777  Na = 1; Ns = 1;
778  }
779  if (test2)
780  Na--;
781  if (test3)
782  Ns--;
783  }
784  }
785  if (Ntot == 2)
786  {
787  if (test1)
788  {
789  if (ca->getSecondPosition(i) == ca->getSecondPosition(j))
790  Na--;
791  else
792  Ns--;
793  }
794  if (test2)
795  Na--;
796  if (test3)
797  Ns--;
798  }
799  }
800  v[0] = Ns;
801  v[1] = Na;
802  return v;
803 }
804 
805 /******************************************************************************/
806 
808 {
809  if (!SiteTools::isConstant(site, true))
810  {
812  if (!(CodonSiteTools::isSynonymousPolymorphic(site, gCode)))
813  return false;
814 
815  for (size_t i = 0; i < site.size(); i++)
816  {
817  if (!(gCode.isFourFoldDegenerated(site.getValue(i))))
818  {
819  return false;
820  }
821  }
822  }
823  else
824  {
825  for (size_t i = 0; i < site.size(); i++)
826  {
827  if (!(gCode.isFourFoldDegenerated(site.getValue(i))))
828  {
829  return false;
830  }
831  }
832  }
833  return true;
834 }
835 
836 /******************************************************************************/
837 
bool isFourFoldDegenerated(int codon) const
static void getCounts(const SymbolList &list, std::map< int, size_t > &counts)
Count all states in the list.
static bool isConstant(const Site &site, bool ignoreUnknown=false, bool unresolvedRaisesException=true)
Tell if a site is constant, that is displaying the same state in all sequences that do not present a ...
Definition: SiteTools.cpp:141
static size_t numberOfNonSynonymousSubstitutions(const Site &site, const GeneticCode &gCode, double freqmin=0.)
Return the number of Non Synonymous subsitutions per codon site.
static size_t numberOfSubsitutions(const Site &site, const GeneticCode &gCode, double freqmin=0.)
Return the number of subsitutions per codon site.
static bool isMonoSitePolymorphic(const Site &site)
Method to know if a polymorphic codon site is polymorphic at only one site.
This alphabet is used to deal NumericAlphabet.
static bool isFourFoldDegenerated(const Site &site, const GeneticCode &gCode)
const CodonAlphabet * getSourceAlphabet() const
Get the source alphabet.
Definition: GeneticCode.h:106
static double numberOfSynonymousPositions(int i, const GeneticCode &gCode, double ratio=1.0)
Return the number of synonymous positions of a codon.
STL namespace.
static double meanNumberOfSynonymousPositions(const Site &site, const GeneticCode &gCode, double ratio=1)
Return the mean number of synonymous positions per codon site.
static void getFrequencies(const SymbolList &list, std::map< int, double > &frequencies, bool resolveUnknowns=false)
Get all states frequencies in the list.
virtual int getCodon(int pos1, int pos2, int pos3) const
Get the int code for a codon given the int code of the three underlying positions.
static double piNonSynonymous(const Site &site, const GeneticCode &gCode, bool minchange=false)
Compute the non-synonymous pi per codon site.
static bool isSynonymousPolymorphic(const Site &site, const GeneticCode &gCode)
Method to know if polymorphism at a codon site is synonymous.
static size_t numberOfDifferences(int i, int j, const CodonAlphabet &ca)
Compute the number of differences between two codons.
virtual int getFirstPosition(int codon) const
Get the int code of the first position of a codon given its int description.
static bool isCodonAlphabet(const Alphabet *alphabet)
Codon alphabet class.
Definition: CodonAlphabet.h:63
static bool hasGap(const Site &site)
Definition: SiteTools.cpp:56
std::vector< int > getPositions(int word) const
Get the int codes of each position of a word given its int description.
Definition: WordAlphabet.h:264
bool areSynonymous(int i, int j) const
Tell if two codons are synonymous, that is, if they encode the same amino-acid.
Definition: GeneticCode.h:206
Exception sent when a empty site is found.
virtual int getValue(size_t pos) const
Get the element at position &#39;pos&#39; as an int.
Definition: SymbolList.cpp:212
static size_t getNumberOfDistinctCharacters(const Site &site)
Give the number of distinct characters at a site.
Definition: SiteTools.cpp:333
static bool hasStop(const Site &site, const GeneticCode &gCode)
Method to know if a codon site contains stop codon or not.
static Site * generateCodonSiteWithoutRareVariant(const Site &site, const GeneticCode &gCode, double freqmin)
generate a codon site without rare variants
The alphabet exception base class.
static double piSynonymous(const Site &site, const GeneticCode &gCode, bool minchange=false)
Compute the synonymous pi per codon site.
static bool hasGapOrStop(const Site &site, const GeneticCode &gCode)
Method to know if a codon site contains gap(s) or stop codons.
virtual size_t size() const
Get the number of elements in the list.
Definition: SymbolList.h:350
virtual int getSecondPosition(int codon) const
Get the int code of the second position of a codon given its int description.
virtual bool isStop(int state) const =0
Tells is a particular codon is a stop codon.
virtual const NucleicAlphabet *const getNucleicAlphabet() const
bool isUnresolved(int state) const
Definition: WordAlphabet.h:178
static double numberOfSynonymousDifferences(int i, int j, const GeneticCode &gCode, bool minchange=false)
Compute the number of synonymous differences between two codons.
The Site class.
Definition: Site.h:61
Exception thrown when two alphabets do not match.
Partial implementation of the Transliterator interface for genetic code object.
Definition: GeneticCode.h:79
The abstract base class for nucleic alphabets.
virtual int getThirdPosition(int codon) const
Get the int code of the third position of a codon given its int description.
static std::vector< size_t > fixedDifferences(const Site &siteIn, const Site &siteOut, int i, int j, const GeneticCode &gCode)
Return a vector with the number of fixed synonymous and non-synonymous differences per codon site...