bpp-seq  2.2.0
GeneticCode.cpp
Go to the documentation of this file.
1 //
2 // File: GeneticCode.cpp
3 // Created by: Julien Dutheil
4 // Created on: Mon Oct 13 15:37:25 2003
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for sequences analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
40 #include "GeneticCode.h"
41 #include "../SequenceTools.h"
42 #include "../Alphabet/AlphabetTools.h"
43 
44 using namespace bpp;
45 using namespace std;
46 
47 /**********************************************************************************************/
48 
49 StopCodonException::StopCodonException(const std::string& text, const std::string& codon) :
50  Exception("StopCodonException: " + text + "(" + codon + ")"),
51  codon_(codon) {}
52 
53 /**********************************************************************************************/
54 
55 int GeneticCode::translate(int state) const throw (BadIntException, Exception)
56 {
57  if (isStop(state))
58  throw StopCodonException("GeneticCode::translate().", codonAlphabet_.intToChar(state));
59 
60  map<int, int>::const_iterator it = tlnTable_.find(state);
61  if (it == tlnTable_.end())
62  throw BadIntException(state, "GeneticCode::translate().");
63 
64  return it->second;
65 }
66 
67 /**********************************************************************************************/
68 
69 std::string GeneticCode::translate(const std::string& state) const throw (BadCharException, Exception)
70 {
71  int x = codonAlphabet_.charToInt(state);
72  return proteicAlphabet_.intToChar(translate(x));
73 }
74 
75 /**********************************************************************************************/
76 
77 vector<int> GeneticCode::getSynonymous(int aminoacid) const throw (BadIntException)
78 {
79  // test:
80  proteicAlphabet_.intToChar(aminoacid);
81 
82  vector<int> synonymes;
83  for (int i = 0; i < static_cast<int>(codonAlphabet_.getSize()); ++i)
84  {
85  try
86  {
87  if (translate(i) == aminoacid)
88  synonymes.push_back(i);
89  }
90  catch (StopCodonException)
91  { }
92  }
93  return synonymes;
94 }
95 
96 /**********************************************************************************************/
97 
98 std::vector<std::string> GeneticCode::getSynonymous(const std::string& aminoacid) const throw (BadCharException)
99 {
100  // test:
101  int aa = proteicAlphabet_.charToInt(aminoacid);
102 
103  vector<string> synonymes;
104  for (int i = 0; i < static_cast<int>(codonAlphabet_.getSize()); ++i)
105  {
106  try
107  {
108  if (translate(i) == aa)
109  synonymes.push_back(codonAlphabet_.intToChar(i));
110  }
111  catch (StopCodonException)
112  { }
113  }
114  return synonymes;
115 }
116 
117 /**********************************************************************************************/
118 
120 {
121  if (isStop(val))
122  return false;
123 
124  vector<int> codon = codonAlphabet_.getPositions(val);
125  int acid = translate(val);
126 
127  // test all the substitution on third codon position
128  for (int an = 0; an < 4; an++)
129  {
130  if (an == codon[2])
131  continue;
132  vector<int> mutcodon = codon;
133  mutcodon[2] = an;
134  int intcodon = codonAlphabet_.getCodon(mutcodon[0], mutcodon[1], mutcodon[2]);
135  if (isStop(intcodon))
136  return false;
137  int altacid = translate(intcodon);
138  if (altacid != acid) // if non-synonymous
139  {
140  return false;
141  }
142  }
143 
144  return true;
145 }
146 
147 /**********************************************************************************************/
148 
149 Sequence* GeneticCode::getCodingSequence(const Sequence& sequence, bool lookForInitCodon, bool includeInitCodon) const throw (Exception)
150 {
151  size_t initPos = 0;
152  size_t stopPos = sequence.size();
153  if (AlphabetTools::isCodonAlphabet(sequence.getAlphabet()))
154  {
155  // Look for AUG(or ATG) codon:
156  if (lookForInitCodon)
157  {
158  for (size_t i = 0; i < sequence.size(); i++)
159  {
160  vector<int> pos = codonAlphabet_.getPositions(sequence[i]);
161  if (pos[0] == 0 && pos[1] == 3 && pos[2] == 2)
162  {
163  initPos = includeInitCodon ? i : i + 1;
164  break;
165  }
166  }
167  }
168  // Look for stop codon:
169  for (size_t i = initPos; i < sequence.size(); i++)
170  {
171  if (isStop(sequence[i]))
172  {
173  stopPos = i;
174  break;
175  }
176  }
177  }
178  else if (AlphabetTools::isNucleicAlphabet(sequence.getAlphabet()))
179  {
180  // Look for AUG(or ATG) codon:
181  if (lookForInitCodon)
182  {
183  for (size_t i = 0; i < sequence.size() - 2; i++)
184  {
185  if (sequence[i] == 0 && sequence[i + 1] == 3 && sequence[i + 2] == 2)
186  {
187  initPos = includeInitCodon ? i : i + 3;
188  break;
189  }
190  }
191  }
192  // Look for stop codon:
193  const NucleicAlphabet* nucAlpha = codonAlphabet_.getNucleicAlphabet();
194  for (size_t i = initPos; i < sequence.size() - 2; i += 3)
195  {
196  string codon = nucAlpha->intToChar(sequence[i])
197  + nucAlpha->intToChar(sequence[i + 1])
198  + nucAlpha->intToChar(sequence[i + 2]);
199  if (isStop(codon))
200  {
201  stopPos = i;
202  break;
203  }
204  }
205  }
206  else
207  throw AlphabetMismatchException("Sequence must have alphabet of type nucleic or codon in GeneticCode::getCodingSequence.", 0, sequence.getAlphabet());
208 
209  return SequenceTools::subseq(sequence, initPos, stopPos - 1);
210 }
211 
212 /**********************************************************************************************/
213 
bool isFourFoldDegenerated(int codon) const
An alphabet exception thrown when trying to specify a bad char to the alphabet.
Sequence * getCodingSequence(const Sequence &sequence, bool lookForInitCodon=false, bool includeInitCodon=false) const
Get the subsequence corresponding to the coding part of a given sequence.
This alphabet is used to deal with NumericAlphabet.
STL namespace.
virtual int getCodon(int pos1, int pos2, int pos3) const
Get the int code for a codon given the int code of the three underlying positions.
static bool isCodonAlphabet(const Alphabet *alphabet)
std::vector< int > getPositions(int word) const
Get the int codes of each position of a word given its int description.
Definition: WordAlphabet.h:264
static bool isNucleicAlphabet(const Alphabet *alphabet)
CodonAlphabet codonAlphabet_
Definition: GeneticCode.h:84
std::vector< int > getSynonymous(int aminoacid) const
Definition: GeneticCode.cpp:77
StopCodonException(const std::string &text, const std::string &codon)
Definition: GeneticCode.cpp:49
virtual size_t size() const =0
Get the number of elements in the list.
virtual bool isStop(int state) const =0
Tells if a particular codon is a stop codon.
The sequence interface.
Definition: Sequence.h:74
Exception thrown when a stop codon is found.
Definition: GeneticCode.h:54
static Sequence * subseq(const Sequence &sequence, size_t begin, size_t end)
Get a sub-sequence.
An alphabet exception thrown when trying to specify a bad int to the alphabet.
Exception thrown when two alphabets do not match.
The abstract base class for nucleic alphabets.
std::string intToChar(int state) const
Give the string description of a state given its int description.
virtual int translate(int state) const
Translate a given state coded as a int from source alphabet to target alphabet.
Definition: GeneticCode.cpp:55