bpp-seq  2.2.0
DNA.cpp
Go to the documentation of this file.
1 //
2 // File: DNA.cpp
3 // Authors: Guillaume Deuchst
4 // Julien Dutheil
5 // Sylvain Gaillard
6 // Created on: Tue Jul 22 2003
7 //
8 
9 
10 /*
11 Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
12 
13 This software is a computer program whose purpose is to provide classes
14 for sequences analysis.
15 
16 This software is governed by the CeCILL license under French law and
17 abiding by the rules of distribution of free software. You can use,
18 modify and/ or redistribute the software under the terms of the CeCILL
19 license as circulated by CEA, CNRS and INRIA at the following URL
20 "http://www.cecill.info".
21 
22 As a counterpart to the access to the source code and rights to copy,
23 modify and redistribute granted by the license, users are provided only
24 with a limited warranty and the software's author, the holder of the
25 economic rights, and the successive licensors have only limited
26 liability.
27 
28 In this respect, the user's attention is drawn to the risks associated
29 with loading, using, modifying and/or developing or reproducing the
30 software by the user in light of its specific status of free software,
31 that may mean that it is complicated to manipulate, and that also
32 therefore means that it is reserved for developers and experienced
33 professionals having in-depth computer knowledge. Users are therefore
34 encouraged to load and test the software's suitability as regards their
35 requirements in conditions enabling the security of their systems and/or
36 data to be ensured and, more generally, to use and operate it in the
37 same conditions as regards security.
38 
39 The fact that you are presently reading this means that you have had
40 knowledge of the CeCILL license and that you accept its terms.
41 */
42 
43 #include "DNA.h"
44 #include "AlphabetState.h"
45 #include <Bpp/Text/TextTools.h>
46 #include <Bpp/Utils/MapTools.h>
47 
48 using namespace bpp;
49 
50 // From STL:
51 #include <map>
52 
53 using namespace std;
54 
55 /******************************************************************************/
56 
57 DNA::DNA(bool exclamationMarkCountsAsGap)
58 {
59  // Alphabet content definition
60  // all unresolved bases use n°14
61  registerState(new NucleicAlphabetState(-1, "-", 0, "Gap"));
62  registerState(new NucleicAlphabetState( 0, "A", 1, "Adenine"));
63  registerState(new NucleicAlphabetState( 1, "C", 2, "Cytosine"));
64  registerState(new NucleicAlphabetState( 2, "G", 4, "Guanine"));
65  registerState(new NucleicAlphabetState( 3, "T", 8, "Thymine"));
66  registerState(new NucleicAlphabetState( 4, "M", 3, "Adenine or Cytosine"));
67  registerState(new NucleicAlphabetState( 5, "R", 5, "Purine (Adenine or Guanine)"));
68  registerState(new NucleicAlphabetState( 6, "W", 9, "Adenine or Thymine"));
69  registerState(new NucleicAlphabetState( 7, "S", 6, "Cytosine or Guanine"));
70  registerState(new NucleicAlphabetState( 8, "Y", 10, "Pyrimidine (Cytosine or Thymine)"));
71  registerState(new NucleicAlphabetState( 9, "K", 12, "Guanine or Thymine"));
72  registerState(new NucleicAlphabetState(10, "V", 7, "Adenine or Cytosine or Guanine"));
73  registerState(new NucleicAlphabetState(11, "H", 11, "Adenine or Cytosine or Thymine"));
74  registerState(new NucleicAlphabetState(12, "D", 13, "Adenine or Guanine or Thymine"));
75  registerState(new NucleicAlphabetState(13, "B", 14, "Cytosine or Guanine or Thymine"));
76  registerState(new NucleicAlphabetState(14, "N", 15, "Unresolved base"));
77  registerState(new NucleicAlphabetState(14, "X", 15, "Unresolved base"));
78  registerState(new NucleicAlphabetState(14, "O", 15, "Unresolved base"));
79  registerState(new NucleicAlphabetState(14, "0", 15, "Unresolved base"));
80  registerState(new NucleicAlphabetState(14, "?", 15, "Unresolved base"));
81  if (exclamationMarkCountsAsGap)
82  registerState(new NucleicAlphabetState(-1, "!", 0, "Frameshift"));
83  else
84  registerState(new NucleicAlphabetState(14, "!", 15, "Unresolved base"));
85 }
86 
87 /******************************************************************************/
88 
89 std::vector<int> DNA::getAlias(int state) const throw (BadIntException)
90 {
91  if (!isIntInAlphabet(state))
92  throw BadIntException(state, "DNA::getAlias(int): Specified base unknown.");
93  vector<int> v;
94  const NucleicAlphabetState& st = getState(state);
95  if (state == -1)
96  v.push_back(-1);
97  if (st.getBinaryCode() & 1)
98  v.push_back(0);
99  if (st.getBinaryCode() & 2)
100  v.push_back(1);
101  if (st.getBinaryCode() & 4)
102  v.push_back(2);
103  if (st.getBinaryCode() & 8)
104  v.push_back(3);
105  return v;
106 }
107 
108 
109 /******************************************************************************/
110 
111 std::vector<std::string> DNA::getAlias(const std::string& state) const throw (BadCharException)
112 {
113  string locstate = TextTools::toUpper(state);
114  if(!isCharInAlphabet(locstate)) throw BadCharException(locstate, "DNA::getAlias(int): Specified base unknown.");
115  vector<int> vi = this->getAlias(this->charToInt(state));
116  vector<string> v;
117  for (unsigned int i = 0 ; i < vi.size() ; i++)
118  v.push_back(this->intToChar(vi[i]));
119  return v;
120 }
121 
122 /******************************************************************************/
123 
124 int DNA::getGeneric(const std::vector<int>& states) const throw (BadIntException)
125 {
126  int v = 0;
127  for (size_t i = 0 ; i < states.size() ; ++i) {
128  if (!isIntInAlphabet(states[i])) throw BadIntException(states[i], "DNA::getGeneric(const vector<int>& states): Specified base unknown.");
129  v |= getState(states[i]).getBinaryCode();
130  }
131  return getStateByBinCode(v).getNum();
132 }
133 
134 /******************************************************************************/
135 
136 std::string DNA::getGeneric(const std::vector<std::string>& states) const throw (BadCharException)
137 {
138  vector<int> vi;
139  for (unsigned int i = 0 ; i < states.size() ; ++i) {
140  if (!isCharInAlphabet(states[i])) throw BadCharException(states[i], "DNA::getGeneric(const vector<string>& states): Specified base unknown.");
141  vi.push_back(this->charToInt(states[i]));
142  }
143  return intToChar(getGeneric(vi));
144 }
145 
146 /******************************************************************************/
147 
DNA(bool exclamationMarkCountsAsGap=false)
Definition: DNA.cpp:57
An alphabet exception thrown when trying to specify a bad char to the alphabet.
int getGeneric(const std::vector< int > &states) const
Get the generic state that matches a set of states.
Definition: DNA.cpp:124
This alphabet is used to deal with NumericAlphabet.
int getBinaryCode() const
Get the state's binary representation.
STL namespace.
This is the base class to describe states in a NucleicAlphabet.
An alphabet exception thrown when trying to specify a bad int to the alphabet.
std::vector< int > getAlias(int state) const
Get all resolved states that match a generic state.
Definition: DNA.cpp:89