bpp-seq  2.2.0
RNA.cpp
Go to the documentation of this file.
1 //
2 // File: RNA.cpp
3 // Authors: Guillaume Deuchst
4 // Julien Dutheil
5 // Sylvain Gaillard
6 // Created on: Tue Jul 22 2003
7 //
8 
9 /*
10 Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
11 
12 This software is a computer program whose purpose is to provide classes
13 for sequences analysis.
14 
15 This software is governed by the CeCILL license under French law and
16 abiding by the rules of distribution of free software. You can use,
17 modify and/ or redistribute the software under the terms of the CeCILL
18 license as circulated by CEA, CNRS and INRIA at the following URL
19 "http://www.cecill.info".
20 
21 As a counterpart to the access to the source code and rights to copy,
22 modify and redistribute granted by the license, users are provided only
23 with a limited warranty and the software's author, the holder of the
24 economic rights, and the successive licensors have only limited
25 liability.
26 
27 In this respect, the user's attention is drawn to the risks associated
28 with loading, using, modifying and/or developing or reproducing the
29 software by the user in light of its specific status of free software,
30 that may mean that it is complicated to manipulate, and that also
31 therefore means that it is reserved for developers and experienced
32 professionals having in-depth computer knowledge. Users are therefore
33 encouraged to load and test the software's suitability as regards their
34 requirements in conditions enabling the security of their systems and/or
35 data to be ensured and, more generally, to use and operate it in the
36 same conditions as regards security.
37 
38 The fact that you are presently reading this means that you have had
39 knowledge of the CeCILL license and that you accept its terms.
40 */
41 
42 #include "RNA.h"
43 #include <Bpp/Text/TextTools.h>
44 #include <Bpp/Utils/MapTools.h>
45 
46 using namespace bpp;
47 
48 // From STL:
49 #include <map>
50 
51 using namespace std;
52 
53 /******************************************************************************/
54 // class constructor
55 RNA::RNA(bool exclamationMarkCountsAsGap)
56 {
57  // Alphabet content definition
58  // all unresolved bases use n°14
59  registerState(new NucleicAlphabetState(-1, "-", 0, "Gap"));
60  registerState(new NucleicAlphabetState( 0, "A", 1, "Adenine"));
61  registerState(new NucleicAlphabetState( 1, "C", 2, "Cytosine"));
62  registerState(new NucleicAlphabetState( 2, "G", 4, "Guanine"));
63  registerState(new NucleicAlphabetState( 3, "U", 8, "Uracile"));
64  registerState(new NucleicAlphabetState( 4, "M", 3, "Adenine or Cytosine"));
65  registerState(new NucleicAlphabetState( 5, "R", 5, "Purine (Adenine or Guanine)"));
66  registerState(new NucleicAlphabetState( 6, "W", 9, "Adenine or Uracile"));
67  registerState(new NucleicAlphabetState( 7, "S", 6, "Cytosine or Guanine"));
68  registerState(new NucleicAlphabetState( 8, "Y", 10, "Pyrimidine (Cytosine or Uracile)"));
69  registerState(new NucleicAlphabetState( 9, "K", 12, "Guanine or Uracile"));
70  registerState(new NucleicAlphabetState(10, "V", 7, "Adenine or Cytosine or Guanine"));
71  registerState(new NucleicAlphabetState(11, "H", 11, "Adenine or Cytosine or Uracile"));
72  registerState(new NucleicAlphabetState(12, "D", 13, "Adenine or Guanine or Uracile"));
73  registerState(new NucleicAlphabetState(13, "B", 14, "Cytosine or Guanine or Uracile"));
74  registerState(new NucleicAlphabetState(14, "N", 15, "Unresolved base"));
75  registerState(new NucleicAlphabetState(14, "X", 15, "Unresolved base"));
76  registerState(new NucleicAlphabetState(14, "O", 15, "Unresolved base"));
77  registerState(new NucleicAlphabetState(14, "0", 15, "Unresolved base"));
78  registerState(new NucleicAlphabetState(14, "?", 15, "Unresolved base"));
79  if (exclamationMarkCountsAsGap)
80  registerState(new NucleicAlphabetState(-1, "!", 0, "Frameshift"));
81  else
82  registerState(new NucleicAlphabetState(14, "!", 15, "Unresolved base"));
83 }
84 
85 /******************************************************************************/
86 
87 std::vector<int> RNA::getAlias(int state) const throw (BadIntException)
88 {
89  if (!isIntInAlphabet(state))
90  throw BadIntException(state, "DNA::getAlias(int): Specified base unknown.");
91  vector<int> v;
92  const NucleicAlphabetState& st = getState(state);
93  if (state == -1)
94  v.push_back(-1);
95  if (st.getBinaryCode() & 1)
96  v.push_back(0);
97  if (st.getBinaryCode() & 2)
98  v.push_back(1);
99  if (st.getBinaryCode() & 4)
100  v.push_back(2);
101  if (st.getBinaryCode() & 8)
102  v.push_back(3);
103  return v;
104 }
105 
106 
107 /******************************************************************************/
108 
109 std::vector<std::string> RNA::getAlias(const std::string & state) const throw (BadCharException)
110 {
111  string locstate = TextTools::toUpper(state);
112  if(!isCharInAlphabet(locstate)) throw BadCharException(locstate, "RNA::getAlias(int): Specified base unknown.");
113  vector<int> vi = this->getAlias(this->charToInt(state));
114  vector<string> v;
115  for (unsigned int i = 0 ; i < vi.size() ; i++)
116  v.push_back(this->intToChar(vi[i]));
117  return v;
118 }
119 
120 /******************************************************************************/
121 
122 int RNA::getGeneric(const std::vector<int> & states) const throw (BadIntException)
123 {
124  int v = 0;
125  for (size_t i = 0 ; i < states.size() ; ++i) {
126  if (!isIntInAlphabet(states[i])) throw BadIntException(states[i], "RNA::getGeneric(const vector<int>& states): Specified base unknown.");
127  v |= getState(states[i]).getBinaryCode();
128  }
129  return getStateByBinCode(v).getNum();
130 }
131 
132 /******************************************************************************/
133 
134 std::string RNA::getGeneric(const std::vector<std::string> & states) const throw (BadCharException)
135 {
136  vector<int> vi;
137  for (unsigned int i = 0 ; i < states.size() ; ++i) {
138  if (!isCharInAlphabet(states[i])) throw BadCharException(states[i], "DNA::getGeneric(const vector<string>& states): Specified base unknown.");
139  vi.push_back(this->charToInt(states[i]));
140  }
141  return intToChar(getGeneric(vi));
142 }
143 
144 /******************************************************************************/
145 
An alphabet exception thrown when trying to specify a bad char to the alphabet.
This alphabet is used to deal with NumericAlphabet.
int getBinaryCode() const
Get the state's binary representation.
STL namespace.
RNA(bool exclamationMarkCountsAsGap=false)
Definition: RNA.cpp:55
int getGeneric(const std::vector< int > &states) const
Get the generic state that matches a set of states.
Definition: RNA.cpp:122
This is the base class to describe states in a NucleicAlphabet.
std::vector< int > getAlias(int state) const
Get all resolved states that match a generic state.
Definition: RNA.cpp:87
An alphabet exception thrown when trying to specify a bad int to the alphabet.