bpp-seq  2.2.0
AbstractAlphabet.cpp
Go to the documentation of this file.
1 //
2 // File: AbstractAlphabet.cpp
3 // Authors: Guillaume Deuchst
4 // Julien Dutheil
5 // Sylvain Gaillard
6 // Created on: Tue Jul 22 2003
7 //
8 
9 /*
10 Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
11 
12 This software is a computer program whose purpose is to provide classes
13 for sequences analysis.
14 
15 This software is governed by the CeCILL license under French law and
16 abiding by the rules of distribution of free software. You can use,
17 modify and/ or redistribute the software under the terms of the CeCILL
18 license as circulated by CEA, CNRS and INRIA at the following URL
19 "http://www.cecill.info".
20 
21 As a counterpart to the access to the source code and rights to copy,
22 modify and redistribute granted by the license, users are provided only
23 with a limited warranty and the software's author, the holder of the
24 economic rights, and the successive licensors have only limited
25 liability.
26 
27 In this respect, the user's attention is drawn to the risks associated
28 with loading, using, modifying and/or developing or reproducing the
29 software by the user in light of its specific status of free software,
30 that may mean that it is complicated to manipulate, and that also
31 therefore means that it is reserved for developers and experienced
32 professionals having in-depth computer knowledge. Users are therefore
33 encouraged to load and test the software's suitability as regards their
34 requirements in conditions enabling the security of their systems and/or
35 data to be ensured and, more generally, to use and operate it in the
36 same conditions as regards security.
37 
38 The fact that you are presently reading this means that you have had
39 knowledge of the CeCILL license and that you accept its terms.
40 */
41 
42 #include "AbstractAlphabet.h"
43 #include <Bpp/Text/TextTools.h>
44 #include <Bpp/Utils/MapTools.h>
45 
46 using namespace bpp;
47 
48 // From the STL:
49 #include <ctype.h>
50 #include <map>
51 #include <iostream>
52 
53 using namespace std;
54 
55 /******************************************************************************/
56 
57 void AbstractAlphabet::updateMaps_(size_t pos, const AlphabetState& st) {
58  if (letters_.find(st.getLetter()) == letters_.end())
59  letters_[st.getLetter()] = pos;
60  else
61  throw Exception("AbstractAlphabet::updateMaps_. A state with the same character code already exists! " + st.getLetter() + ".");
62  if (nums_.find(st.getNum()) == nums_.end())
63  nums_[st.getNum()] = pos;
64  else
65  nums_[st.getNum()] = min(pos, nums_[st.getNum()]);
66 }
67 
68 /******************************************************************************/
69 
70 void AbstractAlphabet::registerState(AlphabetState* st) throw (Exception) {
71  // Add the state to the vector
72  alphabet_.push_back(st);
73  // Update the maps
74  updateMaps_(alphabet_.size() - 1, *st);
75 }
76 
77 /******************************************************************************/
78 
80  throw (Exception, IndexOutOfBoundsException) {
81  if (pos > alphabet_.size())
82  throw IndexOutOfBoundsException("AbstractAlphabet::setState: incorect position", pos, 0, alphabet_.size());
83  // Delete the state if not empty
84  if (alphabet_[pos] != 0)
85  delete alphabet_[pos];
86  // Put the state in the vector
87  alphabet_[pos] = st;
88  // Update the maps
89  updateMaps_(pos, *st);
90  }
91 
92 /******************************************************************************/
93 
94 const AlphabetState& AbstractAlphabet::getState(const std::string& letter) const throw (BadCharException) {
95  map<string, size_t>::const_iterator it = letters_.find(letter);
96  if (it == letters_.end())
97  throw BadCharException(letter, "AbstractAlphabet::getState(string): Specified base unknown", this);
98  return * (alphabet_[it->second]);
99 }
100 
101 /******************************************************************************/
102 
103 size_t AbstractAlphabet::getStateIndex(const std::string& letter) const throw (BadCharException) {
104  map<string, size_t>::const_iterator it = letters_.find(letter);
105  if (it == letters_.end())
106  throw BadCharException(letter, "AbstractAlphabet::getStateIndex(string): Specified base unknown", this);
107  return it->second;
108 }
109 
110 /******************************************************************************/
111 
113  map<int, size_t>::const_iterator it = nums_.find(num);
114  if (it == nums_.end())
115  throw BadIntException(num, "AbstractAlphabet::getState(int): Specified base unknown", this);
116  return *(alphabet_[it->second]);
117 }
118 
119 /******************************************************************************/
120 
121 size_t AbstractAlphabet::getStateIndex(int num) const throw (BadIntException) {
122  map<int, size_t>::const_iterator it = nums_.find(num);
123  if (it == nums_.end())
124  throw BadIntException(num, "AbstractAlphabet::getStateIndex(int): Specified base unknown", this);
125  return it->second;
126 }
127 
128 /******************************************************************************/
129 
130 AlphabetState& AbstractAlphabet::getState(const std::string& letter) throw (BadCharException) {
131  map<string, size_t>::iterator it = letters_.find(letter);
132  if (it == letters_.end())
133  throw BadCharException(letter, "AbstractAlphabet::getState(string): Specified base unknown", this);
134  return * (alphabet_[it->second]);
135 }
136 
137 /******************************************************************************/
138 
140  map<int, size_t>::iterator it = nums_.find(num);
141  if (it == nums_.end())
142  throw BadIntException(num, "AbstractAlphabet::getState(int): Specified base unknown", this);
143  return * (alphabet_[it->second]);
144 }
145 
146 /******************************************************************************/
147 
148 AlphabetState& AbstractAlphabet::getStateAt(size_t pos) throw (IndexOutOfBoundsException) {
149  if (pos > alphabet_.size())
150  throw IndexOutOfBoundsException("AbstractAlphabet::getStateAt: incorect position", pos, 0, alphabet_.size());
151  return * (alphabet_[pos]);
152 }
153 
154 /******************************************************************************/
155 
156 const AlphabetState& AbstractAlphabet::getStateAt(size_t pos) const throw (IndexOutOfBoundsException) {
157  if (pos > alphabet_.size())
158  throw IndexOutOfBoundsException("AbstractAlphabet::getStateAt: incorect position", pos, 0, alphabet_.size());
159  return * (alphabet_[pos]);
160 }
161 
162 /******************************************************************************/
163 
164 std::string AbstractAlphabet::getName(const std::string& state) const throw (BadCharException)
165 {
166  return (getState(state)).getName();
167 }
168 
169 /******************************************************************************/
170 
171 std::string AbstractAlphabet::getName(int state) const throw (BadIntException)
172 {
173  return (getState(state)).getName();
174 }
175 
176 /******************************************************************************/
177 
178 int AbstractAlphabet::charToInt(const std::string& state) const throw (BadCharException)
179 {
180  return getState(state).getNum();
181 }
182 
183 /******************************************************************************/
184 
185 std::string AbstractAlphabet::intToChar(int state) const throw (BadIntException)
186 {
187  return (getState(state)).getLetter();
188 }
189 
190 /******************************************************************************/
191 
193 {
194  map<int, size_t>::const_iterator it = nums_.find(state);
195  if (it != nums_.end())
196  return true;
197  return false;
198 }
199 
200 /******************************************************************************/
201 
202 bool AbstractAlphabet::isCharInAlphabet(const std::string& state) const
203 {
204  map<string, size_t>::const_iterator it = letters_.find(state);
205  if (it != letters_.end())
206  return true;
207  return false;
208 }
209 
210 /******************************************************************************/
211 
212 std::vector<int> AbstractAlphabet::getAlias(int state) const throw (BadIntException)
213 {
214  if (!isIntInAlphabet(state)) throw BadIntException(state, "AbstractAlphabet::getAlias(int): Specified base unknown.");
215  vector<int> v(1);
216  v[0] = state;
217  return v;
218 }
219 
220 /******************************************************************************/
221 
222 std::vector<std::string> AbstractAlphabet::getAlias(const std::string& state) const throw (BadCharException)
223 {
224  if (!isCharInAlphabet(state)) throw BadCharException(state, "AbstractAlphabet::getAlias(char): Specified base unknown.");
225  vector<string> v(1);
226  v[0] = state;
227  return v;
228 }
229 
230 /******************************************************************************/
231 
232 int AbstractAlphabet::getGeneric(const std::vector<int>& states) const throw (BadIntException) {
233  map<int, int> m;
234  for (unsigned int i = 0 ; i < states.size() ; ++i) {
235  vector<int> tmp_s = this->getAlias(states[i]); // get the states for generic characters
236  for (unsigned int j = 0 ; j < tmp_s.size() ; ++j) {
237  m[tmp_s[j]] ++; // add each state to the list
238  }
239  }
240  vector<int> ve = MapTools::getKeys(m);
241 
242  string key;
243  for (unsigned int i = 0 ; i < ve.size() ; ++i) {
244  if (!isIntInAlphabet(ve[i])) throw BadIntException(ve[i], "AbstractAlphabet::getGeneric(const vector<int>): Specified base unknown.");
245  key += "_" + TextTools::toString(ve[i]);
246  }
247  int v;
248  if (ve.size() == 1) {
249  v = ve[0];
250  } else {
251  v = this->getUnknownCharacterCode();
252  }
253  return v;
254 }
255 
256 /******************************************************************************/
257 
258 std::string AbstractAlphabet::getGeneric(const std::vector<std::string>& states) const throw (AlphabetException) {
259  map <string, int> m;
260  for (unsigned int i = 0 ; i < states.size() ; ++i) {
261  vector<string> tmp_s = this->getAlias(states[i]); // get the states for generic characters
262  for (unsigned int j = 0 ; j < tmp_s.size() ; ++j) {
263  m[tmp_s[j]] ++; // add each state to the list
264  }
265  }
266  vector<string> ve = MapTools::getKeys(m);
267 
268  string key;
269  for (unsigned int i = 0 ; i < ve.size() ; ++i) {
270  if (!isCharInAlphabet(ve[i])) throw BadCharException(ve[i], "AbstractAlphabet::getAlias(const vector<string>): Specified base unknown.");
271  key += TextTools::toString(ve[i]);
272  }
273  string v;
274  if (ve.size() == 1) {
275  v = ve[0];
276  } else {
277  throw CharStateNotSupportedException("AbstractAlphabet::getAlias(const vector<string>): No generic char state.");
278  }
279  return v;
280 }
281 
282 /******************************************************************************/
283 
284 const std::vector<int>& AbstractAlphabet::getSupportedInts() const
285 {
286  if(intList_.size() != alphabet_.size())
287  {
288  intList_.resize(alphabet_.size());
289  charList_.resize(alphabet_.size());
290  for (size_t i = 0; i < alphabet_.size(); ++i)
291  {
292  intList_[i] = alphabet_[i]->getNum();
293  charList_[i] = alphabet_[i]->getLetter();
294  }
295  }
296  return intList_;
297 }
298 
299 /******************************************************************************/
300 
301 const std::vector<std::string>& AbstractAlphabet::getSupportedChars() const
302 {
303  if(charList_.size() != alphabet_.size())
304  {
305  intList_.resize(alphabet_.size());
306  charList_.resize(alphabet_.size());
307  for (size_t i = 0; i < alphabet_.size(); ++i)
308  {
309  intList_[i] = alphabet_[i]->getNum();
310  charList_[i] = alphabet_[i]->getLetter();
311  }
312  }
313  return charList_;
314 }
315 
316 /******************************************************************************/
317 
This is the base class to describe states in an Alphabet.
Definition: AlphabetState.h:54
An alphabet exception thrown when trying to specify a bad char to the alphabet.
int getNum() const
Get the state's number.
Definition: AlphabetState.h:83
This alphabet is used to deal with NumericAlphabet.
void updateMaps_(size_t pos, const AlphabetState &st)
Update the private maps letters_ and nums_ when adding a state.
STL namespace.
int charToInt(const std::string &state) const
Give the int description of a state given its string description.
bool isIntInAlphabet(int state) const
Tell if a state (specified by its int description) is allowed by the alphabet.
const std::vector< std::string > & getSupportedChars() const
const AlphabetState & getState(const std::string &letter) const
Get a state by its letter.
virtual void setState(size_t pos, AlphabetState *st)
Set a state in the Alphabet.
The alphabet exception base class.
std::vector< int > getAlias(int state) const
Get all resolved states that match a generic state.
const std::string & getLetter() const
Get the letter(s) corresponding to the state.
Definition: AlphabetState.h:99
std::string getName(const std::string &state) const
Get the complete name of a state given its string description.
bool isCharInAlphabet(const std::string &state) const
Tell if a state (specified by its string description) is allowed by the alphabet.
Exception thrown in case no character is available for a certain state in an alphabet.
int getGeneric(const std::vector< int > &states) const
Get the generic state that matches a set of states.
An alphabet exception thrown when trying to specify a bad int to the alphabet.
virtual AlphabetState & getStateAt(size_t stateIndex)
Get a state at a position in the alphabet_ vector.
virtual void registerState(AlphabetState *st)
Add a state to the Alphabet.
const std::vector< int > & getSupportedInts() const
std::string intToChar(int state) const
Give the string description of a state given its int description.
size_t getStateIndex(int state) const