bpp-seq  2.2.0
WordAlphabet.h
Go to the documentation of this file.
1 //
2 // File: WordAlphabet.h
3 // Created by: Laurent Gueguen
4 // Created on: Sun Dec 28 2008
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for sequences analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided
21  only with a limited warranty and the software's author, the holder of
22  the economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards
32  their requirements in conditions enabling the security of their
33  systems and/or data to be ensured and, more generally, to use and
34  operate it in the same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
40 #ifndef _WORDALPHABET_H_
41 #define _WORDALPHABET_H_
42 
43 #include "AbstractAlphabet.h"
44 
45 // From the STL:
46 #include <string>
47 #include <vector>
48 
49 #include "../Sequence.h"
50 
51 namespace bpp
52 {
66 class WordAlphabet :
67  public AbstractAlphabet
68 {
69 protected:
// One Alphabet pointer per word position: element i is the alphabet used to
// encode/decode the i-th letter of a word (see getNPosition / getPositions).
// The destructor of this class has an empty body, so these pointers are not
// deleted by WordAlphabet itself.
70  std::vector<const Alphabet* > vAbsAlph_;
71 
72 public:
73  // Constructor and destructor.
// Builds a new word alphabet from a vector of Alphabets.
82  WordAlphabet(const std::vector<const Alphabet*>& vAlpha);
83 
// NOTE(review): presumably builds a word alphabet of `num` positions that all
// use `pAlpha` -- confirm against the implementation file.
91  WordAlphabet(const Alphabet* pAlpha, unsigned int num);
92 
94 
96  {
98  vAbsAlph_=bia.vAbsAlph_;
99  return *this;
100  }
101 
103  {
104  return new WordAlphabet(*this);
105  }
106 
// Virtual destructor with an empty body: nothing held in vAbsAlph_ is deleted here.
107  virtual ~WordAlphabet() {}
108 
109 public:
// Get the complete name of a state given its string description.
// Declared to throw BadCharException; the definition is not in this header.
125  std::string getName(const std::string& state) const throw (BadCharException);
126 
127  int charToInt(const std::string& state) const throw (BadCharException)
128  {
129  if (state.size() != vAbsAlph_.size())
130  throw BadCharException(state, "WordAlphabet::charToInt", this);
131  if (containsUnresolved(state))
132  return static_cast<int>(getSize());
133  if (containsGap(state))
134  return -1;
135  else return AbstractAlphabet::charToInt(state);
136  }
137 
138  unsigned int getSize() const
139  {
140  return getNumberOfChars() - 2;
141  }
142 
// Returns true if the alphabets of all the letters in the word are of the same type.
150  bool hasUniqueAlphabet() const;
151 
156  unsigned int getLength() const
157  {
158  return static_cast<unsigned int>(vAbsAlph_.size());
159  }
160 
161 
166  unsigned int getNumberOfTypes() const
167  {
168  return getNumberOfChars() - 1;
169  }
170 
// Identification method: returns a string describing the type of this alphabet.
171  std::string getAlphabetType() const;
172 
174  {
175  return static_cast<int>(getSize());
176  }
177 
178  bool isUnresolved(int state) const { return state == getUnknownCharacterCode(); }
179  bool isUnresolved(const std::string& state) const { return charToInt(state) == getUnknownCharacterCode(); }
180 
// Get all resolved states that match a generic state (int version).
181  std::vector<int> getAlias(int state) const throw (BadIntException);
// String version of getAlias(); declared to throw BadCharException.
182  std::vector<std::string> getAlias(const std::string& state) const throw (BadCharException);
// Get the generic state that match a set of states (int version).
183  int getGeneric(const std::vector<int>& states) const throw (BadIntException);
// String version of getGeneric(); declared to throw BadCharException.
184  std::string getGeneric(const std::vector<std::string>& states) const throw (BadCharException);
185 
186 private:
// Used by charToInt(): when this returns true, charToInt() returns getSize().
192  bool containsUnresolved(const std::string& state) const throw (BadCharException);
// Used by charToInt(): when this returns true (and the word is not unresolved),
// charToInt() returns -1.
193  bool containsGap(const std::string& state) const throw (BadCharException);
// NOTE(review): defined in the implementation file; presumably registers the
// word states of this alphabet -- confirm there.
194  void build_();
197 public:
210  const Alphabet* getNAlphabet(size_t n) const
211  {
212  if (n >= vAbsAlph_.size())
213  throw IndexOutOfBoundsException("WordAlphabet::getNPosition", n, 0, vAbsAlph_.size());
214 
215  return vAbsAlph_[n];
216  }
217 
// Get the int code for a word given the int code of the underlying positions.
// NOTE(review): `pos` is presumably the index in `vint` where the word starts -- confirm.
227  virtual int getWord(const std::vector<int>& vint, size_t pos = 0) const throw (IndexOutOfBoundsException);
228 
// String counterpart of getWord(): takes the string codes of the positions.
// NOTE(review): `pos` is presumably the index in `vpos` where the word starts -- confirm.
239  virtual std::string getWord(const std::vector<std::string>& vpos, size_t pos = 0) const throw (IndexOutOfBoundsException, BadCharException);
240 
248  int getNPosition(int word, size_t n) const throw (BadIntException)
249  {
250  if (n >= vAbsAlph_.size())
251  throw IndexOutOfBoundsException("WordAlphabet::getNPosition", n, 0, vAbsAlph_.size());
252 
253  std::string s = intToChar(word);
254  return vAbsAlph_[n]->charToInt(s.substr(n, 1));
255  }
256 
264  std::vector<int> getPositions(int word) const throw (BadIntException)
265  {
266  std::string s = intToChar(word);
267  std::vector<int> positions;
268  for (size_t i = 0; i < s.size(); i++)
269  {
270  positions.push_back(vAbsAlph_[i]->charToInt(s.substr(i, 1)));
271  }
272 
273  return positions;
274  }
282  std::string getNPosition(const std::string& word, size_t n) const throw (BadCharException)
283  {
284  if (n > vAbsAlph_.size())
285  throw BadCharException("", "WordAlphabet::getNPosition", this);
286  // Test:
287  charToInt(word);
288 
289  return "" + word.substr(n, 1);
290  }
291 
292 
300  std::vector<std::string> getPositions(const std::string& word) const throw (BadCharException)
301  {
302  charToInt(word);
303  std::vector<std::string> positions;
304  for (size_t i = 0; i < word.size(); i++)
305  {
306  positions.push_back(word.substr(i, 1));
307  }
308 
309  return positions;
310  }
311 
// Translate a whole sequence from letters alphabet to words alphabet.
// Returns a raw Sequence pointer.
// NOTE(review): the caller presumably owns the returned object, and the unnamed
// size_t argument is presumably a start position -- confirm in the implementation.
321  Sequence* translate(const Sequence &sequence, size_t = 0) const throw (AlphabetMismatchException, Exception);
322 
// Translate a whole sequence from words alphabet to letters alphabet.
// Returns a raw Sequence pointer.
// NOTE(review): the caller presumably owns the returned object -- confirm.
331  Sequence* reverse(const Sequence& sequence) const throw (AlphabetMismatchException, Exception);
332 
339  unsigned int getStateCodingSize() const { return static_cast<unsigned int>(vAbsAlph_.size()); }
341 };
342 } // end of namespace bpp.
343 
344 #endif // _WORDALPHABET_H_
345 
An alphabet exception thrown when trying to specify a bad char to the alphabet.
Sequence * reverse(const Sequence &sequence) const
Translate a whole sequence from words alphabet to letters alphabet.
int charToInt(const std::string &state) const
Give the int description of a state given its string description.
Definition: WordAlphabet.h:127
int getGeneric(const std::vector< int > &states) const
Get the generic state that match a set of states.
const Alphabet * getNAlphabet(size_t n) const
Get the pointer to the Alphabet of the n-position.
Definition: WordAlphabet.h:210
bool containsUnresolved(const std::string &state) const
This alphabet is used to deal with NumericAlphabet.
WordAlphabet * clone() const
Definition: WordAlphabet.h:102
Sequence * translate(const Sequence &sequence, size_t=0) const
Translate a whole sequence from letters alphabet to words alphabet.
unsigned int getLength() const
Returns the length of the word.
Definition: WordAlphabet.h:156
The Alphabet interface.
Definition: Alphabet.h:130
STL namespace.
int charToInt(const std::string &state) const
Give the int description of a state given its string description.
int getNPosition(int word, size_t n) const
Get the int code of the n-position of a word given its int description.
Definition: WordAlphabet.h:248
virtual ~WordAlphabet()
Definition: WordAlphabet.h:107
AbstractAlphabet & operator=(const AbstractAlphabet &alph)
The base class for word alphabets.
Definition: WordAlphabet.h:66
bool isUnresolved(const std::string &state) const
Definition: WordAlphabet.h:179
WordAlphabet & operator=(const WordAlphabet &bia)
Definition: WordAlphabet.h:95
std::string getAlphabetType() const
Identification method.
unsigned int getSize() const
Get the number of resolved states in the alphabet (e.g. return 4 for DNA alphabet). This is the method you'll need in most cases.
Definition: WordAlphabet.h:138
WordAlphabet(const std::vector< const Alphabet *> &vAlpha)
Builds a new word alphabet from a vector of Alphabets.
unsigned int getStateCodingSize() const
Get the size of the string coding a state.
Definition: WordAlphabet.h:339
std::vector< int > getPositions(int word) const
Get the int codes of each position of a word given its int description.
Definition: WordAlphabet.h:264
std::string getNPosition(const std::string &word, size_t n) const
Get the char code of the n-position of a word given its char description.
Definition: WordAlphabet.h:282
bool containsGap(const std::string &state) const
WordAlphabet(const WordAlphabet &bia)
Definition: WordAlphabet.h:93
std::vector< const Alphabet *> vAbsAlph_
Definition: WordAlphabet.h:70
A partial implementation of the Alphabet interface.
The sequence interface.
Definition: Sequence.h:74
virtual int getWord(const std::vector< int > &vint, size_t pos=0) const
Get the int code for a word given the int code of the underlying positions.
bool isUnresolved(int state) const
Definition: WordAlphabet.h:178
An alphabet exception thrown when trying to specify a bad int to the alphabet.
unsigned int getNumberOfTypes() const
Returns the number of resolved states + one for unresolved.
Definition: WordAlphabet.h:166
int getUnknownCharacterCode() const
Definition: WordAlphabet.h:173
Exception thrown when two alphabets do not match.
bool hasUniqueAlphabet() const
Returns true if the alphabets of all the letters in the word are of the same type.
std::vector< std::string > getPositions(const std::string &word) const
Get the char codes of each position of a word given its char description.
Definition: WordAlphabet.h:300
std::string getName(const std::string &state) const
Get the complete name of a state given its string description.
std::string intToChar(int state) const
Give the string description of a state given its int description.
unsigned int getNumberOfChars() const
Get the number of supported characters in this alphabet, including generic characters (e...
std::vector< int > getAlias(int state) const
Get all resolved states that match a generic state.