bpp-seq  2.2.0
RNY.cpp
Go to the documentation of this file.
1 //
2 // File: RNY.cpp
3 // Created by: Laurent Gueguen
4 // Created on: Tue Jul 31 2007
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
9 
10  This software is a computer program whose purpose is to provide
11  classes for sequences analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided
21  only with a limited warranty and the software's author, the holder of
22  the economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards
32  their requirements in conditions enabling the security of their
33  systems and/or data to be ensured and, more generally, to use and
34  operate it in the same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
40 #include "RNY.h" // class's header file
41 
42 // From Utils:
43 #include <Bpp/Text/TextTools.h>
44 #include "AlphabetTools.h"
45 
46 #include <iostream>
47 using namespace std;
48 using namespace bpp;
49 
50 /****************************************************************************************/
51 
52 RNY::RNY(const NucleicAlphabet& na) : nuclalph_(na)
53 {
54  // Initialization:
55  vector<AlphabetState*> states;
56  for (int i = 0; i < 351; ++i)
57  {
58  states.push_back(new AlphabetState(i, TextTools::toString(i), ""));
59  }
60 
61  // Alphabet content definition:
62 
63  string s1;
64 
66  s1 = "RCT-";
67  else
68  s1 = "RCU-";
69 
70  string s2;
71 
73  s2 = "AGCT-";
74  else
75  s2 = "AGCU-";
76 
77  string s3 = "AGY-";
78  string s = " ";
79 
80 
81  // NNN (0->35)
82 
83  for (size_t i = 0; i < 3; ++i)
84  {
85  for (size_t j = 0; j < 4; ++j)
86  {
87  for (size_t k = 0; k < 3; ++k)
88  {
89  size_t l = i * 12 + j * 3 + k;
90  s[0] = s1[i];
91  s[1] = s2[j];
92  s[2] = s3[k];
93  states[l] = new AlphabetState(static_cast<int>(l), s, s);
94  }
95  }
96  }
97 
98  // NN- (50->83)
99 
100  for (size_t i = 0; i < 3; ++i)
101  {
102  for (size_t j = 0; j < 4; ++j)
103  {
104  size_t l = 50 + 12 * i + j * 3;
105  s[0] = s1[i];
106  s[1] = s2[j];
107  s[2] = s3[3];
108  states[l] = new AlphabetState(static_cast<int>(l), s, s);
109  }
110  }
111 
112  // N-N (100->126)
113 
114  for (size_t i = 0; i < 3; ++i)
115  {
116  for (size_t k = 0; k < 3; ++k)
117  {
118  size_t l = 100 + 12 * i + k;
119  s[0] = s1[i];
120  s[1] = s2[4];
121  s[2] = s3[k];
122  states[l] = new AlphabetState(static_cast<int>(l), s, s);
123  }
124  }
125 
126  // N-- (150->152)
127 
128  for (size_t i = 0; i < 3; ++i)
129  {
130  size_t l = 150 + 12 * i;
131  s[0] = s1[i];
132  s[1] = s2[4];
133  s[2] = s3[3];
134  states[l] = new AlphabetState(static_cast<int>(l), s, s);
135  }
136 
137  // -NN (200->211)
138 
139  for (size_t j = 0; j < 4; ++j)
140  {
141  for (size_t k = 0; k < 3; ++k)
142  {
143  size_t l = 200 + j * 3 + k;
144  s[0] = s1[3];
145  s[1] = s2[j];
146  s[2] = s3[k];
147  states[l] = new AlphabetState(static_cast<int>(l), s, s);
148  }
149  }
150 
151 
152  // -N- (250->253)
153 
154  for (size_t j = 0; j < 4; ++j)
155  {
156  size_t l = 250 + 3 * j;
157  s[0] = s1[3];
158  s[1] = s2[j];
159  s[2] = s3[3];
160  states[l] = new AlphabetState(static_cast<int>(l), s, s);
161  }
162 
163  // --N (300->302)
164 
165  for (size_t k = 0; k < 3; ++k)
166  {
167  size_t l = 300 + k;
168  s[0] = s1[3];
169  s[1] = s2[4];
170  s[2] = s3[k];
171  states[l] = new AlphabetState(static_cast<int>(l), s, s);
172  }
173 
174 
175  // --- (350)
176 
177  s[0] = s1[3];
178  s[1] = s2[4];
179  s[2] = s3[3];
180  states[350] = new AlphabetState(350, s, s);
181 
182  // Register all states:
183  for (size_t i = 0; i < states.size(); ++i)
184  registerState(states[i]);
185 }
186 
187 /****************************************************************************************/
188 
189 vector<int> RNY::getAlias(int state) const throw (BadIntException)
190 {
191  if (!isIntInAlphabet(state))
192  throw BadIntException(state, "RNY::getAlias(int): Specified base unknown.");
193  vector<int> v;
194 
195  int qs = state / 50;
196  int rs = state % 50;
197  int i, j, k;
198 
199  switch (qs)
200  {
201  case 0: // NNN
202  v.resize(1);
203  v[0] = rs;
204  break;
205  case 1: // NN-
206  v.resize(3);
207  for (k = 0; k < 3; ++k)
208  {
209  v[static_cast<size_t>(k)] = k + rs;
210  }
211  break;
212  case 2: // N-N
213  v.resize(4);
214  for (j = 0; j < 4; ++j)
215  {
216  v[static_cast<size_t>(j)] = 3 * j + rs;
217  }
218  break;
219  case 3: // N--
220  v.resize(12);
221  for (j = 0; j < 4; ++j)
222  {
223  for (k = 0; k < 3; ++k)
224  {
225  v[static_cast<size_t>(3 * j + k)] = rs + 3 * j + k;
226  }
227  }
228  break;
229  case 4: // -NN
230  v.resize(3);
231  for (i = 0; i < 3; ++i)
232  {
233  v[static_cast<size_t>(i)] = 12 * i + rs;
234  }
235  break;
236  case 5: // -N-
237  v.resize(9);
238  for (i = 0; i < 3; ++i)
239  {
240  for (k = 0; k < 3; ++k)
241  {
242  v[static_cast<size_t>(3 * i + k)] = rs + 12 * i + k;
243  }
244  }
245  break;
246  case 6: // --N
247  v.resize(12);
248  for (i = 0; i < 3; ++i)
249  {
250  for (j = 0; j < 4; ++j)
251  {
252  v[static_cast<size_t>(4 * i + j)] = rs + 12 * i + 3 * j;
253  }
254  }
255  break;
256  case 7: // ---
257  v.resize(36);
258  for (i = 0; i < 3; ++i)
259  {
260  for (j = 0; j < 4; ++j)
261  {
262  for (k = 0; k < 3; ++k)
263  {
264  v[static_cast<size_t>(12 * i + 3 * j + k)] = 12 * i + 3 * j + k;
265  }
266  }
267  }
268  break;
269  }
270  return v;
271 }
272 
274 {
275  return nuclalph_;
276 }
277 
278 /****************************************************************************************/
279 
280 vector<string> RNY::getAlias(const string& state) const throw (BadCharException)
281 {
282  if (!isCharInAlphabet(state))
283  throw BadCharException(state, "RNY::getAlias(int): Specified base unknown.");
284 
285  vector<int> v = getAlias(charToInt(state));
286  vector<string> s;
287  size_t size = v.size();
288  s.resize(size);
289 
290  for (size_t i = 0; i < size; i++)
291  {
292  s[i] = AbstractAlphabet::intToChar(v[i]);
293  }
294  return s;
295 }
296 
297 /****************************************************************************************/
298 
299 string RNY::getRNY(const string& pos1, const string& pos2, const string& pos3) const throw (BadCharException)
300 {
301  string tr;
302 
303  if (pos1 == "A" || pos1 == "G")
304  tr = "R";
305  else
306  tr = pos1;
307 
308  tr += pos2;
309 
310  if (pos3 == "T" || pos3 == "U" || pos3 == "C")
311  tr += "Y";
312  else
313  tr += pos3;
314 
315  // teste triplet;
316  charToInt(tr);
317  return tr;
318 }
319 
320 /**************************************************************************************/
321 int RNY::getRNY(int i, int j, int k, const Alphabet& alph) const throw (BadCharException)
322 {
324  {
325  throw AlphabetException ("RNY::getRNY : Sequence must be Nucleic",
326  &alph);
327  }
328 
329  char li = alph.intToChar(i)[0];
330  char lj = alph.intToChar(j)[0];
331  char lk = alph.intToChar(k)[0];
332 
333  int r = 0;
334  int s = 0;
335 
336  switch (li)
337  {
338  case 'A':
339  case 'G':
340  r += 0;
341  break;
342  case 'C':
343  r += 1;
344  break;
345  case 'T':
346  case 'U':
347  r += 2;
348  break;
349  case '-':
350  case 'N':
351  s += 1;
352  break;
353  default:
354  throw BadCharException(&li, "RNY::getRNY(int,int;int,alph): Specified base unknown.");
355  }
356 
357  r *= 4;
358  s *= 2;
359 
360  switch (lj)
361  {
362  case 'A':
363  r += 0;
364  break;
365  case 'G':
366  r += 1;
367  break;
368  case 'C':
369  r += 2;
370  break;
371  case 'T':
372  case 'U':
373  r += 3;
374  break;
375  case '-':
376  case 'N':
377  s += 1;
378  break;
379  default:
380  throw BadCharException(&lj, "RNY::getRNY(int,int;int,alph): Specified base unknown.");
381  }
382 
383  r *= 3;
384  s *= 2;
385 
386  switch (lk)
387  {
388  case 'A':
389  r += 0;
390  break;
391  case 'G':
392  r += 1;
393  break;
394  case 'C':
395  case 'T':
396  case 'U':
397  r += 2;
398  break;
399  case '-':
400  case 'N':
401  s += 1;
402  break;
403  default:
404  throw BadCharException(&lk, "RNY::getRNY(int,int;int,alph): Specified base unknown.");
405  }
406 
407  return 50 * s + r;
408 }
409 
410 /****************************************************************************************/
411 bool RNY::isGap(int state) const
412 {
413  return state == 350;
414 }
415 
416 bool RNY::containsGap(const string& state) const throw (BadCharException)
417 {
418  return state.find("-") != string::npos;
419 }
420 
421 bool RNY::isUnresolved(const string& state) const
422 {
423  return containsGap(state);
424 }
425 
426 bool RNY::isUnresolved(int state) const
427 {
428  return state >= 50 && state != 350;
429 }
430 
431 /****************************************************************************************/
432 
433 int RNY::charToInt(const string& state) const throw (BadCharException)
434 {
435  if (state.size() != 3)
436  throw BadCharException(state, "RNY::charToInt", this);
437  else
438  return AbstractAlphabet::charToInt(state);
439 }
440 
441 
442 /************************************************************/
443 
444 string RNY::intToChar(int state) const throw (BadIntException)
445 {
446  int i, j, k, l;
447  for (i = 0; i < 3; ++i)
448  {
449  for (j = 0; j < 4; ++j)
450  {
451  for (k = 0; k < 3; ++k)
452  {
453  l = i * 12 + j * 3 + k;
454  if (getState(l).getNum() == state)
455  return getState(l).getLetter();
456  }
457  }
458  }
459 
460  // NN- (50->83)
461 
462  for (i = 0; i < 3; ++i)
463  {
464  for (j = 0; j < 4; ++j)
465  {
466  l = 50 + 12 * i + j * 3;
467  if (getState(l).getNum() == state)
468  return getState(l).getLetter();
469  }
470  }
471 
472  // N-N (100->126)
473 
474  for (i = 0; i < 3; ++i)
475  {
476  for (k = 0; k < 3; ++k)
477  {
478  l = 100 + 12 * i + k;
479  if (getState(l).getNum() == state)
480  return getState(l).getLetter();
481  }
482  }
483 
484  // N-- (150->152)
485 
486  for (i = 0; i < 3; ++i)
487  {
488  l = 150 + 12 * i;
489  if (getState(l).getNum() == state)
490  return getState(l).getLetter();
491  }
492 
493  // -NN (200->211)
494 
495  for (j = 0; j < 4; ++j)
496  {
497  for (k = 0; k < 3; ++k)
498  {
499  l = 200 + j * 3 + k;
500  if (getState(l).getNum() == state)
501  return getState(l).getLetter();
502  }
503  }
504 
505 
506  // -N- (250->253)
507 
508  for (j = 0; j < 4; ++j)
509  {
510  l = 250 + 3 * j;
511  if (getState(l).getNum() == state)
512  return getState(l).getLetter();
513  }
514 
515  // --N (300->302)
516 
517  for (k = 0; k < 3; ++k)
518  {
519  l = 300 + k;
520  if (getState(l).getNum() == state)
521  return getState(l).getLetter();
522  }
523 
524 
525  // --- (350)
526 
527  l = 350;
528  if (getState(l).getNum() == state)
529  return getState(l).getLetter();
530 
531  throw BadIntException(state, "RNY::intToChar: Specified base unknown", this);
532  return "XXX";
533 }
This is the base class to describe states in an Alphabet.
Definition: AlphabetState.h:54
An alphabet exception thrown when trying to specify a bad char to the alphabet.
bool isGap(int state) const
Definition: RNY.cpp:411
const NucleicAlphabet & getLetterAlphabet() const
Definition: RNY.cpp:273
This alphabet is used to deal NumericAlphabet.
The Alphabet interface.
Definition: Alphabet.h:130
STL namespace.
int charToInt(const std::string &state) const
Give the int description of a state given its string description.
const NucleicAlphabet & nuclalph_
Definition: RNY.h:68
bool containsGap(const std::string &state) const
Definition: RNY.cpp:416
bool isUnresolved(int state) const
Definition: RNY.cpp:426
static bool isNucleicAlphabet(const Alphabet *alphabet)
static bool isDNAAlphabet(const Alphabet *alphabet)
The alphabet exception base class.
std::string intToChar(int state) const
Give the string description of a state given its int description.
Definition: RNY.cpp:444
std::string getRNY(const std::string &, const std::string &, const std::string &) const
Get the char code for a triplet given the char code of the three underlying positions.
Definition: RNY.cpp:299
int charToInt(const std::string &state) const
Give the int description of a state given its string description.
Definition: RNY.cpp:433
std::vector< int > getAlias(int state) const
Get all resolved states that match a generic state.
Definition: RNY.cpp:189
An alphabet exception thrown when trying to specify a bad int to the alphabet.
virtual void registerState(AlphabetState *st)
Add a state to the Alphabet.
The abstract base class for nucleic alphabets.
std::string intToChar(int state) const
Give the string description of a state given its int description.