bpp-seq  2.2.0
SequenceWithQualityTools.cpp
Go to the documentation of this file.
1 //
2 // File: SequenceWithQualityTools.h
3 // Authors: Vincent Cahais
4 // Sylvain Gaillard
5 // Created on: 16 Apr 2010
6 //
7 
8 /*
9 Copyright or © or Copr. Bio++ Development Team, (Apr 16, 2010)
10 
11 This software is a computer program whose purpose is to provide classes
12 for sequences analysis.
13 
14 This software is governed by the CeCILL license under French law and
15 abiding by the rules of distribution of free software. You can use,
16 modify and/ or redistribute the software under the terms of the CeCILL
17 license as circulated by CEA, CNRS and INRIA at the following URL
18 "http://www.cecill.info".
19 
20 As a counterpart to the access to the source code and rights to copy,
21 modify and redistribute granted by the license, users are provided only
22 with a limited warranty and the software's author, the holder of the
23 economic rights, and the successive licensors have only limited
24 liability.
25 
26 In this respect, the user's attention is drawn to the risks associated
27 with loading, using, modifying and/or developing or reproducing the
28 software by the user in light of its specific status of free software,
29 that may mean that it is complicated to manipulate, and that also
30 therefore means that it is reserved for developers and experienced
31 professionals having in-depth computer knowledge. Users are therefore
32 encouraged to load and test the software's suitability as regards their
33 requirements in conditions enabling the security of their systems and/or
34 data to be ensured and, more generally, to use and operate it in the
35 same conditions as regards security.
36 
37 The fact that you are presently reading this means that you have had
38 knowledge of the CeCILL license and that you accept its terms.
39 */
40 
42 
43 using namespace bpp;
44 using namespace std;
45 
51 
52 /******************************************************************************/
53 
54 SequenceWithQuality * SequenceWithQualityTools::subseq(const SequenceWithQuality & sequence, unsigned int begin, unsigned int end) throw (IndexOutOfBoundsException, Exception)
55 {
56  // Checking interval
57  if (end >= sequence.size()) throw IndexOutOfBoundsException ("SequenceTools::subseq : Invalid upper bound", end, 0, sequence.size());
58  if (end < begin) throw Exception ("SequenceTools::subseq : Invalid interval");
59 
60  // Copy sequence
61  vector<int> temp(sequence.getContent());
62  vector<int> qualtemp(sequence.getQualities());
63 
64  // Truncate sequence
65  temp.erase(temp.begin() + end + 1, temp.end());
66  temp.erase(temp.begin(), temp.begin() + begin);
67  qualtemp.erase(qualtemp.begin() + end + 1, qualtemp.end());
68  qualtemp.erase(qualtemp.begin(), qualtemp.begin() + begin);
69 
70  // New sequence creation
71  return new SequenceWithQuality(sequence.getName(), temp, qualtemp, sequence.getComments(), sequence.getAlphabet());
72 
73 }
74 
75 /******************************************************************************/
76 
78 {
79  // Sequence's alphabets matching verification
80  if ((seqwq1.getAlphabet()->getAlphabetType()) != (seqwq2.getAlphabet()->getAlphabetType()))
81  throw AlphabetMismatchException("SequenceTools::concatenate : Sequence's alphabets don't match ", seqwq1.getAlphabet(), seqwq2.getAlphabet());
82 
83  // Sequence's names matching verification
84  if (seqwq1.getName() != seqwq2.getName())
85  throw Exception ("SequenceTools::concatenate : Sequence's names don't match");
86 
87  // Concatenate sequences and send result
88  vector<int> sequence = seqwq1.getContent();
89  vector<int> sequence2 = seqwq2.getContent();
90  vector<int> qualities = seqwq1.getQualities();
91  vector<int> qualities2 = seqwq2.getQualities();
92 
93  sequence.insert(sequence.end(), sequence2.begin(), sequence2.end());
94  qualities.insert(qualities.end(), qualities2.begin(), qualities2.end());
95  return new SequenceWithQuality(seqwq1.getName(), sequence, qualities, seqwq1.getComments(), seqwq1.getAlphabet());
96 }
97 
98 /******************************************************************************/
99 
101 {
102  // Alphabet type checking
104  if (sequence.getAlphabet()->getAlphabetType() == "DNA alphabet")
105  {
106  NAR = &_DNARep;
107  }
108  else if(sequence.getAlphabet()->getAlphabetType() == "RNA alphabet")
109  {
110  NAR = &_RNARep;
111  }
112  else
113  {
114  throw AlphabetException ("SequenceTools::complement : Sequence must be nucleic.", sequence.getAlphabet());
115  }
116  Sequence * seq = NAR->translate(sequence);
117  SequenceWithQuality * seqwq = new SequenceWithQuality(sequence.getName(), seq->getContent(), sequence.getQualities(), sequence.getComments(), sequence.getAlphabet());
118  delete seq;
119  return seqwq;
120 }
121 
122 /******************************************************************************/
123 
125 {
126  // Alphabet type checking
127  if (sequence.getAlphabet()->getAlphabetType() != "DNA alphabet")
128  {
129  throw AlphabetException ("SequenceTools::transcript : Sequence must be DNA", sequence.getAlphabet());
130  }
131  Sequence * seq = _transc.translate(sequence);
132  SequenceWithQuality * seqwq = new SequenceWithQuality(sequence.getName(), seq->getContent(), sequence.getQualities(), sequence.getComments(), sequence.getAlphabet());
133  delete seq;
134  return seqwq;
135 }
136 
137 /******************************************************************************/
138 
140 {
141  // Alphabet type checking
142  if (sequence.getAlphabet()->getAlphabetType() != "RNA alphabet")
143  {
144  throw AlphabetException ("SequenceTools::reverseTranscript : Sequence must be RNA", sequence.getAlphabet());
145  }
146 
147  Sequence * seq = _transc.reverse(sequence);
148  SequenceWithQuality * seqwq = new SequenceWithQuality(sequence.getName(), seq->getContent(), sequence.getQualities(), sequence.getComments(), sequence.getAlphabet());
149  delete seq;
150  return seqwq;
151 }
152 
153 /******************************************************************************/
154 
156 {
157  vector<int> iContent(sequence.getContent().rbegin(),sequence.getContent().rend());
158  vector<int> iQualities(sequence.getQualities().rbegin(),sequence.getQualities().rend());
159  SequenceWithQuality* iSeq = sequence.clone();
160  iSeq->setContent(iContent);
161  iSeq->setQualities(iQualities);
162 
163  return iSeq;
164 }
165 
166 /******************************************************************************/
167 
169 {
170  vector<int> content;
171  vector<int> qualities;
172  const Alphabet * alpha = seq.getAlphabet();
173  for(unsigned int i = 0; i < seq.size(); i++)
174  {
175  if(! alpha->isGap(seq[i]))
176  {
177  content.push_back(seq[i]);
178  qualities.push_back(seq.getQualities()[i]);
179  }
180  }
181  SequenceWithQuality * newSeq = dynamic_cast<SequenceWithQuality *>(seq.clone());
182  newSeq->setContent(content);
183  newSeq->setQualities(qualities);
184  return newSeq;
185 }
186 
187 /******************************************************************************/
188 
190  bool badqual = false;
191  while (badqual) {
192  }
193  return seq;
194 }
virtual const std::vector< int > & getContent() const
Get the whole content of the list as a vector of int.
Definition: SymbolList.h:603
static SequenceWithQuality * invert(const SequenceWithQuality &sequence)
Inverse a sequence from 5&#39;->3&#39; to 3&#39;->5&#39; and vice-versa.
virtual void setContent(const std::string &sequence)
Set the whole content of the sequence.
static SequenceWithQuality * reverseTranscript(const SequenceWithQuality &sequence)
Get the reverse-transcription sequence of a RNA sequence.
virtual const Alphabet * getAlphabet() const
Get the alphabet associated to the list.
Definition: SymbolList.h:599
virtual bool isGap(int state) const =0
This alphabet is used to deal NumericAlphabet.
The Alphabet interface.
Definition: Alphabet.h:130
static SequenceWithQuality * removeGaps(const SequenceWithQuality &seq)
Remove gaps from a SequenceWithQuality.
STL namespace.
static SequenceWithQuality & trimLeft(SequenceWithQuality &seq)
Trim the left part of the sequence according to quality.
A SequenceWithAnnotation class with quality scores attached.
static NucleicAcidsReplication _transc
int translate(int state) const
Translate a given state coded as a int from source alphabet to target alphabet.
static SequenceWithQuality * complement(const SequenceWithQuality &sequence)
Get the complementary sequence of a nucleotide sequence.
static SequenceWithQuality * concatenate(const SequenceWithQuality &seqwq1, const SequenceWithQuality &seqwq2)
Concatenate two sequences.
static SequenceWithQuality * transcript(const SequenceWithQuality &sequence)
Get the transcription sequence of a DNA sequence.
static SequenceWithQuality * subseq(const SequenceWithQuality &sequence, unsigned int begin, unsigned int end)
Get a sub-sequence.
NucleicAcidsReplication SequenceWithQualityTools::_DNARep & _DNA
The alphabet exception base class.
static NucleicAcidsReplication _RNARep
NucleicAcidsReplication SequenceWithQualityTools::_RNARep & _RNA
void setQualities(const std::vector< int > &quality)
Set the whole quality scores.
virtual const std::vector< int > & getContent() const =0
Get the whole content of the list as a vector of int.
This alphabet is used to deal with DNA sequences.
Definition: DNA.h:60
The sequence interface.
Definition: Sequence.h:74
const std::vector< int > & getQualities() const
Get the whole quality scores.
static NucleicAcidsReplication _DNARep
This alphabet is used to deal with RNA sequences.
Definition: RNA.h:58
Exception thrown when two alphabets do not match.
virtual size_t size() const
Get the number of elements in the list.
Definition: SymbolList.h:601
Replication between to nucleic acids.
SequenceWithQuality * clone() const