bpp-seq  2.2.0
Sequence.cpp
Go to the documentation of this file.
1 //
2 // File: Sequence.cpp
3 // Created by: Guillaume Deuchst
4 // Julien Dutheil
5 // Created on: Tue Aug 21 2003
6 //
7 
8 /*
9 Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
10 
11 This software is a computer program whose purpose is to provide classes
12 for sequences analysis.
13 
14 This software is governed by the CeCILL license under French law and
15 abiding by the rules of distribution of free software. You can use,
16 modify and/ or redistribute the software under the terms of the CeCILL
17 license as circulated by CEA, CNRS and INRIA at the following URL
18 "http://www.cecill.info".
19 
20 As a counterpart to the access to the source code and rights to copy,
21 modify and redistribute granted by the license, users are provided only
22 with a limited warranty and the software's author, the holder of the
23 economic rights, and the successive licensors have only limited
24 liability.
25 
26 In this respect, the user's attention is drawn to the risks associated
27 with loading, using, modifying and/or developing or reproducing the
28 software by the user in light of its specific status of free software,
29 that may mean that it is complicated to manipulate, and that also
30 therefore means that it is reserved for developers and experienced
31 professionals having in-depth computer knowledge. Users are therefore
32 encouraged to load and test the software's suitability as regards their
33 requirements in conditions enabling the security of their systems and/or
34 data to be ensured and, more generally, to use and operate it in the
35 same conditions as regards security.
36 
37 The fact that you are presently reading this means that you have had
38 knowledge of the CeCILL license and that you accept its terms.
39 */
40 
41 #include "Sequence.h" // class's header file
42 
43 #include "Alphabet/AlphabetTools.h"
44 #include "StringSequenceTools.h"
45 #include <Bpp/Text/TextTools.h>
46 
47 using namespace bpp;
48 
49 // From the STL:
50 #include <iostream>
51 
52 using namespace std;
53 
54 /* Constructors: **************************************************************/
55 
57  BasicSymbolList(alpha),
58  name_(),
59  comments_()
60 {}
61 
62 BasicSequence::BasicSequence(const std::string& name, const std::string& sequence, const Alphabet* alpha)
63 throw (BadCharException) :
64  BasicSymbolList(alpha),
65  name_(name),
66  comments_()
67 {
68  if (sequence!="")
69  setContent(sequence);
70 }
71 
72 BasicSequence::BasicSequence(const std::string& name, const std::string& sequence, const Comments& comments, const Alphabet* alpha)
73  throw (BadCharException) :
74  BasicSymbolList(alpha),
75  name_(name),
76  comments_(comments)
77 {
78  if (sequence!="")
79  setContent(sequence);
80 }
81 
82 BasicSequence::BasicSequence(const std::string& name, const std::vector<std::string>& sequence, const Alphabet* alpha)
83 throw (BadCharException) :
84  BasicSymbolList(sequence, alpha),
85  name_(name),
86  comments_()
87 {}
88 
89 BasicSequence::BasicSequence(const std::string& name, const std::vector<std::string>& sequence, const Comments& comments, const Alphabet* alpha)
90  throw (BadCharException) :
91  BasicSymbolList(sequence, alpha),
92  name_(name),
93  comments_(comments)
94 {}
95 
96 BasicSequence::BasicSequence(const std::string& name, const std::vector<int>& sequence, const Alphabet* alpha)
97  throw (BadIntException) :
98  BasicSymbolList(sequence, alpha),
99  name_(name),
100  comments_()
101 {}
102 
103 BasicSequence::BasicSequence(const std::string& name, const std::vector<int>& sequence, const Comments& comments, const Alphabet* alpha)
104  throw (BadIntException) :
105  BasicSymbolList(sequence, alpha),
106  name_(name),
107  comments_(comments)
108 {}
109 
110 /* Copy constructors: *********************************************************/
111 
113  BasicSymbolList(s),
114  name_(s.getName()),
115  comments_(s.getComments())
116 {}
117 
119  BasicSymbolList(s),
120  name_(s.getName()),
121  comments_(s.getComments())
122 {}
123 
124 /* Assignment operator: *******************************************************/
125 
127 {
129  name_ = s.getName();
130  comments_ = s.getComments();
131  return *this;
132 }
133 
135 {
137  name_ = s.getName();
138  comments_ = s.getComments();
139  return *this;
140 }
141 
142 /******************************************************************************/
143 
144 void BasicSequence::setContent(const std::string& sequence) throw (BadCharException)
145 {
146  // Remove blanks in sequence
147  content_ = StringSequenceTools::codeSequence(TextTools::removeWhiteSpaces(sequence), getAlphabet());
148  //Warning, an exception may be thrown here!
149 }
150 
151 /******************************************************************************/
152 
153 void BasicSequence::setToSizeR(size_t newSize)
154 {
155  // Size verification
156  size_t seqSize = content_.size();
157  if (newSize == seqSize) return;
158 
159  if (newSize < seqSize)
160  {
161  content_.resize(newSize);
162  return;
163  }
164 
165  // Add gaps up to specified size
166  int gap = getAlphabet()->getGapCharacterCode();
167  while (content_.size() < newSize) content_.push_back(gap);
168 }
169 
170 /******************************************************************************/
171 
172 void BasicSequence::setToSizeL(size_t newSize)
173 {
174  // Size verification
175  size_t seqSize = content_.size();
176  if (newSize == seqSize) return;
177 
178  if (newSize < seqSize)
179  {
180  //We must truncate sequence from the left.
181  //This is a very unefficient method!
182  content_.erase(content_.begin(), content_.begin() + static_cast<ptrdiff_t>(seqSize - newSize));
183  return;
184  }
185 
186  // Add gaps up to specified size
187  int gap = getAlphabet()->getGapCharacterCode();
188  content_.insert(content_.begin(), newSize - seqSize, gap);
189 }
190 
191 /******************************************************************************/
192 
193 void BasicSequence::append(const std::vector<int>& content) throw (BadIntException)
194 {
195  // Check list for incorrect characters
196  for (size_t i = 0; i < content.size(); i++)
197  if(!getAlphabet()->isIntInAlphabet(content[i]))
198  throw BadIntException(content[i], "BasicSequence::append", getAlphabet());
199  //BasicSequence is valid:
200  for (size_t i = 0; i < content.size(); i++)
201  content_.push_back(content[i]);
202 }
203 
204 void BasicSequence::append(const std::vector<std::string>& content) throw (BadCharException)
205 {
206  // Check list for incorrect characters
207  for (size_t i = 0; i < content.size(); i++)
208  if(!getAlphabet()->isCharInAlphabet(content[i]))
209  throw BadCharException(content[i], "BasicSequence::append", getAlphabet());
210 
211  //BasicSequence is valid:
212  for (size_t i = 0; i < content.size(); i++)
213  content_.push_back(getAlphabet()->charToInt(content[i]));
214 }
215 
216 void BasicSequence::append(const std::string& content) throw (BadCharException)
217 {
218  append(StringSequenceTools::codeSequence(content, getAlphabet()));
219 }
220 
221 /******************************************************************************/
222 
const Comments & getComments() const
Get the comments associated to this sequence.
Definition: Sequence.h:379
An alphabet exception thrown when trying to specify a bad char to the alphabet.
std::vector< std::string > Comments
Declaration of Comments type.
Definition: Sequence.h:60
BasicSymbolList & operator=(const SymbolList &list)
The generic assignment operator.
Definition: SymbolList.cpp:69
This alphabet is used to deal with NumericAlphabet.
virtual void setToSizeL(size_t newSize)
Set up the size of a sequence from the left side.
Definition: Sequence.cpp:172
The Alphabet interface.
Definition: Alphabet.h:130
STL namespace.
virtual int getGapCharacterCode() const =0
virtual const std::string & getName() const =0
Get the name of this sequence.
static std::vector< int > codeSequence(const std::string &sequence, const Alphabet *alphabet)
Convert a string sequence to a vector of int.
Comments comments_
The sequence comments.
Definition: Sequence.h:221
std::string name_
The sequence name.
Definition: Sequence.h:216
A basic SymbolList object.
Definition: SymbolList.h:264
virtual void setContent(const std::string &sequence)
Set the whole content of the sequence.
Definition: Sequence.cpp:144
virtual const Comments & getComments() const =0
Get the comments associated to this sequence.
BasicSequence & operator=(const Sequence &s)
The Sequence generic assignment operator. This does not perform a hard copy of the alphabet object...
Definition: Sequence.cpp:126
BasicSequence(const Alphabet *alpha)
Empty constructor: build a void Sequence with just an Alphabet.
Definition: Sequence.cpp:56
A basic implementation of the Sequence interface.
Definition: Sequence.h:207
std::vector< int > content_
The list content.
Definition: SymbolList.h:280
const std::string & getName() const
Get the name of this sequence.
Definition: Sequence.h:357
virtual void append(const std::vector< int > &content)
Append the specified content to the sequence.
Definition: Sequence.cpp:193
The sequence interface.
Definition: Sequence.h:74
An alphabet exception thrown when trying to specify a bad int to the alphabet.
virtual const Alphabet * getAlphabet() const
Get the alphabet associated to the list.
Definition: SymbolList.h:348
virtual void setToSizeR(size_t newSize)
Set up the size of a sequence from the right side.
Definition: Sequence.cpp:153