bpp-seq  2.2.0
AlignedSequenceContainer.cpp
Go to the documentation of this file.
1 //
2 // File: AlignedSequenceContainer.cpp
3 // Created by: Guillaume Deuchst
4 // Julien Dutheil
5 // Created on: Friday August 22 2003
6 //
7 
8 /*
9  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
10 
11  This software is a computer program whose purpose is to provide classes
12  for sequences analysis.
13 
14  This software is governed by the CeCILL license under French law and
15  abiding by the rules of distribution of free software. You can use,
16  modify and/ or redistribute the software under the terms of the CeCILL
17  license as circulated by CEA, CNRS and INRIA at the following URL
18  "http://www.cecill.info".
19 
20  As a counterpart to the access to the source code and rights to copy,
21  modify and redistribute granted by the license, users are provided only
22  with a limited warranty and the software's author, the holder of the
23  economic rights, and the successive licensors have only limited
24  liability.
25 
26  In this respect, the user's attention is drawn to the risks associated
27  with loading, using, modifying and/or developing or reproducing the
28  software by the user in light of its specific status of free software,
29  that may mean that it is complicated to manipulate, and that also
30  therefore means that it is reserved for developers and experienced
31  professionals having in-depth computer knowledge. Users are therefore
32  encouraged to load and test the software's suitability as regards their
33  requirements in conditions enabling the security of their systems and/or
34  data to be ensured and, more generally, to use and operate it in the
35  same conditions as regards security.
36 
37  The fact that you are presently reading this means that you have had
38  knowledge of the CeCILL license and that you accept its terms.
39  */
40 
42 
43 #include <Bpp/Text/TextTools.h>
44 
45 using namespace bpp;
46 
47 // From the STL:
48 #include <iostream>
49 
50 using namespace std;
51 
52 /***************************************************************************/
53 
// Initializer list and body of a constructor that builds the container from
// another container 'osc' (the signature line is not visible in this listing).
// Only the alphabet is handed to the base class; the sequences are then copied
// one at a time so that each goes through this class's addSequence().
55  VectorSequenceContainer(osc.getAlphabet()),
56  // We can't call the copy constructor because we want to use the overloaded addSequence method !!!
57  positions_(),
58  length_(),
59  sites_()
60 {
61  // Copy every sequence of 'osc', with name checking enabled.
62  for (unsigned int i = 0; i < osc.getNumberOfSequences(); i++)
63  {
64  addSequence(osc.getSequence(i), true);
65  }
66 
// The alignment length is the size of the first stored sequence, or 0 when
// 'osc' holds no sequence.
67  if (osc.getNumberOfSequences() > 0)
68  length_ = getSequence(0).size(); // the overloaded
69  else
70  length_ = 0;
71 
// Number the sites 1..length_, give the sites_ vector one slot per column,
// and copy the general comments of 'osc'.
72  reindexSites();
73  sites_.resize(length_);
74  setGeneralComments(osc.getGeneralComments());
75 }
76 
77 /***************************************************************************/
78 
// Body of an assignment operator taking 'asc' (the signature line and the
// first statement of the body are not visible in this listing).
80 {
82 
83  // Take the number of sites and the site positions from 'asc', and give
// the sites_ vector one slot per column.
84  length_ = asc.getNumberOfSites();
85  positions_ = asc.getSitePositions();
86  sites_.resize(length_);
87 
88  return *this;
89 }
90 
91 /***************************************************************************/
92 
// Body of an assignment operator taking 'sc' (the signature line and the
// first statement of the body are not visible in this listing).
// NOTE(review): 'sc' is presumably a SiteContainer -- confirm against the header.
94 {
96 
97  // Take the number of sites and the site positions from 'sc', and give
// the sites_ vector one slot per column.
98  length_ = sc.getNumberOfSites();
99  positions_ = sc.getSitePositions();
100  sites_.resize(length_);
101 
102  return *this;
103 }
104 
105 /***************************************************************************/
106 
// Body of a third assignment-style member (it returns *this); the signature
// line and the first statement of the body are not visible in this listing.
// NOTE(review): presumably the overload taking an OrderedSequenceContainer -- confirm.
108 {
110 
111  // Reset the length to 0, rebuild positions_ for that length and resize
// the sites_ vector to match.
112  length_ = 0;
113  reindexSites();
114  sites_.resize(length_);
115 
116  return *this;
117 }
118 
// Body of a member whose signature line is not visible in this listing
// (presumably the destructor -- confirm). It frees every Site object
// currently stored in sites_; null slots are skipped.
122 {
123  // delete all sites:
124  for (unsigned int i = 0; i < sites_.size(); i++)
125  {
126  if (sites_[i])
127  delete sites_[i];
128  }
129 }
130 
131 /***************************************************************************/
132 
133 const Site& AlignedSequenceContainer::getSite(size_t i) const throw (IndexOutOfBoundsException)
134 {
135  if (i >= length_)
136  throw IndexOutOfBoundsException("AlignedSequenceContainer::getSite", i, 0, getNumberOfSites() - 1);
137 
138  // Main loop : for all sequences
139  size_t n = getNumberOfSequences();
140  std::vector<int> site(n);
141  for (size_t j = 0; j < n; j++)
142  {
143  site[j] = getSequence(j)[i];
144  }
145 
146  if (sites_[i])
147  delete sites_[i];
148  sites_[i] = new Site(site, getAlphabet(), positions_[i]);
149  return *sites_[i];
150 }
151 
152 /******************************************************************************/
153 
154 void AlignedSequenceContainer::setSite(size_t pos, const Site& site, bool checkPositions) throw (Exception)
155 {
156  // New site's alphabet and site container's alphabet matching verification
157  if (pos >= getNumberOfSites())
158  throw IndexOutOfBoundsException("AlignedSequenceContainer::setSite", pos, 0, getNumberOfSites() - 1);
159  if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
160  throw AlphabetMismatchException("AlignedSequenceContainer::setSite", getAlphabet(), site.getAlphabet());
161 
162  std::vector<int> s = site.getContent();
163 
164  // Check size:
165  if (s.size() != getNumberOfSequences())
166  throw SiteException("AlignedSequenceContainer::setSite, site does not have the appropriate length", &site);
167 
168  // Check position:
169  int position = site.getPosition();
170  if (checkPositions)
171  {
172  // For all positions in vector : throw exception if position already exists
173  for (size_t i = 0; i < positions_.size(); i++)
174  {
175  if (positions_[i] == position)
176  throw SiteException("AlignedSequenceContainer::setSite: Site position already exists in container", &site);
177  }
178  }
179 
180  // For all sequences
181  for (size_t j = 0; j < getNumberOfSequences(); j++)
182  {
183  getSequence_(j).setElement(pos, s[j]);
184  }
185  positions_[pos] = site.getPosition();
186 }
187 
188 /******************************************************************************/
189 
190 Site* AlignedSequenceContainer::removeSite(size_t pos) throw (IndexOutOfBoundsException)
191 {
192  if (pos >= getNumberOfSites())
193  throw IndexOutOfBoundsException("AlignedSequenceContainer::removeSite", pos, 0, getNumberOfSites() - 1);
194 
195  // Get old site
196  getSite(pos); // Creates the site!
197  Site* old = sites_[pos];
198 
199  // For all sequences
200  for (size_t j = 0; j < getNumberOfSequences(); j++)
201  {
202  getSequence_(j).deleteElement(pos);
203  }
204 
205  // Delete site's position
206  positions_.erase(positions_.begin() + static_cast<ptrdiff_t>(pos));
207  length_--;
208 
209  // Actualizes the 'sites' vector:
210  if (sites_[pos])
211  delete sites_[pos];
212  sites_.erase(sites_.begin() + static_cast<ptrdiff_t>(pos));
213 
214  // Send result
215  return old;
216 }
217 
218 /******************************************************************************/
219 
220 void AlignedSequenceContainer::deleteSite(size_t pos) throw (IndexOutOfBoundsException)
221 {
222  if (pos >= getNumberOfSites())
223  throw IndexOutOfBoundsException("AlignedSequenceContainer::deleteSite", pos, 0, getNumberOfSites() - 1);
224 
225  // For all sequences
226  for (size_t j = 0; j < getNumberOfSequences(); j++)
227  {
228  getSequence_(j).deleteElement(pos);
229  }
230 
231  // Delete site's position
232  positions_.erase(positions_.begin() + static_cast<ptrdiff_t>(pos));
233  length_--;
234 
235  // Actualizes the 'sites' vector:
236  if (sites_[pos])
237  delete sites_[pos];
238  sites_.erase(sites_.begin() + static_cast<ptrdiff_t>(pos));
239 }
240 
241 /******************************************************************************/
242 
243 void AlignedSequenceContainer::deleteSites(size_t siteIndex, size_t length) throw (IndexOutOfBoundsException, Exception)
244 {
245  if (siteIndex + length > getNumberOfSites())
246  throw IndexOutOfBoundsException("AlignedSequenceContainer::deleteSites", siteIndex + length, 0, getNumberOfSites() - 1);
247 
248  // For all sequences
249  for (size_t j = 0; j < getNumberOfSequences(); j++)
250  {
251  getSequence_(j).deleteElements(siteIndex, length);
252  }
253 
254  // Delete site's siteIndexition
255  positions_.erase(positions_.begin() + static_cast<ptrdiff_t>(siteIndex),
256  positions_.begin() + static_cast<ptrdiff_t>(siteIndex + length));
257  length_ -= length;
258 
259  // Actualizes the 'sites' vector:
260  for (size_t i = siteIndex; i < siteIndex + length; ++i)
261  {
262  if (sites_[i])
263  delete sites_[i];
264  }
265  sites_.erase(sites_.begin() + static_cast<ptrdiff_t>(siteIndex),
266  sites_.begin() + static_cast<ptrdiff_t>(siteIndex + length));
267 }
268 
269 /******************************************************************************/
270 
271 void AlignedSequenceContainer::addSite(const Site& site, bool checkPositions) throw (Exception)
272 {
273  // New site's alphabet and site container's alphabet matching verification
274  if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
275  throw AlphabetMismatchException("AlignedSequenceContainer::addSite");
276 
277  // Initializing
278  std::vector<int> s = site.getContent();
279 
280  // Check size:
281  if (s.size() != getNumberOfSequences())
282  throw SiteException("AlignedSequenceContainer::addSite, site does not have the appropriate length", &site);
283 
284  // Check position:
285 
286  int position = site.getPosition();
287  if (checkPositions)
288  {
289  // For all positions in vector : throw exception if position already exists
290  for (unsigned int i = 0; i < positions_.size(); i++)
291  {
292  if (positions_[i] == position)
293  throw SiteException("AlignedSequenceContainer::addSite: Site position already exists in container", &site);
294  }
295  }
296 
297  // For all sequences
298  for (unsigned int j = 0; j < getNumberOfSequences(); j++)
299  {
300  getSequence_(j).addElement(s[j]);
301  }
302 
303  length_++;
304  positions_.push_back(position);
305 
306  // Actualizes the 'sites' vector:
307  sites_.push_back(0);
308 }
309 
310 /******************************************************************************/
311 
312 void AlignedSequenceContainer::addSite(const Site& site, int position, bool checkPositions) throw (Exception)
313 {
314  // New site's alphabet and site container's alphabet matching verification
315  if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
316  throw AlphabetMismatchException("AlignedSequenceContainer::addSite");
317 
318  // Initializing
319  std::vector<int> s = site.getContent();
320 
321  // Check size:
322  if (s.size() != getNumberOfSequences())
323  throw SiteException("AlignedSequenceContainer::addSite, site does not have the appropriate length", &site);
324 
325  // Check position:
326 
327  if (checkPositions)
328  {
329  // For all positions in vector : throw exception if position already exists
330  for (unsigned int i = 0; i < positions_.size(); i++)
331  {
332  if (positions_[i] == position)
333  throw SiteException("AlignedSequenceContainer::addSite: Site position already exists in container", &site);
334  }
335  }
336 
337  // For all sequences
338  for (unsigned int j = 0; j < getNumberOfSequences(); j++)
339  {
340  getSequence_(j).addElement(s[j]);
341  }
342 
343  length_++;
344  positions_.push_back(position);
345 
346  // Actualizes the 'sites' vector:
347  sites_.push_back(0);
348 }
349 
350 /******************************************************************************/
351 
352 void AlignedSequenceContainer::addSite(const Site& site, size_t siteIndex, bool checkPositions) throw (Exception)
353 {
354  if (siteIndex >= getNumberOfSites())
355  throw IndexOutOfBoundsException("AlignedSequenceContainer::addSite", siteIndex, 0, getNumberOfSites() - 1);
356 
357  // New site's alphabet and site container's alphabet matching verification
358  if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
359  throw AlphabetMismatchException("AlignedSequenceContainer::addSite", getAlphabet(), site.getAlphabet());
360 
361  std::vector<int> s = site.getContent();
362 
363  // Check size:
364  if (s.size() != getNumberOfSequences())
365  throw SiteException("AlignedSequenceContainer::addSite, site does not have the appropriate length", &site);
366 
367  // Check position:
368  int position = site.getPosition();
369  if (checkPositions)
370  {
371  // For all positions in vector : throw exception if position already exists
372  for (size_t i = 0; i < positions_.size(); i++)
373  {
374  if (positions_[i] == position)
375  throw SiteException("AlignedSequenceContainer::addSite: Site position already exists in container", &site);
376  }
377  }
378 
379  // For all sequences
380  for (size_t j = 0; j < getNumberOfSequences(); j++)
381  {
382  getSequence_(j).addElement(siteIndex, site[j]);
383  }
384 
385  length_++;
386  positions_.insert(positions_.begin() + static_cast<ptrdiff_t>(siteIndex), position);
387 
388  // Actualizes the 'sites' vector:
389  sites_.insert(sites_.begin() + static_cast<ptrdiff_t>(siteIndex), 0);
390 }
391 
392 /******************************************************************************/
393 
394 void AlignedSequenceContainer::addSite(const Site& site, size_t siteIndex, int position, bool checkPositions) throw (Exception)
395 {
396  if (siteIndex >= getNumberOfSites())
397  throw IndexOutOfBoundsException("AlignedSequenceContainer::addSite", siteIndex, 0, getNumberOfSites() - 1);
398 
399  // New site's alphabet and site container's alphabet matching verification
400  if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
401  throw AlphabetMismatchException("AlignedSequenceContainer::addSite", getAlphabet(), site.getAlphabet());
402 
403  std::vector<int> s = site.getContent();
404 
405  // Check size:
406  if (s.size() != getNumberOfSequences())
407  throw SiteException("AlignedSequenceContainer::addSite, site does not have the appropriate length", &site);
408 
409  // Check position:
410  if (checkPositions)
411  {
412  // For all positions in vector : throw exception if position already exists
413  for (size_t i = 0; i < positions_.size(); i++)
414  {
415  if (positions_[i] == position)
416  throw SiteException("AlignedSequenceContainer::addSite: Site position already exists in container", &site);
417  }
418  }
419 
420  // For all sequences
421  for (size_t j = 0; j < getNumberOfSequences(); j++)
422  {
423  getSequence_(j).addElement(siteIndex, site[j]);
424  }
425 
426  length_++;
427  positions_.insert(positions_.begin() + static_cast<ptrdiff_t>(siteIndex), position);
428 
429  // Actualizes the 'sites' vector:
430  sites_.insert(sites_.begin() + static_cast<ptrdiff_t>(siteIndex), 0);
431 }
432 
433 /******************************************************************************/
434 
// Body of a member whose signature line is not visible in this listing
// (presumably reindexSites() -- confirm). It resizes positions_ to length_
// entries and numbers them consecutively, the first column getting 1.
436 {
437  positions_.resize(length_);
438  for (size_t i = 0; i < length_; i++)
439  {
440  positions_[i] = static_cast<int>(i + 1); // start with 1.
441  }
442 }
443 
444 /******************************************************************************/
445 
446 void AlignedSequenceContainer::setSequence(size_t i, const Sequence& sequence, bool checkName) throw (Exception)
447 {
448  if (i >= getNumberOfSequences())
449  throw IndexOutOfBoundsException("AlignedSequenceContainer::setSequence", i, 0, getNumberOfSequences() - 1);
450  // if container has only one sequence
451  if (getNumberOfSequences() == 1)
452  length_ = sequence.size();
453  if (checkSize_(sequence))
454  VectorSequenceContainer::setSequence(i, sequence, checkName);
455  else
456  throw SequenceNotAlignedException("AlignedSequenceContainer::setSequence", &sequence);
457 }
458 
459 /******************************************************************************/
460 
461 void AlignedSequenceContainer::setSequence(const string& name, const Sequence& sequence, bool checkName) throw (Exception)
462 {
463  // if container has only one sequence
464  if (getNumberOfSequences() == 1)
465  length_ = sequence.size();
466  if (checkSize_(sequence))
467  VectorSequenceContainer::setSequence(name, sequence, checkName);
468  else
469  throw SequenceNotAlignedException("AlignedSequenceContainer::setSequence", &sequence);
470 }
471 
472 /******************************************************************************/
473 
474 void AlignedSequenceContainer::addSequence(const Sequence& sequence, bool checkName) throw (Exception)
475 {
476  // if container has only one sequence
477  if (length_ == 0)
478  {
479  length_ = sequence.size();
480  sites_.resize(length_);
481  reindexSites();
482  }
483  if (checkSize_(sequence))
484  VectorSequenceContainer::addSequence(sequence, checkName);
485  else
486  throw SequenceNotAlignedException("AlignedSequenceContainer::addSequence", &sequence);
487 }
488 
489 /******************************************************************************/
490 
491 void AlignedSequenceContainer::addSequence(const Sequence& sequence, size_t i, bool checkName) throw (Exception)
492 {
493  if (i >= getNumberOfSequences())
494  throw IndexOutOfBoundsException("AlignedSequenceContainer::addSequence", i, 0, getNumberOfSequences() - 1);
495  // if container has only one sequence
496  if (length_ == 0)
497  length_ = sequence.size();
498  if (checkSize_(sequence))
499  VectorSequenceContainer::addSequence(sequence, i, checkName);
500  else
501  throw SequenceNotAlignedException("AlignedSequenceContainer::addSequence", &sequence);
502 }
503 
504 /******************************************************************************/
505 
// Body of a member whose signature line and second statement are not visible
// in this listing (presumably clear() -- confirm). The visible part resets the
// recorded alignment length to 0.
507 {
508  length_ = 0;
510 }
511 
512 /******************************************************************************/
513 
// Body of a member whose signature line is not visible in this listing
// (presumably createEmptyContainer() -- confirm). It allocates a new
// AlignedSequenceContainer on this container's alphabet, copies the general
// comments into it and returns the pointer; no sequence is copied here.
// The object is created with 'new' and nothing in this body releases it.
515 {
516  AlignedSequenceContainer* asc = new AlignedSequenceContainer(getAlphabet());
517  asc->setGeneralComments(getGeneralComments());
518  return asc;
519 }
520 
521 /******************************************************************************/
522 
523 
Vint getSitePositions() const
Get all position attributes of sites.
const Site & getSite(size_t siteIndex) const
Get a site from the container.
VectorSequenceContainer & operator=(const VectorSequenceContainer &vsc)
Assign from a VectorSequenceContainer.
void addSite(const Site &site, bool checkPosition=true)
Add a site in the container.
The SiteContainer interface.
Definition: SiteContainer.h:63
The OrderedSequenceContainer interface.
Aligned sequences container.
This alphabet is used to deal NumericAlphabet.
The site exception base class.
void deleteSite(size_t siteIndex)
Delete a site in the container.
The VectorSequenceContainer class.
void deleteSites(size_t siteIndex, size_t length)
Delete a continuous range of sites in the container.
size_t getNumberOfSites() const
Get the number of sites in the container.
STL namespace.
AlignedSequenceContainer & operator=(const AlignedSequenceContainer &asc)
void setSequence(const std::string &name, const Sequence &sequence, bool checkName=true)
Replace a sequence in the container.
virtual void addSequence(const Sequence &sequence, bool checkName=true)
Add a sequence at the end of the container.
void setSequence(const std::string &name, const Sequence &sequence, bool checkName=true)
Replace a sequence in the container.
void clear()
Delete all sequences in the container.
void setSite(size_t siteIndex, const Site &site, bool checkPosition=true)
Set a site in the container.
void addSequence(const Sequence &sequence, bool checkName=true)
Add a sequence at the end of the container.
Site * removeSite(size_t siteIndex)
Remove a site from the container.
AlignedSequenceContainer * createEmptyContainer() const
Return a copy of this container, but with no sequence inside.
AlignedSequenceContainer(const Alphabet *alpha)
Build a new empty container with the specified alphabet.
virtual size_t getNumberOfSites() const =0
Get the number of sites in the container.
The sequence interface.
Definition: Sequence.h:74
virtual Vint getSitePositions() const =0
Get all position attributes of sites.
void clear()
Delete all sequences in the container.
The Site class.
Definition: Site.h:61
Exception thrown when two alphabets do not match.
void reindexSites()
Set all positions attributes.
virtual void deleteElement(size_t pos)
Delete the element at position &#39;pos&#39;.
Definition: SymbolList.cpp:163
Exception thrown when a sequence is not align with others.
void setGeneralComments(const Comments &comments)
Set the comments of this container.