bpp-seq-omics  2.2.0
MafSequence.h
Go to the documentation of this file.
1 //
2 // File: MafSequence.h
3 // Authors: Julien Dutheil
4 // Created: Tue Sep 07 2010
5 //
6 
7 /*
8 Copyright or © or Copr. Bio++ Development Team, (2010)
9 
10 This software is a computer program whose purpose is to provide classes
11 for sequences analysis.
12 
13 This software is governed by the CeCILL license under French law and
14 abiding by the rules of distribution of free software. You can use,
15 modify and/ or redistribute the software under the terms of the CeCILL
16 license as circulated by CEA, CNRS and INRIA at the following URL
17 "http://www.cecill.info".
18 
19 As a counterpart to the access to the source code and rights to copy,
20 modify and redistribute granted by the license, users are provided only
21 with a limited warranty and the software's author, the holder of the
22 economic rights, and the successive licensors have only limited
23 liability.
24 
25 In this respect, the user's attention is drawn to the risks associated
26 with loading, using, modifying and/or developing or reproducing the
27 software by the user in light of its specific status of free software,
28 that may mean that it is complicated to manipulate, and that also
29 therefore means that it is reserved for developers and experienced
30 professionals having in-depth computer knowledge. Users are therefore
31 encouraged to load and test the software's suitability as regards their
32 requirements in conditions enabling the security of their systems and/or
33 data to be ensured and, more generally, to use and operate it in the
34 same conditions as regards security.
35 
36 The fact that you are presently reading this means that you have had
37 knowledge of the CeCILL license and that you accept its terms.
38 */
39 
40 #ifndef _MAFSEQUENCE_H_
41 #define _MAFSEQUENCE_H_
42 
43 #include "../../Feature/SequenceFeature.h"
44 
45 #include <Bpp/Seq/SequenceWithAnnotation.h>
46 #include <Bpp/Seq/Alphabet/AlphabetTools.h>
47 #include <Bpp/Seq/SequenceTools.h>
48 
49 namespace bpp {
50 
63  public SequenceWithAnnotation
64 {
65  private:
/** True when a start coordinate (begin_) has been set for this sequence. */
bool hasCoordinates_;
/** Start coordinate; only reported by start() while hasCoordinates_ is true. */
size_t begin_;
/** Part of the sequence name before the first '.'; empty when no name was parsed. */
std::string species_;
/** Part of the sequence name after the first '.'; empty when no name was parsed. */
std::string chromosome_;
/** Strand symbol; getRange() treats '-' specially. */
char strand_;
/** Cached result of SequenceTools::getNumberOfSites(*this); added to begin_ by stop(). */
size_t size_;
/** Size of the source sequence; used by getRange() to convert '-' strand coordinates. */
size_t srcSize_;
73 
74  public:
76  SequenceWithAnnotation(&AlphabetTools::DNA_ALPHABET), hasCoordinates_(false), begin_(0), species_(""), chromosome_(""), strand_(0), size_(0), srcSize_(0)
77  {
78  size_ = 0;
79  }
80 
81  MafSequence(const std::string& name, const std::string& sequence, bool parseName = true):
82  SequenceWithAnnotation(name, sequence, &AlphabetTools::DNA_ALPHABET), hasCoordinates_(false), begin_(0), species_(""), chromosome_(""), strand_(0), size_(0), srcSize_(0)
83  {
84  size_ = SequenceTools::getNumberOfSites(*this);
85  if (parseName)
87  }
88 
89  MafSequence(const std::string& name, const std::string& sequence, size_t begin, char strand, size_t srcSize, bool parseName = true) :
90  SequenceWithAnnotation(name, sequence, &AlphabetTools::DNA_ALPHABET), hasCoordinates_(true), begin_(begin), species_(""), chromosome_(""), strand_(strand), size_(0), srcSize_(srcSize)
91  {
92  size_ = SequenceTools::getNumberOfSites(*this);
93  if (parseName)
95  }
96 
97  MafSequence* clone() const { return new MafSequence(*this); }
98 
100 
101  public:
102  bool hasCoordinates() const { return hasCoordinates_; }
103 
104  void removeCoordinates() { hasCoordinates_ = false; begin_ = 0; }
105 
106  size_t start() const throw (Exception) {
107  if (hasCoordinates_) return begin_;
108  else throw Exception("MafSequence::start(). Sequence " + getName() + " does not have coordinates.");
109  }
110 
111  size_t stop() const {
112  if (hasCoordinates_) return begin_ + size_;
113  else throw Exception("MafSequence::stop(). Sequence " + getName() + " does not have coordinates.");
114  }
115 
121  Range<size_t> getRange(bool origin=true) const {
122  if (hasCoordinates_) {
123  if (origin && strand_ == '-') {
124  return Range<size_t>(srcSize_ - stop(), srcSize_ - start());
125  } else {
126  return Range<size_t>(start(), stop());
127  }
128  }
129  else throw Exception("MafSequence::getRange(). Sequence " + getName() + " does not have coordinates.");
130  }
131 
132  void setName(const std::string& name) {
133  try {
135  } catch (Exception& e) {
136  species_ = "";
137  chromosome_ = "";
138  }
139  SequenceWithAnnotation::setName(name);
140  }
141 
142  static void splitNameIntoSpeciesAndChromosome(const std::string& name, std::string& species, std::string& chr) {
143  size_t pos = name.find(".");
144  if (pos != std::string::npos) {
145  chr = name.substr(pos + 1);
146  species = name.substr(0, pos);
147  } else {
148  throw Exception("MafSequence::splitNameIntospeciesAndChromosome(). Invalid sequence name: " + name);
149  }
150  }
151 
152  const std::string& getSpecies() const { return species_; }
153 
154  const std::string& getChromosome() const { return chromosome_; }
155 
156  char getStrand() const { return strand_; }
157 
158  size_t getGenomicSize() const { return size_; }
159 
160  size_t getSrcSize() const { return srcSize_; }
161 
162  void setStart(size_t begin) { begin_ = begin; hasCoordinates_ = true; }
163 
164  void setChromosome(const std::string& chr) {
165  chromosome_ = chr;
166  SequenceWithAnnotation::setName(species_ + "." + chromosome_);
167  }
168 
169  void setSpecies(const std::string& species) {
170  species_ = species;
171  SequenceWithAnnotation::setName(species_ + "." + chromosome_);
172  }
173 
174  void setStrand(char s) { strand_ = s; }
175 
176  void setSrcSize(size_t srcSize) { srcSize_ = srcSize; }
177 
178  std::string getDescription() const { return getName() + strand_ + ":" + (hasCoordinates_ ? TextTools::toString(start()) + "-" + TextTools::toString(stop()) : "?-?"); }
179 
187  MafSequence* subSequence(size_t startAt, size_t length) const;
188 
189  private:
190  void beforeSequenceChanged(const SymbolListEditionEvent& event) {}
191  void afterSequenceChanged(const SymbolListEditionEvent& event) { size_ = SequenceTools::getNumberOfSites(*this); }
192  void beforeSequenceInserted(const SymbolListInsertionEvent& event) {}
193  void afterSequenceInserted(const SymbolListInsertionEvent& event) { size_ = SequenceTools::getNumberOfSites(*this); }
194  void beforeSequenceDeleted(const SymbolListDeletionEvent& event) {}
195  void afterSequenceDeleted(const SymbolListDeletionEvent& event) { size_ = SequenceTools::getNumberOfSites(*this); }
196  void beforeSequenceSubstituted(const SymbolListSubstitutionEvent& event) {}
197  void afterSequenceSubstituted(const SymbolListSubstitutionEvent& event) {}
198 };
199 
200 } // end of namespace bpp.
201 
202 #endif //_MAFSEQUENCE_H_
void afterSequenceChanged(const SymbolListEditionEvent &event)
Definition: MafSequence.h:191
void setName(const std::string &name)
Definition: MafSequence.h:132
void setStrand(char s)
Definition: MafSequence.h:174
MafSequence(const std::string &name, const std::string &sequence, size_t begin, char strand, size_t srcSize, bool parseName=true)
Definition: MafSequence.h:89
size_t stop() const
Definition: MafSequence.h:111
void beforeSequenceChanged(const SymbolListEditionEvent &event)
Definition: MafSequence.h:190
void afterSequenceDeleted(const SymbolListDeletionEvent &event)
Definition: MafSequence.h:195
bool hasCoordinates() const
Definition: MafSequence.h:102
const std::string & getSpecies() const
Definition: MafSequence.h:152
MafSequence(const std::string &name, const std::string &sequence, bool parseName=true)
Definition: MafSequence.h:81
std::string species_
Definition: MafSequence.h:68
void setChromosome(const std::string &chr)
Definition: MafSequence.h:164
void setStart(size_t begin)
Definition: MafSequence.h:162
static void splitNameIntoSpeciesAndChromosome(const std::string &name, std::string &species, std::string &chr)
Definition: MafSequence.h:142
char getStrand() const
Definition: MafSequence.h:156
size_t getGenomicSize() const
Definition: MafSequence.h:158
MafSequence * subSequence(size_t startAt, size_t length) const
Extract a sub-sequence.
Definition: MafSequence.cpp:48
size_t getSrcSize() const
Definition: MafSequence.h:160
void afterSequenceInserted(const SymbolListInsertionEvent &event)
Definition: MafSequence.h:193
void setSpecies(const std::string &species)
Definition: MafSequence.h:169
std::string chromosome_
Definition: MafSequence.h:69
const std::string & getChromosome() const
Definition: MafSequence.h:154
MafSequence * clone() const
Definition: MafSequence.h:97
void beforeSequenceDeleted(const SymbolListDeletionEvent &event)
Definition: MafSequence.h:194
void setSrcSize(size_t srcSize)
Definition: MafSequence.h:176
void beforeSequenceInserted(const SymbolListInsertionEvent &event)
Definition: MafSequence.h:192
Range< size_t > getRange(bool origin=true) const
Definition: MafSequence.h:121
void removeCoordinates()
Definition: MafSequence.h:104
void afterSequenceSubstituted(const SymbolListSubstitutionEvent &event)
Definition: MafSequence.h:197
void beforeSequenceSubstituted(const SymbolListSubstitutionEvent &event)
Definition: MafSequence.h:196
size_t start() const
Definition: MafSequence.h:106
A sequence class which is used to store data from MAF files.
Definition: MafSequence.h:62
std::string getDescription() const
Definition: MafSequence.h:178