bpp-seq-omics  2.2.0
SequenceFeature.h
Go to the documentation of this file.
1 //
2 // File: SequenceFeature.h
3 // Created by: Julien Dutheil
4 // Created on: Mon Nov 21 2011
5 //
6 
7 /*
8 Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
9 
10 This software is a computer program whose purpose is to provide classes
11 for sequences analysis.
12 
13 This software is governed by the CeCILL license under French law and
14 abiding by the rules of distribution of free software. You can use,
15 modify and/ or redistribute the software under the terms of the CeCILL
16 license as circulated by CEA, CNRS and INRIA at the following URL
17 "http://www.cecill.info".
18 
19 As a counterpart to the access to the source code and rights to copy,
20 modify and redistribute granted by the license, users are provided only
21 with a limited warranty and the software's author, the holder of the
22 economic rights, and the successive licensors have only limited
23 liability.
24 
25 In this respect, the user's attention is drawn to the risks associated
26 with loading, using, modifying and/or developing or reproducing the
27 software by the user in light of its specific status of free software,
28 that may mean that it is complicated to manipulate, and that also
29 therefore means that it is reserved for developers and experienced
30 professionals having in-depth computer knowledge. Users are therefore
31 encouraged to load and test the software's suitability as regards their
32 requirements in conditions enabling the security of their systems and/or
33 data to be ensured and, more generally, to use and operate it in the
34 same conditions as regards security.
35 
36 The fact that you are presently reading this means that you have had
37 knowledge of the CeCILL license and that you accept its terms.
38 */
39 
40 #ifndef _SEQUENCEFEATURE_H_
41 #define _SEQUENCEFEATURE_H_
42 
43 //From the STL:
44 #include <string>
45 #include <map>
46 #include <set>
47 #include <algorithm>
48 
49 //From bpp-core:
50 #include <Bpp/Clonable.h>
51 #include <Bpp/Numeric/Range.h>
52 
67 namespace bpp
68 {
69 
75 class SeqRange:
76  public Range<size_t>
77 {
78  private:
79  char strand_;
80 
81  public:
87  SeqRange(size_t a, size_t b, char strand = '.'):
88  Range<size_t>(a, b), strand_(strand) {
89  if (strand != '+' && strand != '-' && strand != '?' && strand != '.')
90  strand_ = '.';
91  }
92 
93  SeqRange* clone() const { return new SeqRange(*this); }
94 
95  public:
96  virtual char getStrand() const { return strand_; }
97 
98  virtual bool isNegativeStrand() const { return strand_ == '-'; }
99  virtual bool isStranded() const { return strand_ == '+' || strand_ == '-'; }
100  virtual void invert() {
101  if (isStranded()) {
102  if (isNegativeStrand()) {
103  strand_ = '+';
104  } else {
105  strand_ = '-';
106  }
107  }
108  }
109 
110 };
111 
127  public virtual Clonable
128 {
129  public:
/// Declared here, defined outside this listing. BasicSequenceFeature's const
/// getAttribute() returns a reference to it when the attribute is absent.
130  static const std::string NO_ATTRIBUTE_SET;
131 
132  public:
/// Polymorphic copy. BasicSequenceFeature implements it with
/// `new BasicSequenceFeature(*this)`; SequenceFeatureSet stores such clones
/// and deletes them in clear().
133  virtual SequenceFeature* clone() const = 0;
134 
135  public:
/// Read the feature identifier.
139  virtual const std::string& getId() const = 0;
/// Replace the feature identifier.
143  virtual void setId(const std::string& id) = 0;
144 
/// Read the sequence identifier. BasicSequenceFeature::overlap() only reports
/// an overlap between features that share the same sequence identifier.
148  virtual const std::string& getSequenceId() const = 0;
/// Replace the sequence identifier.
152  virtual void setSequenceId(const std::string& id) = 0;
153 
/// Read the source string (its meaning is not specified in this file).
157  virtual const std::string& getSource() const = 0;
/// Replace the source string.
161  virtual void setSource(const std::string& source) = 0;
162 
/// Read the type string; SequenceFeatureSet::getTypes() and
/// getSubsetForType() collect and filter features on this value.
166  virtual const std::string& getType() const = 0;
/// Replace the type string.
170  virtual void setType(const std::string& type) = 0;
171 
/// Start coordinate; BasicSequenceFeature returns begin() of its SeqRange.
175  virtual const size_t getStart() const = 0;
176 
/// End coordinate; BasicSequenceFeature returns end() of its SeqRange.
180  virtual const size_t getEnd() const = 0;
181 
/// Whether the feature has a strand; for a SeqRange this means '+' or '-'.
185  virtual bool isStranded() const = 0;
186 
/// Whether the feature is on the negative strand; for a SeqRange, strand '-'.
190  virtual bool isNegativeStrand() const = 0;
191 
/// Switch strand. SeqRange::invert() swaps '+' and '-' and leaves unstranded
/// ranges unchanged.
195  virtual void invert() = 0;
196 
/// Coordinates and strand of the feature, returned by value.
200  virtual SeqRange getRange() const = 0;
201 
/// Tell whether this feature overlaps with feat.
205  virtual bool overlap(const SequenceFeature& feat) const = 0;
206 
/// Read the score; BasicSequenceFeature's constructor defaults it to -1.
210  virtual const double& getScore() const = 0;
/// Replace the score.
214  virtual void setScore(double score) = 0;
215 
/// Read-only access to an attribute value. BasicSequenceFeature returns
/// NO_ATTRIBUTE_SET when the attribute is absent.
220  virtual const std::string& getAttribute(const std::string& attribute) const = 0;
221 
/// Mutable access to an attribute value. BasicSequenceFeature relies on
/// std::map::operator[], so requesting a missing attribute creates it with an
/// empty value.
226  virtual std::string& getAttribute(const std::string& attribute) = 0;
227 
/// Names of the attributes currently stored, returned by value.
231  virtual std::set< std::string > getAttributeList() const = 0;
232 
/// Set (create or overwrite) the value of an attribute.
239  virtual void setAttribute(const std::string& attribute, const std::string& value) = 0;
240 
/// Remove an attribute; BasicSequenceFeature does nothing when it is absent.
244  virtual void removeAttribute(const std::string& attribute) = 0;
245 
246 };
247 
254  public SequenceFeature
255 {
256  protected:
257  std::string id_;
258  std::string sequenceId_;
259  std::string source_;
260  std::string type_;
262  double score_;
263  mutable std::map<std::string, std::string> attributes_;
264  //SequenceFeatureSet subFeatures_;
265 
266  public:
268  const std::string& id,
269  const std::string& seqId,
270  const std::string& source,
271  const std::string& type,
272  size_t start,
273  size_t end,
274  char strand,
275  double score = -1):
276  id_(id), sequenceId_(seqId), source_(source),
277  type_(type), range_(start, end, strand), score_(score),
278  attributes_()
279  //attributes_(), subFeatures_()
280  {}
281 
282  virtual BasicSequenceFeature* clone() const { return new BasicSequenceFeature(*this); }
283 
284  public:
285  const std::string& getId() const { return id_; }
286  void setId(const std::string& id) { id_ = id; }
287  const std::string& getSequenceId() const { return sequenceId_; }
288  void setSequenceId(const std::string& sid) { sequenceId_ = sid; }
289  const std::string& getSource() const { return source_; }
290  void setSource(const std::string& source) { source_ = source; }
291  const std::string& getType() const { return type_; }
292  void setType(const std::string& type) { type_ = type; }
293  const size_t getStart() const { return range_.begin(); }
294  const size_t getEnd() const { return range_.end(); }
295  bool isStranded() const { return range_.isStranded(); }
296  bool isNegativeStrand() const { return range_.isNegativeStrand(); }
297  void invert() {
298  range_.invert();
299  }
300  const double& getScore() const { return score_; }
301  void setScore(double score) { score_ = score; }
302 
303  const std::string& getAttribute(const std::string& attribute) const {
304  std::map<std::string, std::string>::iterator it = attributes_.find(attribute);
305  if (it != attributes_.end())
306  return it->second;
307  else
308  return NO_ATTRIBUTE_SET;
309  }
310 
311  std::string& getAttribute(const std::string& attribute) {
312  return attributes_[attribute];
313  }
314 
315  void setAttribute(const std::string& attribute, const std::string& value) {
316  attributes_[attribute] = value;
317  }
318 
319  std::set< std::string > getAttributeList() const {
320  std::set< std::string > d;
321  for (std::map<std::string, std::string>::iterator it = attributes_.begin() ; it != attributes_.end() ; it++) {
322  d.insert(it->first);
323  }
324  return d;
325  }
326 
327  void removeAttribute(const std::string& attribute) {
328  std::map<std::string, std::string>::iterator it = attributes_.find(attribute);
329  if (it != attributes_.end()) {
330  attributes_.erase(it);
331  }
332  }
333 
334  SeqRange getRange() const {
335  return SeqRange(range_);
336  }
337 
338  bool overlap(const SequenceFeature& feat) const {
339  if (feat.getSequenceId() == sequenceId_) {
340  return range_.overlap(feat.getRange());
341  }
342  return false;
343  }
344 
345  //const SequenceFeatureSet& getSubFeatures() const { return subFeatures; }
346  //SequenceFeatureSet& getSubFeatures() { return subFeatures; }
347 
348 };
349 
362 {
363  private:
364  std::vector<SequenceFeature*> features_;
365 
366  public:
368 
369  virtual ~SequenceFeatureSet() { clear(); }
370 
372  features_()
373  {
374  for (std::vector<SequenceFeature*>::const_iterator it = sfs.features_.begin();
375  it != sfs.features_.end();
376  ++it) {
377  features_.push_back((**it).clone());
378  }
379  }
381  {
382  clear();
383  for (std::vector<SequenceFeature*>::const_iterator it = sfs.features_.begin();
384  it != sfs.features_.end();
385  ++it) {
386  features_.push_back((**it).clone());
387  }
388  return *this;
389  }
390 
391  public:
395  void clear()
396  {
397  for (std::vector<SequenceFeature*>::iterator it = features_.begin();
398  it != features_.end();
399  ++it) {
400  delete *it;
401  }
402  features_.clear();
403  }
404 
409  const SequenceFeature& getFeature(size_t i) const {
410  return *features_[i];
411  }
412 
418  const SequenceFeature& operator[](size_t i) const {
419  return *features_[i];
420  }
421 
425  size_t getNumberOfFeatures() const { return features_.size(); }
426 
432  void addFeature(const SequenceFeature& feature) {
433  features_.push_back(feature.clone());
434  }
435 
439  std::set<std::string> getSequences() const {
440  std::set<std::string> seqIds;
441  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
442  it != features_.end();
443  ++it) {
444  seqIds.insert((**it).getSequenceId());
445  }
446  return seqIds;
447  }
448 
452  std::set<std::string> getTypes() const {
453  std::set<std::string> types;
454  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
455  it != features_.end();
456  ++it) {
457  types.insert((**it).getType());
458  }
459  return types;
460  }
461 
468  void fillRangeCollectionForSequence(const std::string& seqId, RangeCollection<size_t>& coords) const {
469  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
470  it != features_.end();
471  ++it) {
472  if ((**it).getSequenceId() == seqId) {
473  coords.addRange((**it).getRange());
474  }
475  }
476  }
477 
482  SequenceFeatureSet* getSubsetForType(const std::vector<std::string>& types) const {
483  SequenceFeatureSet* subset = new SequenceFeatureSet();
484  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
485  it != features_.end();
486  ++it) {
487  if (std::find(types.begin(), types.end(), (**it).getType()) != types.end()) {
488  subset->addFeature(**it);
489  }
490  }
491  return subset;
492  }
493 
494 };
495 
496 } //end of namespace bpp
497 
498 #endif //_SEQUENCEFEATURE_H_
499 
virtual void setSequenceId(const std::string &id)=0
virtual void setAttribute(const std::string &attribute, const std::string &value)=0
Set the value of an attribute.
virtual BasicSequenceFeature * clone() const
std::string & getAttribute(const std::string &attribute)
const std::string & getType() const
virtual const double & getScore() const =0
void addFeature(const SequenceFeature &feature)
Add a feature to the container. The feature will be copied and the copy owned by the container.
virtual void setScore(double score)=0
std::set< std::string > getSequences() const
virtual SeqRange getRange() const =0
virtual void setSource(const std::string &source)=0
virtual const size_t getStart() const =0
void fillRangeCollectionForSequence(const std::string &seqId, RangeCollection< size_t > &coords) const
Get all coordinates of features for a given sequence. All ranges are added to the RangeCollection container passed as argument.
bool overlap(const SequenceFeature &feat) const
std::map< std::string, std::string > attributes_
void setType(const std::string &type)
virtual std::set< std::string > getAttributeList() const =0
virtual const std::string & getType() const =0
std::set< std::string > getAttributeList() const
virtual void invert()=0
virtual const size_t getEnd() const =0
virtual bool isStranded() const
std::set< std::string > getTypes() const
void setSequenceId(const std::string &sid)
virtual void setType(const std::string &type)=0
virtual bool overlap(const SequenceFeature &feat) const =0
const std::string & getSource() const
SeqRange * clone() const
SeqRange(size_t a, size_t b, char strand='.')
size_t getNumberOfFeatures() const
const size_t getStart() const
SequenceFeatureSet * getSubsetForType(const std::vector< std::string > &types) const
static const std::string NO_ATTRIBUTE_SET
virtual const std::string & getSource() const =0
virtual const std::string & getSequenceId() const =0
virtual bool isNegativeStrand() const =0
void setAttribute(const std::string &attribute, const std::string &value)
Set the value of an attribute.
const double & getScore() const
const std::string & getId() const
const size_t getEnd() const
virtual void invert()
virtual bool isNegativeStrand() const
void setSource(const std::string &source)
virtual char getStrand() const
virtual void setId(const std::string &id)=0
The base interface for sequence features.
void setScore(double score)
virtual const std::string & getAttribute(const std::string &attribute) const =0
SequenceFeatureSet(const SequenceFeatureSet &sfs)
void setId(const std::string &id)
void clear()
Delete all features in this set.
BasicSequenceFeature(const std::string &id, const std::string &seqId, const std::string &source, const std::string &type, size_t start, size_t end, char strand, double score=-1)
SequenceFeatureSet & operator=(const SequenceFeatureSet &sfs)
void removeAttribute(const std::string &attribute)
const std::string & getSequenceId() const
virtual bool isStranded() const =0
const SequenceFeature & getFeature(size_t i) const
const SequenceFeature & operator[](size_t i) const
A very simple implementation of the SequenceFeature class.
virtual const std::string & getId() const =0
A coordinate range on a sequence. Stores coordinates as a Range<size_t> object, but also keeps the strand information.
virtual SequenceFeature * clone() const =0
virtual void removeAttribute(const std::string &attribute)=0
A simple ensemble of sequence features.
std::vector< SequenceFeature * > features_
const std::string & getAttribute(const std::string &attribute) const