bpp-seq  2.2.0
Mase.h
Go to the documentation of this file.
1 //
2 // File: Mase.h
3 // Created by: Guillaume Deuchst
4 // Julien Dutheil
5 // Created on: ?
6 //
7 
8 /*
9 Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
10 
11 This software is a computer program whose purpose is to provide classes
12 for sequences analysis.
13 
14 This software is governed by the CeCILL license under French law and
15 abiding by the rules of distribution of free software. You can use,
16 modify and/ or redistribute the software under the terms of the CeCILL
17 license as circulated by CEA, CNRS and INRIA at the following URL
18 "http://www.cecill.info".
19 
20 As a counterpart to the access to the source code and rights to copy,
21 modify and redistribute granted by the license, users are provided only
22 with a limited warranty and the software's author, the holder of the
23 economic rights, and the successive licensors have only limited
24 liability.
25 
26 In this respect, the user's attention is drawn to the risks associated
27 with loading, using, modifying and/or developing or reproducing the
28 software by the user in light of its specific status of free software,
29 that may mean that it is complicated to manipulate, and that also
30 therefore means that it is reserved for developers and experienced
31 professionals having in-depth computer knowledge. Users are therefore
32 encouraged to load and test the software's suitability as regards their
33 requirements in conditions enabling the security of their systems and/or
34 data to be ensured and, more generally, to use and operate it in the
35 same conditions as regards security.
36 
37 The fact that you are presently reading this means that you have had
38 knowledge of the CeCILL license and that you accept its terms.
39 */
40 
41 #ifndef _MASE_H_
42 #define _MASE_H_
43 
44 #include "AbstractISequence.h"
45 #include "AbstractIAlignment.h"
46 #include "AbstractOSequence.h"
47 #include "../Sequence.h"
48 #include "../Container/SequenceContainer.h"
49 #include "../Container/VectorSequenceContainer.h"
50 #include <Bpp/Numeric/Range.h>
51 #include <Bpp/Utils/MapTools.h>
52 
53 namespace bpp
54 {
55 
62 {
63  private:
64  mutable std::map<std::string, std::string> trees_;
65  mutable std::map<std::string, MultiRange<size_t> > siteSelections_;
66  mutable std::map<std::string, std::vector<size_t> > sequenceSelections_;
67 
68  public:
70  virtual ~MaseHeader() {}
71 
72  public:
73  size_t getNumberOfTrees() const { return trees_.size(); }
74  size_t getNumberOfSiteSelections() const { return siteSelections_.size(); }
75  size_t getNumberOfSequenceSelections() const { return sequenceSelections_.size(); }
76 
77  std::vector<std::string> getTreeNames() const { return MapTools::getKeys(trees_); }
78  std::vector<std::string> getSiteSelectionNames() const { return MapTools::getKeys(siteSelections_); }
79  std::vector<std::string> getSequenceSelectionNames() const { return MapTools::getKeys(sequenceSelections_); }
80 
81  const std::string& getTree(const std::string& name) const throw (Exception) {
82  if (trees_.find(name) != trees_.end()) {
83  return trees_[name];
84  } else {
85  throw Exception("MaseHeader::getTree. No tree with name " + name);
86  }
87  }
88  const MultiRange<size_t>& getSiteSelection(const std::string& name) const throw (Exception) {
89  if (siteSelections_.find(name) != siteSelections_.end()) {
90  return siteSelections_[name];
91  } else {
92  throw Exception("MaseHeader::getSiteSelection. No site selection with name " + name);
93  }
94  }
95  const std::vector<size_t>& getSequenceSelection(const std::string& name) const throw (Exception) {
96  if (sequenceSelections_.find(name) != sequenceSelections_.end()) {
97  return sequenceSelections_[name];
98  } else {
99  throw Exception("MaseHeader::getSequenceSelection. No sequence selection with name " + name);
100  }
101  }
102 
103  void setTree(const std::string& name, const std::string& tree) {
104  trees_[name] = tree;
105  }
106  void setSiteSelection(const std::string& name, const MultiRange<size_t>& ranges) {
107  siteSelections_[name] = ranges;
108  }
109  void setSequenceSelection(const std::string& name, const std::vector<size_t>& set) {
110  sequenceSelections_[name] = set;
111  }
112 
113 };
114 
125 class Mase:
126  public AbstractISequence,
127  public AbstractIAlignment,
128  public AbstractOSequence
129 {
130 
131  private:
132 
136  unsigned int charsByLine_;
138 
139  public :
146  Mase(unsigned int charsByLine = 100, bool checkSequenceNames = true): charsByLine_(charsByLine), checkNames_(checkSequenceNames) {}
147 
148  // Class destructor
149  virtual ~Mase() {}
150 
151  public:
152 
158  VectorSequenceContainer* readMeta(std::istream& input, const Alphabet* alpha, MaseHeader& header) const throw (Exception)
159  {
160  readHeader_(input, header);
161  return AbstractISequence::readSequences(input, alpha);
162  }
163  VectorSequenceContainer* readMeta(std::string& path, const Alphabet* alpha, MaseHeader& header) const throw (Exception)
164  {
165  std::ifstream input(path.c_str(), std::ios::in);
166  VectorSequenceContainer* sc = readMeta(input, alpha, header);
167  input.close();
168  return sc;
169  }
177  void appendSequencesFromStream(std::istream& input, SequenceContainer& sc) const throw (Exception);
185  void appendAlignmentFromStream(std::istream& input, SiteContainer& sc) const throw (Exception) {
186  appendSequencesFromStream(input, sc); //This might cast an exception if sequences are not aligned!
187  }
196  void writeSequences(std::ostream& output, const SequenceContainer& sc) const throw (Exception);
197  void writeSequences(const std::string& path, const SequenceContainer& sc, bool overwrite = true) const throw (Exception)
198  {
199  AbstractOSequence::writeSequences(path, sc, overwrite);
200  }
208  void writeMeta(std::ostream& output, const SequenceContainer& sc, const MaseHeader& header) const throw (Exception)
209  {
210  writeHeader_(output, header);
211  writeSequences(output, sc);
212  }
213  void writeMeta(const std::string& path, const SequenceContainer& sc, const MaseHeader& header, bool overwrite = true) const throw (Exception)
214  {
215  // Open file in specified mode
216  std::ofstream output(path.c_str(), overwrite ? (std::ios::out) : (std::ios::out | std::ios::app));
217  writeHeader_(output, header);
218  writeSequences(output, sc);
219  output.close();
220  }
228  const std::string getFormatName() const { return "MASE file"; }
229 
230  const std::string getFormatDescription() const
231  {
232  return "Optional file comments (preceeded by ;;), sequence comments (preceeded by ;), sequence name, sequence";
233  }
239  bool checkNames() const { return checkNames_; }
240 
246  void checkNames(bool yn) { checkNames_ = yn; }
247 
248  private:
249  void readHeader_(std::istream& input, MaseHeader& header) const throw (Exception);
250  void writeHeader_(std::ostream& output, const MaseHeader& header) const;
251 };
252 
253 } //end of namespace bpp.
254 
255 #endif // _MASE_H_
256 
virtual ~Mase()
Definition: Mase.h:149
VectorSequenceContainer * readMeta(std::string &path, const Alphabet *alpha, MaseHeader &header) const
Definition: Mase.h:163
bool checkNames_
Definition: Mase.h:137
std::vector< std::string > getSequenceSelectionNames() const
Definition: Mase.h:79
void writeMeta(std::ostream &output, const SequenceContainer &sc, const MaseHeader &header) const
Definition: Mase.h:208
The SiteContainer interface.
Definition: SiteContainer.h:63
This alphabet is used to deal with NumericAlphabet.
Partial implementation of the IAlignment interface, dedicated to alignment readers.
The VectorSequenceContainer class.
The Alphabet interface.
Definition: Alphabet.h:130
std::vector< std::string > getTreeNames() const
Definition: Mase.h:77
bool checkNames() const
Definition: Mase.h:239
void writeSequences(std::ostream &output, const SequenceContainer &sc) const =0
Write a container to a stream.
std::map< std::string, std::vector< size_t > > sequenceSelections_
Definition: Mase.h:66
std::vector< std::string > getSiteSelectionNames() const
Definition: Mase.h:78
void setSequenceSelection(const std::string &name, const std::vector< size_t > &set)
Definition: Mase.h:109
void setTree(const std::string &name, const std::string &tree)
Definition: Mase.h:103
void writeSequences(std::ostream &output, const SequenceContainer &sc) const
Write a container to a stream.
Definition: Mase.cpp:119
The mase sequence file format.
Definition: Mase.h:125
virtual ~MaseHeader()
Definition: Mase.h:70
void setSiteSelection(const std::string &name, const MultiRange< size_t > &ranges)
Definition: Mase.h:106
void readHeader_(std::istream &input, MaseHeader &header) const
Definition: Mase.cpp:181
void appendAlignmentFromStream(std::istream &input, SiteContainer &sc) const
Append sequences to a container from a stream.
Definition: Mase.h:185
const std::string & getTree(const std::string &name) const
Definition: Mase.h:81
const MultiRange< size_t > & getSiteSelection(const std::string &name) const
Definition: Mase.h:88
size_t getNumberOfSiteSelections() const
Definition: Mase.h:74
unsigned int charsByLine_
The maximum number of chars to be written on a line.
Definition: Mase.h:136
A class to store information from the header of Mase files.
Definition: Mase.h:61
const std::string getFormatName() const
Definition: Mase.h:228
size_t getNumberOfSequenceSelections() const
Definition: Mase.h:75
std::map< std::string, MultiRange< size_t > > siteSelections_
Definition: Mase.h:65
std::map< std::string, std::string > trees_
Definition: Mase.h:64
Mase(unsigned int charsByLine=100, bool checkSequenceNames=true)
Build a new Mase object.
Definition: Mase.h:146
const std::string getFormatDescription() const
Definition: Mase.h:230
size_t getNumberOfTrees() const
Definition: Mase.h:73
void writeHeader_(std::ostream &output, const MaseHeader &header) const
Definition: Mase.cpp:273
void writeSequences(const std::string &path, const SequenceContainer &sc, bool overwrite=true) const
Write a container to a file.
Definition: Mase.h:197
Partial implementation of the OSequence and OAlignment interfaces.
MaseHeader()
Definition: Mase.h:69
void writeMeta(const std::string &path, const SequenceContainer &sc, const MaseHeader &header, bool overwrite=true) const
Definition: Mase.h:213
virtual void readSequences(std::istream &input, SequenceContainer &sc) const
Add sequences to a container from a stream.
Partial implementation of the ISequence interface.
VectorSequenceContainer * readMeta(std::istream &input, const Alphabet *alpha, MaseHeader &header) const
Definition: Mase.h:158
const std::vector< size_t > & getSequenceSelection(const std::string &name) const
Definition: Mase.h:95
The SequenceContainer interface.
void appendSequencesFromStream(std::istream &input, SequenceContainer &sc) const
Append sequences to a container from a stream.
Definition: Mase.cpp:49
void checkNames(bool yn)
Tell whether the sequence names should be checked when reading from files.
Definition: Mase.h:246