bpp-seq  2.2.0
Fasta.h
Go to the documentation of this file.
1 //
2 // File: Fasta.h
3 // Authors: Guillaume Deuchst
4 // Julien Dutheil
5 // Sylvain Gaillard
6 // Created: Tue Aug 21 2003
7 //
8 
9 /*
10 Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
11 
12 This software is a computer program whose purpose is to provide classes
13 for sequences analysis.
14 
15 This software is governed by the CeCILL license under French law and
16 abiding by the rules of distribution of free software. You can use,
17 modify and/ or redistribute the software under the terms of the CeCILL
18 license as circulated by CEA, CNRS and INRIA at the following URL
19 "http://www.cecill.info".
20 
21 As a counterpart to the access to the source code and rights to copy,
22 modify and redistribute granted by the license, users are provided only
23 with a limited warranty and the software's author, the holder of the
24 economic rights, and the successive licensors have only limited
25 liability.
26 
27 In this respect, the user's attention is drawn to the risks associated
28 with loading, using, modifying and/or developing or reproducing the
29 software by the user in light of its specific status of free software,
30 that may mean that it is complicated to manipulate, and that also
31 therefore means that it is reserved for developers and experienced
32 professionals having in-depth computer knowledge. Users are therefore
33 encouraged to load and test the software's suitability as regards their
34 requirements in conditions enabling the security of their systems and/or
35 data to be ensured and, more generally, to use and operate it in the
36 same conditions as regards security.
37 
38 The fact that you are presently reading this means that you have had
39 knowledge of the CeCILL license and that you accept its terms.
40 */
41 
42 #ifndef _BPP_SEQ_IO_FASTA_H_
43 #define _BPP_SEQ_IO_FASTA_H_
44 
45 #include "AbstractISequence.h"
46 #include "AbstractIAlignment.h"
47 #include "AbstractOSequence.h"
48 #include "../Sequence.h"
49 #include "../Container/SequenceContainer.h"
50 #include "../Container/VectorSequenceContainer.h"
51 #include "ISequenceStream.h"
52 #include "OSequenceStream.h"
53 #include "SequenceFileIndex.h"
54 
55 namespace bpp
56 {
57 
// The fasta sequence file format.
//
// Combines the partial reader/writer implementations (AbstractISequence,
// AbstractIAlignment, AbstractOSequence) with the ISequenceStream and
// OSequenceStream interfaces. Only the short inline members are defined in
// this header; the remaining members are declared here and defined in Fasta.cpp.
63 class Fasta:
64  public AbstractISequence,
65  public AbstractIAlignment,
66  public AbstractOSequence,
67  public virtual ISequenceStream,
68  public virtual OSequenceStream
69 {
70  private:
71 
75  unsigned int charsByLine_; // The maximum number of chars to be written on a line (output only)
76  bool checkNames_; // Whether names must be checked in the container
77  bool extended_; // Whether HUPO-PSI extensions are used
78  bool strictNames_; // Whether the name is only the text between '>' and the first space
79 
80  public:
81 
// Build a new Fasta object.
//
// charsByLine         maximum number of chars written on a line (default 100).
// checkSequenceNames  initial value of checkNames_ (default true).
// extended            initial value of extended_ (default false).
// strictSequenceNames initial value of strictNames_ (default false).
90  Fasta(unsigned int charsByLine = 100, bool checkSequenceNames = true, bool extended = false, bool strictSequenceNames = false): charsByLine_(charsByLine), checkNames_(checkSequenceNames), extended_(extended), strictNames_(strictSequenceNames) {}
91 
92  // Class destructor (virtual, empty body)
93  virtual ~Fasta() {}
94 
95  public:
96 
// Append sequences to a container from a stream.
// Declared to throw Exception; defined in Fasta.cpp.
102  void appendSequencesFromStream(std::istream& input, SequenceContainer& sc) const throw (Exception);
// Append sequences to a SiteContainer from a stream by forwarding to
// appendSequencesFromStream() with the same arguments.
110  void appendAlignmentFromStream(std::istream& input, SiteContainer& sc) const throw (Exception) {
111  appendSequencesFromStream(input, sc); // This may raise an exception if sequences are not aligned!
112  }
// Write a container to a stream. Defined in Fasta.cpp.
120  void writeSequences(std::ostream& output, const SequenceContainer& sc) const throw (Exception);
121 
// Write a container to a file.
// Forwards path, sc and overwrite (default true) unchanged to
// AbstractOSequence::writeSequences().
122  void writeSequences(const std::string& path, const SequenceContainer& sc, bool overwrite=true) const throw (Exception)
123  {
124  AbstractOSequence::writeSequences(path, sc, overwrite);
125  }
// Returns the format name, the literal "FASTA file".
133  const std::string getFormatName() const { return "FASTA file"; };
// Returns a one-sentence, human-readable description of the format layout.
134  const std::string getFormatDescription() const
135  {
136  return "Sequence name (preceded by >) in one line, sequence content, no comments";
137  }
// Read sequence from stream. Returns a bool; its meaning is set by the
// implementation in Fasta.cpp (presumably whether a sequence was read -- TODO confirm).
145  bool nextSequence(std::istream& input, Sequence& seq) const throw (Exception);
// Takes an output stream and a const Sequence; presumably writes the sequence
// to the stream (implementation is in Fasta.cpp -- TODO confirm).
153  void writeSequence(std::ostream& output, const Sequence& seq) const throw (Exception);
// Returns the current value of the checkNames_ flag.
159  bool checkNames() const { return checkNames_; }
160 
// Tell whether the sequence names should be checked when reading from files.
// Stores yn in checkNames_.
166  void checkNames(bool yn) { checkNames_ = yn; }
167 
// Returns the current value of the strictNames_ flag.
171  bool strictNames() const { return strictNames_; }
172 
// Tell whether the sequence name should be restricted to the first non-blank
// characters. Stores yn in strictNames_.
178  void strictNames(bool yn) { strictNames_ = yn; }
179 
// NOTE(review): source line 184, which opens the nested index class (documented
// as "The SequenceFileIndex class for Fasta format"), is not present in this
// listing. The declarations from here down to the "};" on line 208 belong to
// that nested class, not directly to Fasta.
185  public:
// Build the index given a path to the file. Defined in Fasta.cpp.
188  void build(const std::string& path) throw (Exception);
// Get the position of a Sequence given its ID. Defined in Fasta.cpp.
189  std::streampos getSequencePosition(const std::string& id) const throw (Exception);
// Get the number of sequences, i.e. the number of entries in index_.
190  size_t getNumberOfSequences() const throw (Exception) {
191  return index_.size();
192  }
// Read the index from a file. Defined in Fasta.cpp.
196  void read(const std::string& path) throw (Exception);
// Write the index to a file. Defined in Fasta.cpp.
200  void write(const std::string& path) throw (Exception);
// Get a sequence given its ID; the result is delivered through seq.
// Defined in Fasta.cpp. Unlike its siblings it carries no exception specification.
204  void getSequence(const std::string& seqid, Sequence& seq, const std::string& path) const;
205  private:
// Map from a string key to a stream position
// (presumably sequence ID -> offset in the indexed file -- TODO confirm).
206  std::map<std::string, std::streampos> index_;
// NOTE(review): name suggests the size of the indexed file; not used in this header.
207  std::streampos fileSize_;
208  };
209 };
210 
211 } //end of namespace bpp.
212 
213 #endif // _BPP_SEQ_IO_FASTA_H_
214 
The OSequenceStream interface.
void build(const std::string &path)
Build the index given a path to the file.
Definition: Fasta.cpp:231
The fasta sequence file format.
Definition: Fasta.h:63
bool strictNames() const
Definition: Fasta.h:171
void getSequence(const std::string &seqid, Sequence &seq, const std::string &path) const
Get a sequence given its ID.
Definition: Fasta.cpp:282
The SiteContainer interface.
Definition: SiteContainer.h:63
This alphabet is used to deal with NumericAlphabet.
std::streampos getSequencePosition(const std::string &id) const
Get the position of a Sequence given its ID.
Definition: Fasta.cpp:252
Fasta(unsigned int charsByLine=100, bool checkSequenceNames=true, bool extended=false, bool strictSequenceNames=false)
Build a new Fasta object.
Definition: Fasta.h:90
Partial implementation of the IAlignment interface, dedicated to alignment readers.
void writeSequences(const std::string &path, const SequenceContainer &sc, bool overwrite=true) const
Write a container to a file.
Definition: Fasta.h:122
const std::string getFormatName() const
Definition: Fasta.h:133
void writeSequences(std::ostream &output, const SequenceContainer &sc) const =0
Write a container to a stream.
void appendAlignmentFromStream(std::istream &input, SiteContainer &sc) const
Append sequences to a container from a stream.
Definition: Fasta.h:110
void read(const std::string &path)
Read the index from a file.
Definition: Fasta.cpp:260
std::streampos fileSize_
Definition: Fasta.h:207
void checkNames(bool yn)
Tell whether the sequence names should be checked when reading from files.
Definition: Fasta.h:166
void strictNames(bool yn)
Tell whether the sequence name should be restricted to the first non-blank characters.
Definition: Fasta.h:178
bool strictNames_
Definition: Fasta.h:78
bool extended_
Definition: Fasta.h:77
size_t getNumberOfSequences() const
Get the number of sequences.
Definition: Fasta.h:190
Index to retrieve Sequence in a file.
std::map< std::string, std::streampos > index_
Definition: Fasta.h:206
void write(const std::string &path)
Write the index to a file.
Definition: Fasta.cpp:274
bool checkNames() const
Definition: Fasta.h:159
void writeSequence(std::ostream &output, const Sequence &seq) const
Write sequence to stream.
Definition: Fasta.cpp:116
The sequence interface.
Definition: Sequence.h:74
virtual ~Fasta()
Definition: Fasta.h:93
void writeSequences(std::ostream &output, const SequenceContainer &sc) const
Write a container to a stream.
Definition: Fasta.cpp:204
The SequenceFileIndex class for Fasta format.
Definition: Fasta.h:184
Partial implementation of the OSequence and OAlignment interfaces.
Partial implementation of the ISequence interface.
void appendSequencesFromStream(std::istream &input, SequenceContainer &sc) const
Append sequences to a container from a stream.
Definition: Fasta.cpp:147
The SequenceContainer interface.
The ISequenceStream interface.
unsigned int charsByLine_
The maximum number of chars to be written on a line.
Definition: Fasta.h:75
bool checkNames_
Definition: Fasta.h:76
bool nextSequence(std::istream &input, Sequence &seq) const
Read sequence from stream.
Definition: Fasta.cpp:56
const std::string getFormatDescription() const
Definition: Fasta.h:134