46 #include "../StringSequenceTools.h" 47 #include <Bpp/Text/TextTools.h> 48 #include <Bpp/Text/StringTokenizer.h> 49 #include <Bpp/Io/FileTools.h> 58 throw IOException(
"Fasta::nextSequence: can't read from istream input");
63 string linebuffer =
"";
67 c =
static_cast<char>(input.peek());
78 getline(input, linebuffer);
82 seqname = string(linebuffer.begin() + 1, linebuffer.end());
84 if (c !=
'>' && !TextTools::isWhiteSpaceCharacter(c)) {
86 content += TextTools::toUpper(TextTools::removeWhiteSpaces(linebuffer));
90 bool res = (!input.eof());
92 if (strictNames_ || extended_) {
93 size_t pos = seqname.find_first_of(
" \t\n");
95 if (pos != string::npos) {
96 seqcmt = seqname.substr(pos + 1);
97 seqname = seqname.substr(0, pos);
100 StringTokenizer st(seqcmt,
" \\",
true,
false);
101 while (st.hasMoreToken()) {
102 seqcmts.push_back(st.nextToken());
105 seqcmts.push_back(seqcmt);
107 seq.setComments(seqcmts);
109 seq.setName(seqname);
110 seq.setContent(content);
119 throw IOException(
"Fasta::writeSequence: can't write to ostream output");
121 output <<
">" << seq.getName();
125 for (
unsigned int i = 0 ; i < seq.getComments().size() ; i++)
127 output <<
" \\" << seq.getComments()[i];
133 for (
size_t i = 0 ; i < seq.size() ; ++i)
135 buffer += seq.getChar(i);
136 if (buffer.size() >= charsByLine_)
138 output << string(buffer.begin(), buffer.begin() + charsByLine_) << endl;
139 buffer.erase(0, charsByLine_);
142 output << string(buffer.begin(), buffer.end()) << endl;
150 throw IOException(
"Fasta::appendFromStream: can't read from istream input");
157 while (!input.eof() && hasSeq)
162 if (extended_ && c ==
'#')
170 if (extended_ && header)
174 line.erase(line.begin());
175 cmts.push_back(line);
188 if (c ==
'>' && last_c ==
'\n')
193 hasSeq = nextSequence(input, tmpseq);
194 vsc.addSequence(tmpseq, checkNames_);
197 if (extended_ && cmts.size()) {
198 vsc.setGeneralComments(cmts);
207 throw IOException(
"Fasta::write: can't write to ostream output");
212 for (
unsigned int i = 0 ; i < sc.getGeneralComments().size() ; i++)
214 output <<
"#\\" << sc.getGeneralComments()[i] << endl;
220 vector<string> names = sc.getSequencesNames();
221 for (
size_t i = 0; i < names.size(); ++i)
223 writeSequence(output, sc.getSequence(names[i]));
233 std::ifstream f_in(path.c_str());
235 f_in.seekg(0, std::ios::end);
236 fileSize_ = f_in.tellg();
238 f_in.seekg(0, std::ios::beg);
239 streampos pos = f_in.tellg();
241 std::string seq_id =
"";
242 while (f_in.get(ch)) {
244 pos =
static_cast<int>(f_in.tellg()) - 1;
245 std::getline(f_in, seq_id);
246 index_[seq_id] = pos;
253 std::map<std::string, streampos>::const_iterator it = index_.find(
id);
254 if (it != index_.end()) {
257 throw Exception(
"Sequence not found: " +
id);
261 std::ifstream f_in(path.c_str());
262 std::string line_buffer =
"";
263 while (!f_in.eof()) {
264 std::getline(f_in, line_buffer);
265 if (bpp::TextTools::isEmpty(bpp::TextTools::removeSurroundingWhiteSpaces(line_buffer))) {
268 bpp::StringTokenizer tk(line_buffer,
"\t");
269 index_[tk.getToken(0)] = bpp::TextTools::toInt(tk.getToken(1));
275 std::ofstream f_out(path.c_str());
276 for (std::map<std::string, streampos>::const_iterator it = index_.begin() ; it != index_.end() ; ++it) {
277 f_out << it->first <<
"\t" << bpp::TextTools::toString(it->second) << std::endl;
284 streampos seq_pos = this->getSequencePosition(seqid);
285 std::ifstream fasta(path.c_str());
286 fasta.seekg(seq_pos);
void build(const std::string &path)
Build the index given a path to the file.
std::vector< std::string > Comments
Declaration of Comments type.
The fasta sequence file format.
void getSequence(const std::string &seqid, Sequence &seq, const std::string &path) const
Get a sequence given its ID.
This alphabet is used to deal with NumericAlphabet.
std::streampos getSequencePosition(const std::string &id) const
Get the position of a Sequence given its ID.
void read(const std::string &path)
Read the index from a file.
A basic implementation of the Sequence interface.
void write(const std::string &path)
Write the index to a file.
void writeSequence(std::ostream &output, const Sequence &seq) const
Write sequence to stream.
void writeSequences(std::ostream &output, const SequenceContainer &sc) const
Write a container to a stream.
void appendSequencesFromStream(std::istream &input, SequenceContainer &sc) const
Append sequences to a container from a stream.
The SequenceContainer interface.
bool nextSequence(std::istream &input, Sequence &seq) const
Read sequence from stream.