bpp-seq  2.2.0
PhredPhd.cpp
Go to the documentation of this file.
1 //
2 // File: PhredPhd.cpp
3 // Created by: Sylvain Gaillard
4 // Created on: Wed Nov 5 2008
5 //
6 
7 /*
8 Copyright or © or Copr. CNRS, (November 5, 2008)
9 
10 This software is a computer program whose purpose is to provide classes
11 for sequences analysis.
12 
13 This software is governed by the CeCILL license under French law and
14 abiding by the rules of distribution of free software. You can use,
15 modify and/ or redistribute the software under the terms of the CeCILL
16 license as circulated by CEA, CNRS and INRIA at the following URL
17 "http://www.cecill.info".
18 
19 As a counterpart to the access to the source code and rights to copy,
20 modify and redistribute granted by the license, users are provided only
21 with a limited warranty and the software's author, the holder of the
22 economic rights, and the successive licensors have only limited
23 liability.
24 
25 In this respect, the user's attention is drawn to the risks associated
26 with loading, using, modifying and/or developing or reproducing the
27 software by the user in light of its specific status of free software,
28 that may mean that it is complicated to manipulate, and that also
29 therefore means that it is reserved for developers and experienced
30 professionals having in-depth computer knowledge. Users are therefore
31 encouraged to load and test the software's suitability as regards their
32 requirements in conditions enabling the security of their systems and/or
33 data to be ensured and, more generally, to use and operate it in the
34 same conditions as regards security.
35 
36 The fact that you are presently reading this means that you have had
37 knowledge of the CeCILL license and that you accept its terms.
38 */
39 
40 #include "PhredPhd.h"
41 #include <Bpp/Text/TextTools.h>
42 #include <Bpp/Text/StringTokenizer.h>
43 
44 using namespace bpp;
45 
46 /******************************************************************************/
47 
48 //PhredPhd::PhredPhd() {}
49 
50 /******************************************************************************/
51 
52 bool PhredPhd::nextSequence(std::istream& input, Sequence& seq) const throw (Exception) {
53  std::vector<int> pos;
54  return nextSequence(input, seq, pos);
55 }
56 
57 /******************************************************************************/
58 
59 bool PhredPhd::nextSequence(std::istream& input, Sequence& seq, std::vector<int>& pos) const throw (Exception) {
60  if (!input) {
61  throw IOException ("PhredPhd::read: fail to open stream");
62  }
63 
64  bool flag = false;
65  std::string name, sequence = ""; // Initialization
66  std::vector<int> q, p;
67 
68  flag = parseFile_(input, name, sequence, q, p);
69  // Sequence creation
70  if(name == "")
71  throw Exception("PhredPhd::read: sequence without name!");
72  seq.setName(name);
73  seq.setContent(sequence);
74  try {
75  SequenceWithQuality& sq = dynamic_cast<SequenceWithQuality&>(seq);
76  sq.setQualities(q);
77  } catch (...) {
78  }
79  return flag;
80 }
81 
82 /******************************************************************************/
83 
84 bool PhredPhd::parseFile_(std::istream& input, std::string& name, std::string& sequence, std::vector<int>& qual, std::vector<int>& pos) const {
85  bool readSeqFlag = false;
86  std::string temp;
87  // Read sequence info
88  // Main loop : for all lines
89  while (!input.eof()) {
90  std::getline(input, temp, '\n'); // Copy current line in temporary string
91  StringTokenizer st(temp, " ");
92  if (st.hasMoreToken()) {
93  if (st.getToken(0) == "BEGIN_SEQUENCE") {
94  name = st.getToken(1);
95  }
96  std::string flag = st.getToken(0);
97  while (flag != "END_SEQUENCE" && !input.eof()) {
98  getline(input, temp, '\n');
99  StringTokenizer st2(temp, " ");
100  if (st2.hasMoreToken()) {
101  flag = st2.getToken(0);
102  }
103  if (flag == "BEGIN_DNA") {
104  readSeqFlag = parseDNA_(input, sequence, qual, pos);
105  break; // End the whole loop after parsing DNA
106  }
107  }
108  }
109  }
110  return readSeqFlag;
111 }
112 
113 /******************************************************************************/
114 
115 bool PhredPhd::parseDNA_(std::istream& input, std::string& sequence, std::vector<int>& qual, std::vector<int>& pos) const {
116  bool readSeqFlag = false;
117  std::string line_buffer;
118  std::string flag;
119  sequence.clear();
120  qual.clear();
121  pos.clear();
122  while (flag != "END_DNA" && !input.eof()) {
123  std::getline(input, line_buffer, '\n');
124  StringTokenizer st(line_buffer, " ");
125  if (st.hasMoreToken()) {
126  flag = TextTools::toUpper(st.getToken(0));
127  if (st.numberOfRemainingTokens() == 3) {
128  sequence += flag;
129  qual.push_back(TextTools::toInt(st.getToken(1)));
130  pos.push_back(TextTools::toInt(st.getToken(2)));
131  readSeqFlag = true;
132  }
133  }
134  }
135  return readSeqFlag;
136 }
137 
138 /******************************************************************************/
This alphabet is used to deal with NumericAlphabet.
A SequenceWithAnnotation class with quality scores attached.
bool nextSequence(std::istream &input, Sequence &seq) const
Read sequence from stream.
Definition: PhredPhd.cpp:52
bool parseFile_(std::istream &input, std::string &name, std::string &sequence, std::vector< int > &qual, std::vector< int > &pos) const
Global file parser.
Definition: PhredPhd.cpp:84
void setQualities(const std::vector< int > &quality)
Set the whole quality scores.
The sequence interface.
Definition: Sequence.h:74
bool parseDNA_(std::istream &input, std::string &sequence, std::vector< int > &qual, std::vector< int > &pos) const
Parse the DNA part of the file.
Definition: PhredPhd.cpp:115