42 #include "../StringSequenceTools.h" 51 if (!input) {
throw IOException (
"Mase::read : fail to open file"); }
55 string temp, name, sequence =
"";
56 bool comments =
false;
59 fileComments = vsc.getGeneralComments();
64 getline(input, temp,
'\n');
74 if(temp !=
"") fileComments.push_back(temp);
79 if ((name !=
"") && (sequence !=
""))
82 vsc.addSequence(
BasicSequence(name, sequence, seqComments, vsc.getAlphabet()), checkNames_);
89 temp.erase(temp.begin());
90 if (temp !=
"") seqComments.push_back(temp);
103 else sequence += temp;
108 if ((name !=
"") && (sequence !=
""))
110 vsc.addSequence(
BasicSequence(name, sequence, seqComments, vsc.getAlphabet()), checkNames_);
114 vsc.setGeneralComments(fileComments);
122 if (!output) {
throw IOException (
"Mase::write : failed to open file"); }
124 Comments comments = sc.getGeneralComments();
127 if (comments.size() == 0) {
128 output <<
";;" << endl;
130 for (
unsigned int i = 0 ; i < comments.size() ; i++) {
131 output <<
";;" << comments[i] << endl;
134 string seq, temp =
"";
137 vector<string> names = sc.getSequencesNames();
138 for (
unsigned int i = 0 ; i < names.size() ; i ++)
140 comments = sc.getComments(names[i]);
144 if (comments.size() == 0)
146 output <<
";" << endl;
150 for (
unsigned int j = 0 ; j < comments.size() ; j++)
152 output <<
";" << comments[j] << endl;
157 output << names[i] << endl;
160 seq = sc.toString(names[i]);
163 if (seq.size() > charsByLine_)
166 temp.erase(temp.begin() + charsByLine_ , temp.end());
167 output << temp << endl;
168 seq.erase(seq.begin(), seq.begin() + charsByLine_);
172 output << seq << endl;
185 if (input.peek() ==
';') {
188 if (input.peek() ==
';') {
190 string line = FileTools::getNextLine(input);
195 string::size_type index = line.find(
"# of");
196 if (index < line.npos) {
197 StringTokenizer st(
string(line.begin() +
static_cast<ptrdiff_t
>(index + 4), line.end()),
" \t=;");
199 unsigned int numberOfSegments = TextTools::to<unsigned int>(st.nextToken());
200 string name = st.unparseRemainingTokens();
202 MultiRange<size_t> siteSelection;
203 while (siteSelection.size() < numberOfSegments) {
204 line = FileTools::getNextLine(input);
205 if (line[0] !=
';' || line[1] !=
';')
206 throw Exception(
"Mase::readHeader_(): corrupted file, site selection " + name +
" is incomplete. Aborting.");
207 line = line.substr(2);
208 StringTokenizer st2(line);
209 while (st2.hasMoreToken()) {
210 StringTokenizer st3(st2.nextToken(),
",");
211 unsigned int begin = TextTools::to<unsigned int>(st3.nextToken());
212 unsigned int end = TextTools::to<unsigned int>(st3.nextToken());
214 siteSelection.addRange(Range<size_t>(begin - 1, end));
216 if (siteSelection.size() > numberOfSegments)
217 throw Exception(
"Mase::readHeader_(): incorrected file, found " + TextTools::toString(siteSelection.size()) +
"segments while expected " + TextTools::toString(numberOfSegments));
219 header.setSiteSelection(name, siteSelection);
222 index = line.find(
"@ of");
223 if (index < line.npos) {
224 StringTokenizer st(line.substr(index + 4),
" \t=;");
226 unsigned int numberOfSequences = TextTools::to<unsigned int>(st.nextToken());
227 string name = st.unparseRemainingTokens();
229 vector<size_t> sequenceSelection;
230 while (sequenceSelection.size() < numberOfSequences) {
231 line = FileTools::getNextLine(input);
232 if (line[0] !=
';' || line[1] !=
';')
233 throw Exception(
"Mase::readHeader_(): corrupted file, sequence selection " + name +
" is incomplete. Aborting.");
234 line = line.substr(2);
235 StringTokenizer st2(line,
", ");
236 while (st2.hasMoreToken()) {
237 unsigned int pos = TextTools::to<unsigned int>(st2.nextToken());
239 sequenceSelection.push_back(pos);
241 if (sequenceSelection.size() > numberOfSequences)
242 throw Exception(
"Mase::readHeader_(): incorrected file, found " + TextTools::toString(sequenceSelection.size()) +
"sequences while expected " + TextTools::toString(numberOfSequences));
244 header.setSequenceSelection(name, sequenceSelection);
247 index = line.find(
"$");
248 if (index < line.npos) {
249 string name = TextTools::removeSurroundingWhiteSpaces(line.substr(index + 1));
253 line = FileTools::getNextLine(input);
254 if (line[0] !=
';' || line[1] !=
';')
255 throw Exception(
"Mase::readHeader_(): corrupted file, tree " + name +
" is incomplete. Aborting.");
256 line = TextTools::removeSurroundingWhiteSpaces(line.substr(2));
258 }
while (! TextTools::endsWith(line,
";"));
259 header.setTree(name, tree);
277 for (
size_t i = 0; i < treeNames.size(); ++i) {
278 output <<
";;$ " + treeNames[i] << endl;
279 output <<
";;" + header.
getTree(treeNames[i]);
285 for (
size_t i = 0; i < siteSelectionNames.size(); ++i) {
287 output <<
";;# of segments=" << ranges.size() <<
" " << siteSelectionNames[i] << endl;
289 for (
unsigned int j = 0; j < ranges.size(); ++j) {
290 output <<
" " << (ranges.getRange(j).begin() + 1) <<
"," << ranges.getRange(j).end();
291 if ((j + 1) % 10 == 0)
292 output << endl <<
";;";
299 for (
size_t i = 0; i < sequenceSelectionNames.size(); ++i) {
301 output <<
";;@ of species=" <<
set.size() <<
" " << sequenceSelectionNames[i] << endl;
303 for (
unsigned int j = 0; j <
set.size(); ++j) {
304 output <<
" " <<
set[j];
305 if ((j + 1) % 10 == 0)
306 output << endl <<
";;";
std::vector< std::string > Comments
Declaration of Comments type.
This alphabet is used to deal with NumericAlphabet.
void writeSequences(std::ostream &output, const SequenceContainer &sc) const
Write a container to a stream.
void readHeader_(std::istream &input, MaseHeader &header) const
A basic implementation of the Sequence interface.
void writeHeader_(std::ostream &output, const MaseHeader &header) const
The SequenceContainer interface.
void appendSequencesFromStream(std::istream &input, SequenceContainer &sc) const
Append sequences to a container from a stream.