41 #include "../Container/SequenceContainerTools.h" 42 #include <Bpp/Text/TextTools.h> 43 #include <Bpp/Text/StringTokenizer.h> 44 #include <Bpp/Io/FileTools.h> 60 string::size_type index = s.find(namesSplit_);
61 if(index == string::npos)
throw Exception(
"No sequence name found.");
62 v[0] = TextTools::removeSurroundingWhiteSpaces(s.substr(0, index));
63 v[1] = TextTools::removeFirstWhiteSpaces (s.substr(index + namesSplit_.size()));
67 v[0] = TextTools::removeSurroundingWhiteSpaces(s.substr(0, 10));
80 getline(in, temp,
'\n');
81 temp = TextTools::removeSurroundingWhiteSpaces(FileTools::getNextLine(in));
92 v = splitNameAndSequence(temp);
101 if (!TextTools::isEmpty(name))
104 asc.addSequence(
BasicSequence(name, seq, asc.getAlphabet()), checkNames_);
112 if (TextTools::isEmpty(name))
113 throw Exception(
"First sequence in file has no name!");
114 seq += TextTools::removeWhiteSpaces(temp);
123 temp = TextTools::removeSurroundingWhiteSpaces(FileTools::getNextLine(in));
127 asc.addSequence(
BasicSequence(name, seq, asc.getAlphabet()), checkNames_);
137 getline(in, temp,
'\n');
138 StringTokenizer st(temp);
139 unsigned int nbSequences = TextTools::to<unsigned int>(st.nextToken());
141 temp = FileTools::getNextLine(in);
143 vector<string> names, seqs;
145 for (
unsigned int i = 0; i < nbSequences && !in.eof() && !TextTools::isEmpty(temp); i++)
147 vector<string> v = splitNameAndSequence(temp);
148 names.push_back(v[0]);
149 seqs.push_back(v[1]);
150 getline(in, temp,
'\n');
154 temp = FileTools::getNextLine(in);
157 for (
unsigned int i = 0; i < names.size(); i++)
159 if (TextTools::isEmpty(temp))
160 throw IOException(
"Phylip::readInterleaved. Bad file,there are not the same number of sequence in each block.");
161 seqs[i] += TextTools::removeWhiteSpaces(temp);
162 getline(in, temp,
'\n');
164 temp = FileTools::getNextLine(in);
166 for (
unsigned int i = 0; i < names.size(); i++)
168 asc.addSequence(
BasicSequence(names[i], seqs[i], asc.getAlphabet()), checkNames_);
177 if (!input) {
throw IOException (
"Phylip::read: fail to open file"); }
179 if(sequential_) readSequential (input, vsc);
180 else readInterleaved(input, vsc);
188 ifstream file (path.c_str(), ios::in);
189 if (! file) {
throw IOException (
"Phylip::getNumberOfSequences: failed to open file"); }
190 string firstLine = FileTools::getNextLine(file);
191 StringTokenizer st(firstLine,
" \t");
192 istringstream iss(st.nextToken());
203 vector<string> sizedNames(names.size());
209 for (
size_t i = 0; i < names.size(); i++)
210 if (names[i].size() > sizeMax) sizeMax = names[i].size();
212 for (
size_t i = 0; i < names.size(); i++)
213 sizedNames[i] = TextTools::resizeRight(names[i], sizeMax) + namesSplit_;
218 for(
unsigned int i = 0; i < names.size(); i++) sizedNames[i] = TextTools::resizeRight(names[i], 10);
219 cout <<
"Warning: names have been truncated to 10 characters. They may be ambiguous sequence names then." << endl;
233 vector<string> names = getSizedNames(seqNames);
234 for (
size_t i = 0; i < seqNames.size(); ++i)
236 vector<string> seq = TextTools::split(sc.
toString(seqNames[i]), charsByLine_);
237 out << names[i] << seq[0] << endl;
238 for (
size_t j = 1; j < seq.size(); ++j)
240 out << string(names[i].size(),
' ') << seq[j] << endl;
253 vector<string> names = getSizedNames(seqNames);
256 for (
size_t i = 0; i < seqNames.size(); ++i)
258 seqs[i] = TextTools::split(sc.
toString(seqNames[i]), charsByLine_);
261 for (
size_t i = 0; i < names.size(); ++i)
263 out << names[i] << seqs[i][0] << endl;
267 for (
size_t j = 1; j < seqs[0].size(); ++j)
271 out << seqs[i][j] << endl;
282 if (sc.getNumberOfSequences() == 0)
283 throw Exception(
"Phylip::write. SequenceContainer appear to contain no sequence.");
286 if (!output) {
throw IOException (
"Phylip::write : failed to open file"); }
288 if (sequential_) writeSequential (output, sc);
289 else writeInterleaved(output, sc);
294 const std::string
Phylip::getFormatName()
const {
return "Phylip file, " + string(extended_ ?
"extended," :
"") + string(sequential_ ?
"sequential" :
"interleaved"); }
300 return "Phylip file format, sequential and interleaved. PAML extension also supported.";
void writeInterleaved(std::ostream &out, const SequenceContainer &sc) const
void readSequential(std::istream &in, SiteContainer &asc) const
The SiteContainer interface.
const std::string getFormatDescription() const
This alphabet is used to deal with NumericAlphabet.
std::vector< std::string > getSizedNames(const std::vector< std::string > &names) const
void appendAlignmentFromStream(std::istream &input, SiteContainer &sc) const
Append sequences to a container from a stream.
const std::string getFormatName() const
virtual std::string toString(const std::string &name) const =0
Convert a particular sequence to a string.
void readInterleaved(std::istream &in, SiteContainer &asc) const
virtual unsigned int getStateCodingSize() const =0
Get the size of the string coding a state.
virtual size_t getNumberOfSequences() const =0
Get the number of sequences in the container.
void writeAlignment(std::ostream &output, const SiteContainer &sc) const
Write a container to a stream.
A basic implementation of the Sequence interface.
virtual const Sequence & getSequence(const std::string &name) const =0
Retrieve a sequence object from the container.
virtual std::vector< std::string > getSequencesNames() const =0
Get all the names of the sequences in the container.
void writeSequential(std::ostream &out, const SequenceContainer &sc) const
const std::vector< std::string > splitNameAndSequence(const std::string &s) const
virtual const Alphabet * getAlphabet() const =0
Get sequence container's alphabet.
unsigned int getNumberOfSequences(const std::string &path) const
The SequenceContainer interface.