41 #include "../Container/VectorSequenceContainer.h" 42 #include "../Container/AlignedSequenceContainer.h" 43 #include "../Container/SequenceContainerTools.h" 44 #include <Bpp/Text/StringTokenizer.h> 45 #include <Bpp/Text/TextTools.h> 55 for (
size_t i = 0; i < maseFileHeader.size(); i++)
57 string current = maseFileHeader[i];
58 string::size_type index = current.find(
"# of");
59 if (index < current.npos)
61 StringTokenizer st(
string(current.begin() +
static_cast<ptrdiff_t
>(index + 4), current.end()),
" \t=;");
63 size_t numberOfSegments = TextTools::to<size_t>(st.nextToken());
64 string name = st.unparseRemainingTokens();
71 while (i < maseFileHeader.size())
73 current = maseFileHeader[i++];
74 StringTokenizer st2(current);
76 while (st2.hasMoreToken())
78 StringTokenizer st3(st2.nextToken(),
",");
79 size_t begin = TextTools::to<size_t>(st3.nextToken());
80 size_t end = TextTools::to<size_t>(st3.nextToken());
83 for (
size_t j = begin; j <= end; j++)
85 selection.push_back(j - 1);
88 if (counter == numberOfSegments)
95 if (selection.size() == 0)
97 throw IOException(
"Site set " + setName +
" has not been found in the sequence file.");
107 for (
size_t i = 0; i < maseFileHeader.size(); i++)
109 string current = maseFileHeader[i];
111 string::size_type index = current.find(
"@ of");
112 if (index < current.npos)
114 StringTokenizer st(
string(current.begin() +
static_cast<ptrdiff_t
>(index + 4), current.end()),
" \t=;");
116 size_t numberOfSequences = TextTools::to<size_t>(st.nextToken());
117 string name = st.unparseRemainingTokens();
124 while (i < maseFileHeader.size())
126 current = maseFileHeader[i++];
127 StringTokenizer st2(current,
",");
128 while (st2.hasMoreToken())
130 int seqIndex = TextTools::toInt(st2.nextToken());
133 selection.push_back(static_cast<size_t>(seqIndex - 1));
135 if (counter == numberOfSequences)
142 if (selection.size() == 0)
144 throw IOException(
"Sequence set " + setName +
" has not been found in the sequence file.");
153 const string& setName)
throw (IOException)
155 SiteSelection ss = getSiteSet(sequences.getGeneralComments(), setName);
174 return SiteContainerTools::getSelectedPositions(sequences, ss);
186 const std::string& setName)
throw (IOException)
190 SequenceContainerTools::getSelectedSequences(sequences, ss, *cont);
// Scans the mase header comments for site-selection declarations (lines
// containing "# of") and fills a map from selection name to the number of
// sites covered by that selection.
//
// maseHeader: the header comment lines to scan.
//
// NOTE(review): this listing is lossy - braces and some statements (for
// example the declarations of `counter` and `nbSites`, and the final return
// of `selections`) are not visible here; the comments below describe only the
// statements that are shown.
196 map<string, size_t> MaseTools::getAvailableSiteSelections(
const Comments& maseHeader)
198 map<string, size_t> selections;
// Examine every header line in turn.
199 for (
size_t i = 0; i < maseHeader.size(); i++)
201 string current = maseHeader[i];
// A line declares a site selection when it contains the "# of" marker.
203 string::size_type index = current.find(
"# of");
204 if (index < current.npos)
// Tokenize what follows the 4-character marker, splitting on whitespace,
// '=' and ';'.
206 StringTokenizer st(
string(current.begin() +
static_cast<ptrdiff_t
>(index + 4), current.end()),
" \t\n\f\r=;");
// Declared number of segments that make up this selection.
208 size_t numberOfSegments = TextTools::to<size_t>(st.nextToken());
// The selection name is made of all remaining tokens, joined with single
// spaces.
209 string name = st.nextToken();
210 while (st.hasMoreToken())
212 name +=
" " + st.nextToken();
// Read header lines to collect the segment definitions of this selection.
// NOTE(review): the statement advancing `i` inside this loop is not visible
// in this listing - confirm against the full file.
216 while (i < maseHeader.size())
219 current = maseHeader[i];
// Default tokenization of the line; each token is one "begin,end" segment.
220 StringTokenizer st2(current);
222 while (st2.hasMoreToken())
224 StringTokenizer st3(st2.nextToken(),
",");
225 size_t begin = TextTools::to<size_t>(st3.nextToken());
226 size_t end = TextTools::to<size_t>(st3.nextToken());
// Both bounds are counted, so a segment contributes end - begin + 1 sites.
228 nbSites += end - begin + 1;
// Once the declared number of segments is reached, record the total.
// Presumably `counter` counts the segments read so far - its update is not
// visible here.
230 if (counter == numberOfSegments)
232 selections[name] = nbSites;
// Scans the mase header comments for sequence-selection declarations (lines
// containing "@ of") and fills a map from selection name to the declared
// number of sequences in that selection.
//
// maseHeader: the header comment lines to scan.
//
// NOTE(review): this listing is lossy - braces and some statements (for
// example the final return of `selections`) are not visible here.
243 map<string, size_t> MaseTools::getAvailableSequenceSelections(
const Comments& maseHeader)
245 map<string, size_t> selections;
// Examine every header line in turn.
246 for (
size_t i = 0; i < maseHeader.size(); i++)
248 string current = maseHeader[i];
// A line declares a sequence selection when it contains the "@ of" marker.
250 string::size_type index = current.find(
"@ of");
251 if (index < current.npos)
// Tokenize what follows the 4-character marker, splitting on whitespace,
// '=' and ';'.
253 StringTokenizer st(
string(current.begin() +
static_cast<ptrdiff_t
>(index + 4), current.end()),
" \t\n\f\r=;");
// Declared number of sequences in this selection.
255 size_t numberOfSequences = TextTools::fromString<size_t>(st.nextToken());
// The selection name is built from all remaining tokens.
// NOTE(review): the tokens are appended without a separator here, whereas
// getAvailableSiteSelections joins them with " "; a multi-word name therefore
// ends up concatenated - confirm whether this difference is intended.
256 string name = st.nextToken();
257 while (st.hasMoreToken())
259 name += st.nextToken();
// Unlike site selections, the count comes straight from the declaration line.
261 selections[name] = numberOfSequences;
// Walks the mase header looking for "/codon_start" annotations and "# of"
// site-selection declarations, and throws an Exception when the loop finishes
// without having returned.
//
// maseFileHeader: the header comment lines to scan.
// setName:        the site selection of interest.
// Throws Exception if no result was produced from the header.
//
// NOTE(review): this listing is lossy - braces and some statements (the
// declarations of `phase` and `name`, the comparison with `setName`, and the
// return) are not visible here. Presumably the most recently read `phase` is
// returned when a "# of" line names `setName` - confirm against the full file.
// NOTE(review): dynamic exception specifications (`throw (Exception)`) were
// deprecated in C++11 and removed in C++17.
269 size_t MaseTools::getPhase(
const Comments& maseFileHeader,
const string& setName)
throw (Exception)
272 string::size_type index = 0;
// Examine every header line in turn.
273 for (
size_t i = 0; i < maseFileHeader.size(); i++)
275 string current = maseFileHeader[i];
// Site-selection declaration line.
277 index = current.find(
"# of");
278 if (index < current.npos)
// NOTE(review): the offset is 12 although "# of" is 4 characters long, so 8
// further characters are skipped - presumably a fixed-width word following
// the marker. If the line is shorter than index + 12 characters the start
// iterator lies past end(); TODO confirm this cannot happen.
280 StringTokenizer st(
string(current.begin() +
static_cast<ptrdiff_t
>(index + 12), current.end()),
" \t\n\f\r=;");
// Each iteration overwrites `name`, so only the last token on the line is
// kept.
284 while (st.hasMoreToken())
286 name = st.nextToken();
// Codon-start annotation line.
295 index = current.find(
"/codon_start");
296 if (index < current.npos)
// "/codon_start" is 12 characters long, so tokenization starts right after
// it; the first token is parsed as the phase.
298 StringTokenizer st(
string(current.begin() +
static_cast<ptrdiff_t
>(index + 12), current.end()),
" \t\n\f\r=;");
299 phase = TextTools::to<size_t>(st.nextToken());
// NOTE(review): the message names PolymorphismSequenceContainer::getPhase
// although this function is MaseTools::getPhase - confirm whether the text
// should be updated.
302 throw Exception(
"PolymorphismSequenceContainer::getPhase: no /codon_start found, or site selection missing.");
std::vector< size_t > SiteSelection
std::vector< std::string > Comments
Declaration of Comments type.
The SiteContainer interface.
The OrderedSequenceContainer interface.
This alphabet is used to deal with NumericAlphabet.
The VectorSequenceContainer class.
std::vector< size_t > SequenceSelection
The SequenceContainer interface.