54 const OrderedSequenceContainer* seqc = 0;
57 seqc =
dynamic_cast<OrderedSequenceContainer*
>(ms.readSequences(path, alpha ));
66 Comments maseFileHeader = seqc->getGeneralComments();
68 map<string, size_t> groupMap = MaseTools::getAvailableSequenceSelections(maseFileHeader);
69 for (map<string, size_t>::iterator mi = groupMap.begin(); mi != groupMap.end(); mi++)
72 if (key.compare(0, 8,
"OUTGROUP") == 0)
77 ss = MaseTools::getSequenceSet(maseFileHeader, key);
79 catch (IOException& ioe)
84 for (
size_t i = 0; i != ss.size(); i++)
90 catch (SequenceNotFoundException& snfe)
105 SequenceSelection ss;
107 for (
size_t i = 0; i < psc.getNumberOfSequences(); i++)
109 if (!psc.isIngroupMember(i))
112 if (ss.size() == psc.getNumberOfSequences())
115 throw Exception(
"PolymorphismSequenceContainerTools::extractIngroup: no Ingroup sequences found.");
117 for (
size_t i = ss.size(); i > 0; --i)
128 SequenceSelection ss;
130 for (
size_t i = 0; i < psc.getNumberOfSequences(); i++)
132 if (psc.isIngroupMember(i) )
135 if (ss.size() == psc.getNumberOfSequences())
138 throw Exception(
"PolymorphismSequenceContainerTools::extractOutgroup: no Outgroup sequences found.");
140 for (
size_t i = ss.size(); i > 0; i--)
151 SequenceSelection ss;
153 for (
size_t i = 0; i < psc.getNumberOfSequences(); i++)
155 if (psc.getGroupId(i) != group_id)
158 if (ss.size() == psc.getNumberOfSequences())
161 throw GroupNotFoundException(
"PolymorphismSequenceContainerTools::extractGroup: group_id not found.", group_id);
163 for (
size_t i = ss.size(); i > 0; i--)
175 for (
size_t i = 0; i < ss.size(); i++)
186 newpsc->setGeneralComments(psc.getGeneralComments());
194 size_t nbSeq = psc.getNumberOfSequences();
196 for (
size_t i = 0; i < nbSeq; ++i)
200 vector<size_t> vv(n);
201 RandomTools::getSample(v, vv, replace);
210 vector<string> seqNames = psc.getSequencesNames();
212 noGapCont->setSequencesNames(seqNames,
false);
213 size_t nbSeq = psc.getNumberOfSequences();
214 for (
size_t i = 0; i < nbSeq; i++)
225 NoGapSiteContainerIterator ngsi(psc);
226 while (ngsi.hasMoreSites())
227 noGapCont->addSite(*ngsi.nextSite());
235 size_t count = psc.getNumberOfSites();
237 SimpleSiteContainerIterator* ssi;
242 npsc = extractIngroup(psc);
250 ssi =
new SimpleSiteContainerIterator(*npsc);
253 ssi =
new SimpleSiteContainerIterator(psc);
254 while (ssi->hasMoreSites())
255 if (SiteTools::hasGap(*ssi->nextSite()))
265 size_t count = psc.getNumberOfSites();
267 SimpleSiteContainerIterator* ssi;
272 npsc = extractIngroup(psc);
280 ssi =
new SimpleSiteContainerIterator(*npsc);
283 ssi =
new SimpleSiteContainerIterator(psc);
284 while (ssi->hasMoreSites())
285 if (!SiteTools::isComplete(*ssi->nextSite()))
295 vector<string> seqNames = psc.getSequencesNames();
297 complete->setSequencesNames(seqNames,
false);
298 size_t nbSeq = psc.getNumberOfSequences();
299 for (
size_t i = 0; i < nbSeq; i++)
310 CompleteSiteContainerIterator csi(psc);
311 while (csi.hasMoreSites())
312 complete->addSite(*csi.nextSite());
321 while (SiteTools::hasGap(psci->getSite(0)))
324 size_t n = psci->getNumberOfSites();
325 while (SiteTools::hasGap(psci->getSite(n - i - 1)))
327 psci->deleteSite(n - i - 1);
337 SiteContainer* pscc = MaseTools::getSelectedSites(psc, setName);
338 Comments maseFileHeader = psc.getGeneralComments();
341 for (
size_t i = 1; i < MaseTools::getPhase(maseFileHeader, setName); i++)
347 for (
size_t i = 0; i < psc.getNumberOfSequences(); i++)
357 psci->deleteGeneralComments();
367 Comments maseFileHeader = psc.getGeneralComments();
368 SiteSelection codss = MaseTools::getSiteSet(maseFileHeader, setName);
369 for (
size_t i = 0; i < psc.getNumberOfSites(); i++)
371 if (find(codss.begin(), codss.end(), i) == codss.end())
374 const SiteContainer* sc = SiteContainerTools::getSelectedSites(psc, ss);
376 for (
size_t i = 0; i < psc.getNumberOfSequences(); i++)
394 Comments maseFileHeader = psc.getGeneralComments();
398 start = MaseTools::getPhase(maseFileHeader, setName);
406 if ((
int)pos - (
int)start >= 0)
410 while (i < psc.getNumberOfSites())
415 const SiteContainer* sc = SiteContainerTools::getSelectedSites(psc, ss);
417 for (
size_t j = 0; j < psc.getNumberOfSequences(); j++)
435 const std::string& setName,
436 const GeneticCode* gCode)
438 Comments maseFileHeader = psc.getGeneralComments();
440 SiteSelection codss = MaseTools::getSiteSet(maseFileHeader, setName);
444 start = MaseTools::getPhase(maseFileHeader, setName);
451 size_t first = 0, last = psc.getNumberOfSites();
454 psc.getSite(codss[0]).getValue(0) == 0 &&
455 psc.getSite(codss[1]).getValue(0) == 3 &&
456 psc.getSite(codss[2]).getValue(0) == 2)
459 int c1 = psc.getSite(codss[codss.size() - 3]).getValue(0);
460 int c2 = psc.getSite(codss[codss.size() - 2]).getValue(0);
461 int c3 = psc.getSite(codss[codss.size() - 1]).getValue(0);
462 if (gCode->isStop(gCode->getSourceAlphabet()->getCodon(c1, c2, c3)))
463 last = codss[codss.size() - 1];
465 for (
size_t i = first; i < last; i++)
467 if (find(codss.begin(), codss.end(), i) == codss.end())
472 const SiteContainer* sc = SiteContainerTools::getSelectedSites(psc, ss);
474 for (
size_t i = 0; i < psc.getNumberOfSequences(); i++)
492 Comments maseFileHeader = psc.getGeneralComments();
494 SiteSelection codss = MaseTools::getSiteSet(maseFileHeader, setName);
495 size_t start = MaseTools::getPhase(maseFileHeader, setName);
499 psc.getSite(codss[0]).getValue(0) == 0 &&
500 psc.getSite(codss[1]).getValue(0) == 3 &&
501 psc.getSite(codss[2]).getValue(0) == 2)
503 for (
size_t i = 0; i < last; i++)
505 if (find(codss.begin(), codss.end(), i) == codss.end())
510 const SiteContainer* sc = SiteContainerTools::getSelectedSites(psc, ss);
512 for (
size_t i = 0; i < psc.getNumberOfSequences(); i++)
530 const std::string& setName,
531 const GeneticCode* gCode)
533 Comments maseFileHeader = psc.getGeneralComments();
535 SiteSelection codss = MaseTools::getSiteSet(maseFileHeader, setName);
536 size_t first = psc.getNumberOfSites() - 1;
538 int c1 = psc.getSite(codss[codss.size() - 3]).getValue(0);
539 int c2 = psc.getSite(codss[codss.size() - 2]).getValue(0);
540 int c3 = psc.getSite(codss[codss.size() - 1]).getValue(0);
541 if (gCode->isStop(gCode->getSourceAlphabet()->getCodon(c1, c2, c3)))
542 first = codss[codss.size() - 1];
543 for (
size_t i = first; i < psc.getNumberOfSites(); i++)
545 if (find(codss.begin(), codss.end(), i) == codss.end())
550 const SiteContainer* sc = SiteContainerTools::getSelectedSites(psc, ss);
552 for (
size_t i = 0; i < psc.getNumberOfSequences(); i++)
572 Comments maseFileHeader = psc.getGeneralComments();
573 if (!maseFileHeader.size())
575 map<string, size_t> groupMap = MaseTools::getAvailableSequenceSelections(maseFileHeader);
576 for (map<string, size_t>::iterator mi = groupMap.begin(); mi != groupMap.end(); mi++)
579 if (key.compare(0, 7,
"INGROUP") == 0)
581 StringTokenizer* sptk =
new StringTokenizer(key,
"_");
582 speciesName = sptk->getToken(1) +
" " + sptk->getToken(2);
void setAsOutgroupMember(size_t index)
Set the sequence at the given index as an outgroup member.
The GroupNotFoundException class.
PolymorphismSequenceContainer * clone() const
Clone a PolymorphismSequenceContainer.
size_t getGroupId(size_t index) const
Get the group identifier of the sequence.
bool isIngroupMember(size_t index) const
Tell whether the sequence at the given index is an ingroup member.
unsigned int getSequenceCount(size_t index) const
Get the count of a sequence by index.
void addSequenceWithFrequency(const Sequence &sequence, unsigned int frequency, bool checkName=true)
Add a sequence to the container with the given frequency.
void setGroupId(size_t index, size_t group_id)
Set the group identifier of a sequence.
void deleteSequence(size_t index)
Delete a sequence by index.
void setAsIngroupMember(size_t index)
Set the sequence at the given index as an ingroup member.
void setSequenceCount(size_t index, unsigned int count)
Set the count of a sequence by index.
The PolymorphismSequenceContainer class.