41 #include <Bpp/Seq/Container/SequenceContainerTools.h> 42 #include <Bpp/Seq/Container/VectorSiteContainer.h> 43 #include <Bpp/Seq/SiteTools.h> 46 #include <Bpp/Numeric/NumConstants.h> 59 if (seqs1.size() > 1 || seqs2.size() > 1)
60 throw Exception(
"PairwiseDivergenceMafStatistics::compute. Duplicated sequence for species " + species1_ +
"or " + species2_ +
".");
61 if (seqs1.size() == 0 || seqs2.size() == 0)
62 result_.setValue(NumConstants::NaN());
64 result_.setValue(100. - SequenceTools::getPercentIdentity(*seqs1[0], *seqs2[0],
true));
70 for (
int i = 0; i < static_cast<int>(alphabet_->getSize()); ++i) {
71 tags.push_back(alphabet_->intToChar(i));
73 tags.push_back(
"Gap");
74 tags.push_back(
"Unresolved");
81 std::map<int, int> counts;
82 SequenceContainerTools::getCounts(block.
getAlignment(), counts);
83 for (
int i = 0; i < static_cast<int>(alphabet_->getSize()); ++i) {
84 result_.setValue(alphabet_->intToChar(i), counts[i]);
86 result_.setValue(
"Gap", counts[alphabet_->getGapCharacterCode()]);
87 double countUnres = 0;
88 for (map<int, int>::iterator it = counts.begin(); it != counts.end(); ++it) {
89 if (alphabet_->isUnresolved(it->first))
90 countUnres += it->second;
92 result_.setValue(
"Unresolved", countUnres);
97 VectorSiteContainer* alignment =
new VectorSiteContainer(block.
getAlignment().getAlphabet());
98 for (
size_t i = 0; i < species_.size(); ++i) {
101 for (
size_t j = 0; j < selection.size(); ++j) {
102 alignment->addSequence(*selection[j]);
112 size_t n = VectorTools::vectorUnion(species).size();
114 for (
size_t i = 0; i < species.size(); ++i)
117 throw Exception(
"AbstractSpeciesMultipleSelectionMafStatistics (constructor). Species selections must be fully distinct.");
122 vector<SiteContainer*> alignments;
123 for (
size_t k = 0; k <
species_.size(); ++k) {
124 VectorSiteContainer* alignment =
new VectorSiteContainer(block.
getAlignment().getAlphabet());
125 for (
size_t i = 0; i <
species_[k].size(); ++i) {
128 for (
size_t j = 0; j < selection.size(); ++j) {
129 alignment->addSequence(*selection[j]);
133 alignments.push_back(alignment);
142 tags.push_back(
"Bin" + TextTools::toString(i + 1));
144 tags.push_back(
"Unresolved");
145 tags.push_back(
"Saturated");
146 tags.push_back(
"Ignored");
152 unsigned int nbUnresolved = 0;
153 unsigned int nbSaturated = 0;
154 unsigned int nbIgnored = 0;
159 auto_ptr<SiteContainer> alignment;
160 const Sequence* outgroupSeq = 0;
174 for (
size_t i = 0; i < alignment->getNumberOfSites(); ++i) {
176 const Site& site = alignment->getSite(i);
177 map<int, unsigned int> counts;
178 bool isUnresolved =
false;
179 bool isSaturated =
false;
180 for (
size_t j = 0; !isUnresolved && !isSaturated && j < site.size(); ++j) {
186 if (counts.size() > 2) {
193 }
else if (isSaturated) {
195 }
else if (hasOutgroup && (
196 alignment->getAlphabet()->isGap((*outgroupSeq)[i]) ||
197 alignment->getAlphabet()->isUnresolved((*outgroupSeq)[i]))) {
202 if (counts.size() == 1) {
204 if (counts.begin()->first == (*outgroupSeq)[i])
207 count = counts.begin()->second;
212 map<int, unsigned int>::iterator it = counts.begin();
213 unsigned int count1 = it->second;
215 unsigned int count2 = it->second;
217 if (counts.begin()->first == (*outgroupSeq)[i])
218 count = counts.rbegin()->second;
219 else if (counts.rbegin()->first == (*outgroupSeq)[i])
220 count = counts.begin()->second;
226 count = min(count1, count2);
234 }
catch (OutOfRangeException& oof) {
244 for (
size_t i = 0; i <
counts_.size(); ++i) {
252 tags.push_back(
"f1100");
253 tags.push_back(
"f0110");
254 tags.push_back(
"f1010");
255 tags.push_back(
"Ignored");
263 if (alignment->getNumberOfSequences() == 4) {
264 unsigned int nbIgnored = 0;
266 const Site& site = alignment->getSite(i);
267 if (SiteTools::isComplete(site)) {
268 if (site[0] == site[1] &&
269 site[2] != site[1] &&
272 else if (site[1] == site[2] &&
273 site[1] != site[0] &&
276 else if (site[0] == site[2] &&
277 site[1] != site[0] &&
299 tags.push_back(
"NbWithoutGap");
300 tags.push_back(
"NbComplete");
301 tags.push_back(
"NbParsimonyInformative");
308 unsigned int nbNg = 0;
309 unsigned int nbCo = 0;
310 unsigned int nbPi = 0;
311 if (alignment->getNumberOfSequences() > 0) {
312 for (
size_t i = 0; i < alignment->getNumberOfSites(); ++i) {
313 if (!SiteTools::hasGap(alignment->getSite(i)))
315 if (SiteTools::isComplete(alignment->getSite(i)))
317 if (SiteTools::isParsimonyInformativeSite(alignment->getSite(i)))
331 tags.push_back(
"FP");
332 tags.push_back(
"PF");
333 tags.push_back(
"FF");
335 tags.push_back(
"FX");
336 tags.push_back(
"PX");
337 tags.push_back(
"XF");
338 tags.push_back(
"XP");
344 vector<int> patterns(sites.getNumberOfSites());
345 for (
size_t i = 0; i < sites.getNumberOfSites(); ++i) {
346 const Site& site = sites.getSite(i);
348 if (SiteTools::isComplete(site)) {
349 if (SiteTools::isConstant(site)) {
363 unsigned int nbF = 0;
364 unsigned int nbP = 0;
365 unsigned int nbFF = 0;
366 unsigned int nbFP = 0;
367 unsigned int nbPF = 0;
368 unsigned int nbX = 0;
369 unsigned int nbFX = 0;
370 unsigned int nbPX = 0;
371 unsigned int nbXF = 0;
372 unsigned int nbXP = 0;
376 if (alignments[0]->getNumberOfSequences() > 0) {
379 if (alignments[1]->getNumberOfSequences() > 0) {
384 int p1 = patterns1[i];
385 int p2 = patterns2[i];
446 tags.push_back(
"NbSeggregating");
447 tags.push_back(
"WattersonTheta");
454 unsigned int nbSeg = 0;
455 unsigned int nbTot = 0;
456 if (alignment->getNumberOfSequences() > 0) {
457 for (
size_t i = 0; i < alignment->getNumberOfSites(); ++i) {
458 const Site& site = alignment->getSite(i);
459 if (SiteTools::isComplete(site)) {
461 if (!SiteTools::isConstant(site))
468 size_t n = alignment->getNumberOfSequences();
470 for (
double i = 1; i < n; ++i)
472 wt =
static_cast<double>(nbSeg) / (static_cast<double>(nbTot) * hf);
const MafSequence & getSequenceForSpecies(const std::string &species) const
std::vector< std::string > getSupportedTags() const
void compute(const MafBlock &block)
void compute(const MafBlock &block)
std::vector< const MafSequence * > getSequencesForSpecies(const std::string &species) const
std::vector< std::string > getSupportedTags() const
std::vector< unsigned int > counts_
void compute(const MafBlock &block)
size_t getNumberOfSites() const
std::vector< std::string > getSupportedTags() const
void compute(const MafBlock &block)
AlignedSequenceContainer & getAlignment()
bool hasSequenceForSpecies(const std::string &species) const
A synteny block data structure, the basic unit of a MAF alignement file.
std::vector< std::string > getSupportedTags() const
SiteContainer * getSiteContainer_(const MafBlock &block)
void compute(const MafBlock &block)
void compute(const MafBlock &block)
std::vector< std::vector< std::string > > species_
const Alphabet * alphabet_
std::vector< SiteContainer * > getSiteContainers_(const MafBlock &block)
std::vector< unsigned int > counts_
size_t getCategory(double value) const
void compute(const MafBlock &block)
std::vector< std::string > getSupportedTags() const
static std::vector< int > getPatterns_(const SiteContainer &sites)
virtual void setValue(const std::string &tag, double value)
Associate a value to a certain tag. Any existing tag will be overwritten.
AbstractSpeciesMultipleSelectionMafStatistics(const std::vector< std::vector< std::string > > &species)
size_t getNumberOfSequences() const
MafStatisticsResult result_
size_t getNumberOfCategories() const
std::vector< std::string > getSupportedTags() const