43 #include "../PatternTools.h" 44 #include "../SitePatterns.h" 47 #include <Bpp/App/ApplicationTools.h> 48 #include <Bpp/Numeric/AutoParameter.h> 51 #include <Bpp/Seq/SiteTools.h> 52 #include <Bpp/Seq/Sequence.h> 53 #include <Bpp/Seq/Container/AlignedSequenceContainer.h> 54 #include <Bpp/Seq/DistanceMatrix.h> 69 const std::string& seq1,
const std::string& seq2,
70 const SiteContainer& data,
72 DiscreteDistribution* rDist,
73 bool verbose)
throw (Exception) :
75 shrunkData_(0), seqnames_(2), model_(model), brLenParameters_(), pxy_(), dpxy_(), d2pxy_(),
76 rootPatternLinks_(), rootWeights_(), nbSites_(0), nbClasses_(0), nbStates_(0), nbDistinctSites_(0),
77 rootLikelihoods_(), rootLikelihoodsS_(), rootLikelihoodsSR_(), dLikelihoods_(), d2Likelihoods_(),
78 leafLikelihoods1_(), leafLikelihoods2_(),
79 minimumBrLen_(0.000001), brLenConstraint_(0), brLen_(0)
84 if (data_->getAlphabet()->getAlphabetType()
85 != model_->getAlphabet()->getAlphabetType())
86 throw AlphabetMismatchException(
"TwoTreeTreeLikelihood::TwoTreeTreeLikelihood. Data and model must have the same alphabet type.",
88 model_->getAlphabet());
90 nbSites_ = data_->getNumberOfSites();
91 nbClasses_ = rateDistribution_->getNumberOfCategories();
92 nbStates_ = model_->getNumberOfStates();
94 ApplicationTools::displayMessage(
"Double-Recursive Homogeneous Tree Likelihood");
101 nbDistinctSites_ = shrunkData_->getNumberOfSites();
103 ApplicationTools::displayResult(
"Number of distinct sites", TextTools::toString(nbDistinctSites_));
106 if (verbose) ApplicationTools::displayTask(
"Init likelihoods arrays recursively");
108 const SiteContainer* sequences =
new AlignedSequenceContainer(*shrunkData_);
109 initTreeLikelihoods(*sequences);
112 brLen_ = minimumBrLen_;
113 brLenConstraint_ =
new IntervalConstraint(1, minimumBrLen_,
true);
115 if (verbose) ApplicationTools::displayTaskDone();
122 shrunkData_ (dynamic_cast<SiteContainer*>(lik.shrunkData_->clone())),
123 seqnames_ (lik.seqnames_),
125 brLenParameters_ (lik.brLenParameters_),
129 rootPatternLinks_ (lik.rootPatternLinks_),
130 rootWeights_ (lik.rootWeights_),
131 nbSites_ (lik.nbSites_),
132 nbClasses_ (lik.nbClasses_),
133 nbStates_ (lik.nbStates_),
134 nbDistinctSites_ (lik.nbDistinctSites_),
135 rootLikelihoods_ (lik.rootLikelihoods_),
136 rootLikelihoodsS_ (lik.rootLikelihoodsS_),
137 rootLikelihoodsSR_ (lik.rootLikelihoodsSR_),
138 dLikelihoods_ (lik.dLikelihoods_),
139 d2Likelihoods_ (lik.d2Likelihoods_),
140 leafLikelihoods1_ (lik.leafLikelihoods1_),
141 leafLikelihoods2_ (lik.leafLikelihoods2_),
142 minimumBrLen_ (lik.minimumBrLen_),
143 brLenConstraint_ (dynamic_cast<Constraint*>(lik.brLenConstraint_->clone())),
199 if (!
initialized_)
throw Exception(
"TwoTreeLikelihood::getBranchLengthsParameters(). Object is not initialized.");
207 if (!
initialized_)
throw Exception(
"TwoTreeLikelihood::getSubstitutionModelParameters(). Object is not initialized.");
208 return model_->getParameters().getCommonParametersWith(getParameters());
289 addParameters_(
model_->getIndependentParameters());
300 brLen_ = getParameterValue(
"BrLen");
302 model_->matchParametersValues(getParameters());
313 ApplicationTools::displayWarning(
"Branch length is too small: " + TextTools::toString(
brLen_) +
". Value is set to " + TextTools::toString(
minimumBrLen_));
323 throw (ParameterNotFoundException, ConstraintException)
325 setParametersValues(parameters);
340 VVdouble* pxy_c = &
pxy_[c];
345 Vdouble* pxy_c_x = &(*pxy_c)[x];
349 (*pxy_c_x)[y] = Q(x, y);
360 VVdouble* dpxy_c = &
dpxy_[c];
366 Vdouble* dpxy_c_x = &(*dpxy_c)[x];
370 (*dpxy_c_x)[y] = rc * dQ(x, y);
382 VVdouble* d2pxy_c = &
d2pxy_[c];
388 Vdouble* d2pxy_c_x = &(*d2pxy_c)[x];
392 (*d2pxy_c_x)[y] = rc * rc * d2Q(x, y);
421 const Sequence* seq1 = &sequences.getSequence(seqnames_[0]);
422 const Sequence* seq2 = &sequences.getSequence(seqnames_[1]);
423 leafLikelihoods1_.resize(nbDistinctSites_);
424 leafLikelihoods2_.resize(nbDistinctSites_);
425 for (
size_t i = 0; i < nbDistinctSites_; i++)
427 Vdouble* leafLikelihoods1_i = &leafLikelihoods1_[i];
428 Vdouble* leafLikelihoods2_i = &leafLikelihoods2_[i];
429 leafLikelihoods1_i->resize(nbStates_);
430 leafLikelihoods2_i->resize(nbStates_);
431 int state1 = seq1->getValue(i);
432 int state2 = seq2->getValue(i);
433 for (
size_t s = 0; s < nbStates_; s++)
439 (*leafLikelihoods1_i)[s] = model_->getInitValue(s, state1);
440 (*leafLikelihoods2_i)[s] = model_->getInitValue(s, state2);
442 catch (SequenceNotFoundException& snfe)
444 throw SequenceNotFoundException(
"TwoTreeLikelihood::initTreelikelihoods. Leaf name in tree not found in site conainer: ", snfe.getSequenceId());
450 rootLikelihoods_.resize(nbDistinctSites_);
451 rootLikelihoodsS_.resize(nbDistinctSites_);
452 rootLikelihoodsSR_.resize(nbDistinctSites_);
453 for (
size_t i = 0; i < nbDistinctSites_; i++)
455 VVdouble* rootLikelihoods_i = &rootLikelihoods_[i];
456 Vdouble* rootLikelihoodsS_i = &rootLikelihoodsS_[i];
457 rootLikelihoods_i->resize(nbClasses_);
458 rootLikelihoodsS_i->resize(nbClasses_);
459 for (
size_t c = 0; c < nbClasses_; c++)
461 Vdouble* rootLikelihoods_i_c = &(*rootLikelihoods_i)[c];
462 rootLikelihoods_i_c->resize(nbStates_);
463 for (
size_t s = 0; s < nbStates_; s++)
465 (*rootLikelihoods_i_c)[s] = 1.;
471 dLikelihoods_.resize(nbDistinctSites_);
472 d2Likelihoods_.resize(nbDistinctSites_);
486 Vdouble* rootLikelihoods_i_c = &(*rootLikelihoods_i)[c];
487 VVdouble* pxy_c = &
pxy_[c];
490 Vdouble* pxy_c_x = &(*pxy_c)[x];
492 double l1 = (*leafLikelihoods1_i)[x];
495 double l2 = (*leafLikelihoods2_i)[y];
496 l += l1 * l2 * (*pxy_c_x)[y];
498 (*rootLikelihoods_i_c)[x] = l;
513 (*rootLikelihoodsS_i)[c] = 0;
515 Vdouble* rootLikelihoods_i_c = &(*rootLikelihoods_i)[c];
519 (*rootLikelihoodsS_i)[c] += fr[x] * (*rootLikelihoods_i_c)[x];
537 VVdouble* dpxy_c = &
dpxy_[c];
541 Vdouble* dpxy_c_x = &(*dpxy_c)[x];
542 double l1 = (*leafLikelihoods1_i)[x];
546 double l2 = (*leafLikelihoods2_i)[y];
547 dlicx += l1 * l2 * (*dpxy_c_x)[y];
568 VVdouble* d2pxy_c = &
d2pxy_[c];
572 Vdouble* d2pxy_c_x = &(*d2pxy_c)[x];
573 double l1 = (*leafLikelihoods1_i)[x];
577 double l2 = (*leafLikelihoods2_i)[y];
578 d2licx += l1 * l2 * (*d2pxy_c_x)[y];
593 if (!hasParameter(variable))
594 throw ParameterNotFoundException(
"TwoTreeLikelihood::getFirstOrderDerivative().", variable);
595 if (getRateDistributionParameters().hasParameter(variable))
597 cout <<
"DEBUB: WARNING!!! Derivatives respective to rate distribution parameter are not implemented." << endl;
600 if (getSubstitutionModelParameters().hasParameter(variable))
602 cout <<
"DEBUB: WARNING!!! Derivatives respective to substitution model parameters are not implemented." << endl;
612 for (
size_t i = 0; i < nbDistinctSites_; i++)
614 d += rootWeights_[i] * dLikelihoods_[i];
624 if (!hasParameter(variable))
625 throw ParameterNotFoundException(
"TwoTreeLikelihood::getSecondOrderDerivative().", variable);
626 if (getRateDistributionParameters().hasParameter(variable))
628 cout <<
"DEBUB: WARNING!!! Derivatives respective to rate distribution parameter are not implemented." << endl;
631 if (getSubstitutionModelParameters().hasParameter(variable))
633 cout <<
"DEBUB: WARNING!!! Derivatives respective to substitution model parameters are not implemented." << endl;
643 for (
size_t i = 0; i < nbDistinctSites_; i++)
645 d2 += rootWeights_[i] * (d2Likelihoods_[i] - pow(dLikelihoods_[i], 2));
654 size_t n =
sites_->getNumberOfSequences();
655 vector<string> names =
sites_->getSequencesNames();
657 dist_ =
new DistanceMatrix(names);
658 optimizer_->setVerbose(static_cast<unsigned int>(max(static_cast<int>(
verbose_) - 2, 0)));
659 for (
size_t i = 0; i < n; ++i)
664 ApplicationTools::displayGauge(i, n - 1,
'=');
666 for (
size_t j = i + 1; j < n; j++)
670 ApplicationTools::displayGauge(j - i - 1, n - i - 2,
'=');
676 size_t d = SymbolListTools::getNumberOfDistinctPositions(
sites_->getSequence(i),
sites_->getSequence(j));
677 size_t g = SymbolListTools::getNumberOfPositionsWithoutGap(
sites_->getSequence(i),
sites_->getSequence(j));
681 optimizer_->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
687 (*dist_)(i, j) = (*
dist_)(j, i) = lik->getParameterValue(
"BrLen");
690 if (
verbose_ > 1 && ApplicationTools::message) ApplicationTools::message->endLine();
VVdouble leafLikelihoods1_
Interface for all substitution models.
Constraint * brLenConstraint_
VVdouble rootLikelihoodsS_
auto_ptr< DiscreteDistribution > rateDist_
const std::vector< size_t > & getIndices() const
VVdouble leafLikelihoods2_
Vdouble rootLikelihoodsSR_
Partial implementation of the DiscreteRatesAcrossSitesTreeLikelihood interface.
TwoTreeLikelihood(const std::string &seq1, const std::string &seq2, const SiteContainer &data, SubstitutionModel *model, DiscreteDistribution *rDist, bool verbose)
ParameterList getBranchLengthsParameters() const
Get the branch lengths parameters.
This class is a simplified version of DRHomogeneousTreeLikelihood for 2-Trees.
void setParameters(const ParameterList ¶meters)
Implements the Function interface.
double getLikelihoodForASite(size_t site) const
Get the likelihood for a site.
virtual void initParameters()
This builds the parameters list from all parametrizable objects, i.e. substitution model...
TwoTreeLikelihood & operator=(const TwoTreeLikelihood &lik)
SubstitutionModel * model_
virtual ~TwoTreeLikelihood()
const SiteContainer * sites_
void enableDerivatives(bool yn)
Tell if derivatives must be computed.
virtual void initBranchLengthsParameters()
const std::vector< unsigned int > & getWeights() const
virtual double getMinimumBranchLength() const
double getLogLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const
Get the logarithm of the likelihood for a site knowing its rate class.
void fireParameterChanged(const ParameterList ¶ms)
virtual const Vdouble & getFrequencies() const =0
virtual const Matrix< double > & getd2Pij_dt2(double t) const =0
std::vector< unsigned int > rootWeights_
The frequency of each site.
virtual const Matrix< double > & getPij_t(double t) const =0
double getLogLikelihood() const
Get the logarithm of the likelihood for the whole dataset.
void initialize()
Init the likelihood object.
virtual void initTreeLikelihoods(const SequenceContainer &sequences)
This method initializes the leaves according to a sequence container.
bool computeFirstOrderDerivatives_
virtual void computeTreeDLikelihood()
void computeMatrix()
Perform the distance computation.
std::vector< std::string > seqnames_
std::vector< size_t > rootPatternLinks_
As previous, but for the global container.
virtual void computeTreeLikelihood()
virtual void computeTreeD2Likelihood()
AbstractDiscreteRatesAcrossSitesTreeLikelihood & operator=(const AbstractDiscreteRatesAcrossSitesTreeLikelihood &tl)
SiteContainer * getSites() const
double getLogLikelihoodForASite(size_t site) const
Get the logarithm of the likelihood for a site.
double getSecondOrderDerivative(const std::string &variable) const
double getFirstOrderDerivative(const std::string &variable) const
double getLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const
Get the likelihood for a site knowing its rate class.
double getLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
Get the likelihood for a site knowing its rate class and its ancestral state.
SiteContainer * shrunkData_
virtual double freq(size_t i) const =0
double getLogLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
Get the logarithm of the likelihood for a site knowing its rate class and its ancestral state...
virtual const Matrix< double > & getdPij_dt(double t) const =0
Data structure for site patterns.
DiscreteDistribution * rateDistribution_
double getLikelihood() const
Get the likelihood for the whole dataset.
ParameterList brLenParameters_
VVVdouble rootLikelihoods_
ParameterList parameters_
virtual void applyParameters()
All parameters are stores in a parameter list.
bool computeSecondOrderDerivatives_
ParameterList getSubstitutionModelParameters() const
Get the parameters associated to substitution model(s).
auto_ptr< SubstitutionModel > model_