41 #include "../PatternTools.h" 44 #include <Bpp/Seq/SiteTools.h> 45 #include <Bpp/Seq/Container/AlignedSequenceContainer.h> 46 #include <Bpp/Seq/Container/SequenceContainerTools.h> 48 #include <Bpp/Text/TextTools.h> 49 #include <Bpp/App/ApplicationTools.h> 63 DiscreteDistribution* rDist,
78 const SiteContainer& data,
80 DiscreteDistribution* rDist,
98 rateDistribution_->getNumberOfCategories());
141 ApplicationTools::displayTask(
"Initializing data structure");
142 likelihoodData_->initLikelihoods(*data_, *model_);
144 ApplicationTools::displayTaskDone();
146 nbSites_ = likelihoodData_->getNumberOfSites();
147 nbDistinctSites_ = likelihoodData_->getNumberOfDistinctSites();
148 nbStates_ = likelihoodData_->getNumberOfStates();
151 ApplicationTools::displayResult(
"Number of distinct sites",
152 TextTools::toString(nbDistinctSites_));
153 initialized_ =
false;
165 l *= std::pow((*lik)[i], (
int)(*w)[i]);
180 la[i] = (*w)[i] * log((*lik)[i]);
182 sort(la.begin(), la.end());
234 throw (ParameterNotFoundException, ConstraintException)
236 setParametersValues(parameters);
245 if (
rateDistribution_->getParameters().getCommonParametersWith(params).size() > 0
246 ||
model_->getParameters().getCommonParametersWith(params).size() > 0)
251 else if (params.size() > 0)
254 for (
size_t i = 0; i < params.size(); i++)
256 string s = params[i].getName();
257 if (s.substr(0, 5) ==
"BrLen")
284 throw Exception(
"DRHomogeneousTreeLikelihood::getValue(). Instance is not initialized.");
301 double dLi, dLic, dLicx;
305 VVdouble* likelihoods_father_node_i = &(*likelihoods_father_node)[i];
306 VVdouble* larray_i = &larray[i];
310 Vdouble* likelihoods_father_node_i_c = &(*likelihoods_father_node_i)[c];
311 Vdouble* larray_i_c = &(*larray_i)[c];
312 VVdouble* dpxy_node_c = &(*dpxy_node)[c];
316 Vdouble* dpxy_node_c_x = &(*dpxy_node_c)[x];
320 dLicx += (*dpxy_node_c_x)[y] * (*likelihoods_father_node_i_c)[y];
322 dLicx *= (*larray_i_c)[x];
327 (*dLikelihoods_node)[i] = dLi / (*rootLikelihoodsSR)[i];
336 for (
size_t k = 0; k <
nbNodes_; k++)
347 if (!hasParameter(variable))
348 throw ParameterNotFoundException(
"DRHomogeneousTreeLikelihood::getFirstOrderDerivative().", variable);
349 if (getRateDistributionParameters().hasParameter(variable))
351 throw Exception(
"Derivatives respective to rate distribution parameters are not implemented.");
353 if (getSubstitutionModelParameters().hasParameter(variable))
355 throw Exception(
"Derivatives respective to substitution model parameters are not implemented.");
363 size_t brI = TextTools::to<size_t>(variable.substr(5));
364 const Node* branch = nodes_[brI];
365 Vdouble* dLikelihoods_branch = &likelihoodData_->getDLikelihoodArray(branch->getId());
367 const vector<unsigned int>* w = &likelihoodData_->getWeights();
368 for (
size_t i = 0; i < nbDistinctSites_; i++)
370 d += (*w)[i] * (*dLikelihoods_branch)[i];
388 double d2Li, d2Lic, d2Licx;
392 VVdouble* likelihoods_father_node_i = &(*likelihoods_father_node)[i];
393 VVdouble* larray_i = &larray[i];
397 Vdouble* likelihoods_father_node_i_c = &(*likelihoods_father_node_i)[c];
398 Vdouble* larray_i_c = &(*larray_i)[c];
399 VVdouble* d2pxy_node_c = &(*d2pxy_node)[c];
403 Vdouble* d2pxy_node_c_x = &(*d2pxy_node_c)[x];
407 d2Licx += (*d2pxy_node_c_x)[y] * (*likelihoods_father_node_i_c)[y];
409 d2Licx *= (*larray_i_c)[x];
414 (*d2Likelihoods_node)[i] = d2Li / (*rootLikelihoodsSR)[i];
422 for (
size_t k = 0; k <
nbNodes_; k++)
433 if (!hasParameter(variable))
434 throw ParameterNotFoundException(
"DRHomogeneousTreeLikelihood::getSecondOrderDerivative().", variable);
435 if (getRateDistributionParameters().hasParameter(variable))
437 throw Exception(
"Derivatives respective to rate distribution parameters are not implemented.");
439 if (getSubstitutionModelParameters().hasParameter(variable))
441 throw Exception(
"Derivatives respective to substitution model parameters are not implemented.");
449 size_t brI = TextTools::to<size_t>(variable.substr(5));
450 const Node* branch = nodes_[brI];
451 Vdouble* _dLikelihoods_branch = &likelihoodData_->getDLikelihoodArray(branch->getId());
452 Vdouble* _d2Likelihoods_branch = &likelihoodData_->getD2LikelihoodArray(branch->getId());
454 const vector<unsigned int>* w = &likelihoodData_->getWeights();
455 for (
size_t i = 0; i < nbDistinctSites_; i++)
457 d2 += (*w)[i] * ((*_d2Likelihoods_branch)[i] - pow((*_dLikelihoods_branch)[i], 2));
501 for (
size_t l = 0; l < nbNodes; l++)
506 VVVdouble* _likelihoods_node_son = &(*_likelihoods_node)[son->
getId()];
514 Vdouble* _likelihoods_leaf_i = &(*_likelihoods_leaf)[i];
515 VVdouble* _likelihoods_node_son_i = &(*_likelihoods_node_son)[i];
519 Vdouble* _likelihoods_node_son_i_c = &(*_likelihoods_node_son_i)[c];
523 (*_likelihoods_node_son_i_c)[x] = (*_likelihoods_leaf_i)[x];
534 vector<const VVVdouble*> iLik(nbSons);
535 vector<const VVVdouble*> tProb(nbSons);
536 for (
size_t n = 0; n < nbSons; n++)
540 iLik[n] = &(*_likelihoods_son)[sonSon->
getId()];
556 for (
size_t n = 0; n < nbSons; n++)
567 VVVdouble* _likelihoods_node_father = &(*_likelihoods_node)[father->
getId()];
580 Vdouble* _likelihoods_leaf_i = &(*_likelihoods_leaf)[i];
581 VVdouble* _likelihoods_node_father_i = &(*_likelihoods_node_father)[i];
585 Vdouble* _likelihoods_node_father_i_c = &(*_likelihoods_node_father_i)[c];
589 (*_likelihoods_node_father_i_c)[x] = (*_likelihoods_leaf_i)[x];
596 vector<const Node*> nodes;
599 for (
size_t n = 0; n < nbFatherSons; n++)
603 nodes.push_back(son);
609 size_t nbSons = nodes.size();
611 vector<const VVVdouble*> iLik(nbSons);
612 vector<const VVVdouble*> tProb(nbSons);
613 for (
size_t n = 0; n < nbSons; n++)
615 const Node* fatherSon = nodes[n];
617 iLik[n] = &(*_likelihoods_father)[fatherSon->
getId()];
636 VVdouble* _likelihoods_node_father_i = &(*_likelihoods_node_father)[i];
639 Vdouble* _likelihoods_node_father_i_c = &(*_likelihoods_node_father_i)[c];
642 (*_likelihoods_node_father_i_c)[x] *=
rootFreqs_[x];
650 for (
size_t i = 0; i < nbNodeSons; i++)
669 VVdouble* rootLikelihoods_i = &(*rootLikelihoods)[i];
670 Vdouble* leavesLikelihoods_root_i = &(*leavesLikelihoods_root)[i];
673 Vdouble* rootLikelihoods_i_c = &(*rootLikelihoods_i)[c];
676 (*rootLikelihoods_i_c)[x] = (*leavesLikelihoods_root_i)[x];
688 vector<const VVVdouble*> iLik(nbNodes);
689 vector<const VVVdouble*> tProb(nbNodes);
690 for (
size_t n = 0; n < nbNodes; n++)
694 iLik[n] = &(*likelihoods_root)[son->
getId()];
704 VVdouble* rootLikelihoods_i = &(*rootLikelihoods)[i];
705 Vdouble* rootLikelihoodsS_i = &(*rootLikelihoodsS)[i];
706 (*rootLikelihoodsSR)[i] = 0;
710 Vdouble* rootLikelihoods_i_c = &(*rootLikelihoods_i)[c];
711 double* rootLikelihoodsS_i_c = &(*rootLikelihoodsS_i)[c];
712 (*rootLikelihoodsS_i_c) = 0;
716 (*rootLikelihoodsS_i_c) +=
rootFreqs_[x] * (*rootLikelihoods_i_c)[x];
718 (*rootLikelihoodsSR)[i] += p[c] * (*rootLikelihoodsS_i_c);
722 if ((*rootLikelihoodsSR)[i] < 0)
723 (*rootLikelihoodsSR)[i] = 0.;
732 int nodeId = node->
getId();
742 VVdouble* likelihoodArray_i = &likelihoodArray[i];
743 Vdouble* leavesLikelihoods_node_i = &(*leavesLikelihoods_node)[i];
747 Vdouble* likelihoodArray_i_c = &(*likelihoodArray_i)[c];
751 (*likelihoodArray_i_c)[x] = (*leavesLikelihoods_node_i)[x];
762 VVdouble* likelihoodArray_i = &likelihoodArray[i];
766 Vdouble* likelihoodArray_i_c = &(*likelihoodArray_i)[c];
770 (*likelihoodArray_i_c)[x] = 1.;
778 vector<const VVVdouble*> iLik;
779 vector<const VVVdouble*> tProb;
781 for (
size_t n = 0; n < nbNodes; n++)
784 if (son != sonNode) {
786 iLik.push_back(&(*likelihoods_node)[son->
getId()]);
795 throw Exception(
"DRHomogeneousTreeLikelihood::computeLikelihoodAtNode_(...). 'sonNode' not found as a son of 'node'.");
810 VVdouble* likelihoodArray_i = &likelihoodArray[i];
813 Vdouble* likelihoodArray_i_c = &(*likelihoodArray_i)[c];
826 const vector<const VVVdouble*>& iLik,
827 const vector<const VVVdouble*>& tProb,
830 size_t nbDistinctSites,
838 for (
size_t n = 0; n < nbNodes; n++)
840 const VVVdouble* pxy_n = tProb[n];
841 const VVVdouble* iLik_n = iLik[n];
843 for (
size_t i = 0; i < nbDistinctSites; i++)
846 const VVdouble* iLik_n_i = &(*iLik_n)[i];
847 VVdouble* oLik_i = &(oLik)[i];
849 for (
size_t c = 0; c < nbClasses; c++)
852 const Vdouble* iLik_n_i_c = &(*iLik_n_i)[c];
853 Vdouble* oLik_i_c = &(*oLik_i)[c];
854 const VVdouble* pxy_n_c = &(*pxy_n)[c];
855 for (
size_t x = 0; x < nbStates; x++)
858 const Vdouble* pxy_n_c_x = &(*pxy_n_c)[x];
859 double likelihood = 0;
860 for (
size_t y = 0; y < nbStates; y++)
862 likelihood += (*pxy_n_c_x)[y] * (*iLik_n_i_c)[y];
865 (*oLik_i_c)[x] *= likelihood;
875 const vector<const VVVdouble*>& iLik,
876 const vector<const VVVdouble*>& tProb,
877 const VVVdouble* iLikR,
878 const VVVdouble* tProbR,
881 size_t nbDistinctSites,
889 for (
size_t n = 0; n < nbNodes; n++)
891 const VVVdouble* pxy_n = tProb[n];
892 const VVVdouble* iLik_n = iLik[n];
894 for (
size_t i = 0; i < nbDistinctSites; i++)
897 const VVdouble* iLik_n_i = &(*iLik_n)[i];
898 VVdouble* oLik_i = &(oLik)[i];
900 for (
size_t c = 0; c < nbClasses; c++)
903 const Vdouble* iLik_n_i_c = &(*iLik_n_i)[c];
904 Vdouble* oLik_i_c = &(*oLik_i)[c];
905 const VVdouble* pxy_n_c = &(*pxy_n)[c];
906 for (
size_t x = 0; x < nbStates; x++)
909 const Vdouble* pxy_n_c_x = &(*pxy_n_c)[x];
910 double likelihood = 0;
911 for (
size_t y = 0; y < nbStates; y++)
915 likelihood += (*pxy_n_c_x)[y] * (*iLik_n_i_c)[y];
919 (*oLik_i_c)[x] *= likelihood;
926 for (
size_t i = 0; i < nbDistinctSites; i++)
929 const VVdouble* iLikR_i = &(*iLikR)[i];
930 VVdouble* oLik_i = &(oLik)[i];
932 for (
size_t c = 0; c < nbClasses; c++)
935 const Vdouble* iLikR_i_c = &(*iLikR_i)[c];
936 Vdouble* oLik_i_c = &(*oLik_i)[c];
937 const VVdouble* pxyR_c = &(*tProbR)[c];
938 for (
size_t x = 0; x < nbStates; x++)
940 double likelihood = 0;
941 for (
size_t y = 0; y < nbStates; y++)
944 const Vdouble* pxyR_c_y = &(*pxyR_c)[y];
945 likelihood += (*pxyR_c_y)[x] * (*iLikR_i_c)[y];
948 (*oLik_i_c)[x] *= likelihood;
958 cout <<
"Likelihoods at node " << node->
getId() <<
": " << endl;
962 cout <<
"Array for sub-node " << subNode->
getId() << endl;
968 cout <<
"Array for father node " << father->
getId() << endl;
971 cout <<
" ***" << endl;
virtual void computeTreeD2Likelihoods()
std::vector< double > rootFreqs_
double getFirstOrderDerivative(const std::string &variable) const
DRHomogeneousTreeLikelihood & operator=(const DRHomogeneousTreeLikelihood &lik)
double getLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
Get the likelihood for a site knowing its rate class and its ancestral state.
Interface for all substitution models.
virtual void computeAllTransitionProbabilities()
Fill the pxy_, dpxy_ and d2pxy_ arrays for all nodes.
VVVdouble & getRootLikelihoodArray()
double getLikelihood() const
Get the likelihood for the whole dataset.
std::map< int, VVVdouble > dpxy_
void setParameters(const ParameterList ¶meters)
Implements the Function interface.
DRHomogeneousTreeLikelihood(const Tree &tree, SubstitutionModel *model, DiscreteDistribution *rDist, bool checkRooted=true, bool verbose=true)
Build a new DRHomogeneousTreeLikelihood object without data.
AbstractHomogeneousTreeLikelihood & operator=(const AbstractHomogeneousTreeLikelihood &lik)
Assignation operator.
virtual void computeTreeD2LikelihoodAtNode(const Node *node)
VVdouble & getRootSiteLikelihoodArray()
virtual void computeSubtreeLikelihoodPostfix(const Node *node)
DRASDRTreeLikelihoodData * likelihoodData_
virtual const Node * getSon(size_t pos) const
Vdouble & getRootRateSiteLikelihoodArray()
virtual void resetLikelihoodArrays(const Node *node)
const std::map< int, VVVdouble > & getLikelihoodArrays(int nodeId) const
Vdouble & getDLikelihoodArray(int nodeId)
const std::vector< unsigned int > & getWeights() const
virtual const Node * getFather() const
Get the father of this node is there is one.
Vdouble & getD2LikelihoodArray(int nodeId)
Interface for phylogenetic tree objects.
static void displayLikelihoodArray(const VVVdouble &likelihoodArray)
Print the likelihood array to terminal (debugging tool).
SubstitutionModel * model_
double getLogLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
Get the logarithm of the likelihood for a site knowing its rate class and its ancestral state...
void init_()
Method called by constructors.
virtual bool hasFather() const
Tell if this node has a father node.
virtual bool isLeaf() const
virtual void computeRootLikelihood()
void setData(const SiteContainer &sites)
Set the dataset for which the likelihood must be evaluated.
double getValue() const
Function and NNISearchable interface.
void computeTreeLikelihood()
double getLogLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const
Get the logarithm of the likelihood for a site knowing its rate class.
VVdouble & getLeafLikelihoods(int nodeId)
virtual int getId() const
Get the node's id.
std::map< int, VVVdouble > d2pxy_
double getLikelihoodForASite(size_t site) const
Get the likelihood for a site.
virtual void fireParameterChanged(const ParameterList ¶ms)
bool computeFirstOrderDerivatives_
This class implements the likelihood computation for a tree using the double-recursive algorithm...
static void resetLikelihoodArray(VVVdouble &likelihoodArray)
Set all conditional likelihoods to 1.
double getSecondOrderDerivative(const std::string &variable) const
TreeTemplate< Node > * tree_
VVVdouble & getLikelihoodArray(int parentId, int neighborId)
The phylogenetic node class.
virtual void displayLikelihood(const Node *node)
This method is mainly for debugging purpose.
double getLogLikelihoodForASite(size_t site) const
Get the logarithm of the likelihood for a site.
virtual void computeTransitionProbabilitiesForNode(const Node *node)
Fill the pxy_, dpxy_ and d2pxy_ arrays for one node.
std::vector< Node * > nodes_
Pointer toward all nodes in the tree.
double getLogLikelihood() const
Get the logarithm of the likelihood for the whole dataset.
std::map< int, VVVdouble > pxy_
DiscreteDistribution * rateDistribution_
virtual size_t getNumberOfSons() const
static void computeLikelihoodFromArrays(const std::vector< const VVVdouble *> &iLik, const std::vector< const VVVdouble *> &tProb, VVVdouble &oLik, size_t nbNodes, size_t nbDistinctSites, size_t nbClasses, size_t nbStates, bool reset=true)
Compute conditional likelihoods.
virtual void computeSubtreeLikelihoodPrefix(const Node *node)
Likelihood data structure for rate across sites models, using a double-recursive algorithm.
DRASDRTreeLikelihoodData * clone() const
virtual void applyParameters()
All parameters are stored in a parameter list. This function apply these parameters to the substituti...
virtual void computeLikelihoodAtNode_(const Node *node, VVVdouble &likelihoodArray, const Node *sonNode=0) const
virtual void computeTreeDLikelihoods()
Partial implementation for homogeneous model of the TreeLikelihood interface.
bool isInitialized() const
bool computeSecondOrderDerivatives_
void setTree(const TreeTemplate< Node > *tree)
Set the tree associated to the data.
virtual ~DRHomogeneousTreeLikelihood()
size_t getRootArrayPosition(const size_t site) const
virtual void computeTreeDLikelihoodAtNode(const Node *node)
double getLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const
Get the likelihood for a site knowing its rate class.