bpp-phyl  2.2.0
DRHomogeneousMixedTreeLikelihood.cpp
Go to the documentation of this file.
1 //
2 // File: DRHomogeneousMixedTreeLikelihood.cpp
3 // Created by: Laurent Gueguen
4 //
5 
6 /*
7  Copyright or © or Copr. CNRS, (November 16, 2004)
8 
9  This software is a computer program whose purpose is to provide classes
10  for phylogenetic data analysis.
11 
12  This software is governed by the CeCILL license under French law and
13  abiding by the rules of distribution of free software. You can use,
14  modify and/ or redistribute the software under the terms of the CeCILL
15  license as circulated by CEA, CNRS and INRIA at the following URL
16  "http://www.cecill.info".
17 
18  As a counterpart to the access to the source code and rights to copy,
19  modify and redistribute granted by the license, users are provided only
20  with a limited warranty and the software's author, the holder of the
21  economic rights, and the successive licensors have only limited
22  liability.
23 
24  In this respect, the user's attention is drawn to the risks associated
25  with loading, using, modifying and/or developing or reproducing the
26  software by the user in light of its specific status of free software,
27  that may mean that it is complicated to manipulate, and that also
28  therefore means that it is reserved for developers and experienced
29  professionals having in-depth computer knowledge. Users are therefore
30  encouraged to load and test the software's suitability as regards their
31  requirements in conditions enabling the security of their systems and/or
32  data to be ensured and, more generally, to use and operate it in the
33  same conditions as regards security.
34 
35  The fact that you are presently reading this means that you have had
36  knowledge of the CeCILL license and that you accept its terms.
37  */
38 
40 
41 
42 // From the STL:
43 #include <iostream>
44 
45 #include <math.h>
46 #include "../PatternTools.h"
47 
48 #include <Bpp/Numeric/VectorTools.h>
49 #include <Bpp/App/ApplicationTools.h>
50 
51 using namespace bpp;
52 using namespace std;
53 
55  const Tree& tree,
56  SubstitutionModel* model,
57  DiscreteDistribution* rDist,
58  bool checkRooted,
59  bool verbose,
60  bool rootArray) throw (Exception) :
61  DRHomogeneousTreeLikelihood(tree, model, rDist, checkRooted, verbose),
62  treeLikelihoodsContainer_(),
63  probas_(),
64  rootArray_(rootArray)
65 {
66  MixedSubstitutionModel* mixedmodel;
67 
68  if ((mixedmodel = dynamic_cast<MixedSubstitutionModel*>(model_)) == NULL)
69  throw Exception("Bad model: DRHomogeneousMixedTreeLikelihood needs a MixedSubstitutionModel.");
70 
71  size_t s = mixedmodel->getNumberOfModels();
72  for (size_t i = 0; i < s; i++)
73  {
74  treeLikelihoodsContainer_.push_back(
75  new DRHomogeneousTreeLikelihood(tree, mixedmodel->getNModel(i), rDist, checkRooted, false));
76  probas_.push_back(mixedmodel->getNProbability(i));
77  }
78 }
79 
81  const Tree& tree,
82  const SiteContainer& data,
83  SubstitutionModel* model,
84  DiscreteDistribution* rDist,
85  bool checkRooted,
86  bool verbose,
87  bool rootArray)
88 throw (Exception) :
89  DRHomogeneousTreeLikelihood(tree, model, rDist, checkRooted, verbose),
90  treeLikelihoodsContainer_(),
91  probas_(),
92  rootArray_(rootArray)
93 {
94  MixedSubstitutionModel* mixedmodel;
95 
96  if ((mixedmodel = dynamic_cast<MixedSubstitutionModel*>(model_)) == NULL)
97  throw Exception("Bad model: DRHomogeneousMixedTreeLikelihood needs a MixedSubstitutionModel.");
98 
99  size_t s = mixedmodel->getNumberOfModels();
100 
101  for (size_t i = 0; i < s; i++)
102  {
103  treeLikelihoodsContainer_.push_back(
104  new DRHomogeneousTreeLikelihood(tree, mixedmodel->getNModel(i), rDist, checkRooted, false));
105  probas_.push_back(mixedmodel->getNProbability(i));
106  }
107  setData(data);
108 }
109 
110 
112 {
114 
115  treeLikelihoodsContainer_.clear();
116  probas_.clear();
117 
118  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
119  {
120  treeLikelihoodsContainer_.push_back(lik.treeLikelihoodsContainer_[i]->clone());
121  probas_.push_back(lik.probas_[i]);
122  }
123 
124  rootArray_=lik.rootArray_;
125 
126  return *this;
127 }
128 
131  treeLikelihoodsContainer_(lik.treeLikelihoodsContainer_.size()),
132  probas_(lik.probas_.size()),
133  rootArray_(lik.rootArray_)
134 {
135  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
136  {
137  treeLikelihoodsContainer_.push_back(lik.treeLikelihoodsContainer_[i]->clone());
138  probas_.push_back(lik.probas_[i]);
139  }
140 }
141 
143 {
144  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
145  {
146  delete treeLikelihoodsContainer_[i];
147  }
148 }
149 
150 
152 {
153  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
154  {
155  treeLikelihoodsContainer_[i]->initialize();
156  }
158  if(rootArray_)
160 }
161 
162 void DRHomogeneousMixedTreeLikelihood::setData(const SiteContainer& sites) throw (Exception)
163 {
165 
166  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
167  {
168  treeLikelihoodsContainer_[i]->setData(sites);
169  }
170 }
171 
172 
174 {
175  applyParameters();
176 
177  MixedSubstitutionModel* mixedmodel = dynamic_cast<MixedSubstitutionModel*>(model_);
178 
179  size_t s = mixedmodel->getNumberOfModels();
180 
181  const SubstitutionModel* pm;
182  for (size_t i = 0; i < s; i++)
183  {
184  ParameterList pl;
185  pm = mixedmodel->getNModel(i);
186  pl.addParameters(pm->getParameters());
187  pl.includeParameters(getParameters());
188  treeLikelihoodsContainer_[i]->matchParametersValues(pl);
189  }
190  probas_ = mixedmodel->getProbabilities();
191 
193 }
194 
196 {
197  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
198  {
199  treeLikelihoodsContainer_[i]->resetLikelihoodArrays(node);
200  }
201 }
202 
204 {
205  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
206  {
207  treeLikelihoodsContainer_[i]->computeTreeLikelihood();
208  }
209  if(rootArray_)
211 }
212 
213 /******************************************************************************
214 * Likelihoods *
215 ******************************************************************************/
216 
218 {
219  double l = 1.;
220  vector<Vdouble*> llik;
221  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
222  {
224  }
225 
226  double x;
227  const vector<unsigned int> * w = &likelihoodData_->getWeights();
228  for (unsigned int i = 0; i < nbDistinctSites_; i++)
229  {
230  x = 0;
231  for (unsigned int j = 0; j < treeLikelihoodsContainer_.size(); j++)
232  {
233  x += (*llik[j])[i] * probas_[j];
234  }
235  l *= std::pow(x, (int)(*w)[i]);
236  }
237  return l;
238 }
239 
241 {
242  double ll = 0;
243 
244  vector<Vdouble*> llik;
245  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
246  {
248  }
249 
250  double x;
251  const vector<unsigned int> * w = &likelihoodData_->getWeights();
252  vector<double> la(nbDistinctSites_);
253  for (unsigned int i = 0; i < nbDistinctSites_; i++)
254  {
255  x = 0;
256  for (unsigned int j = 0; j < treeLikelihoodsContainer_.size(); j++)
257  {
258  x += (*llik[j])[i] * probas_[j];
259  }
260  la[i] = (*w)[i] * log(x);
261  }
262  sort(la.begin(), la.end());
263  for (size_t i = nbDistinctSites_; i > 0; i--)
264  {
265  ll += la[i - 1];
266  }
267 
268  return ll;
269 }
270 
271 
273 {
274  double res = 0;
275  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
276  {
277  res += treeLikelihoodsContainer_[i]->getLikelihoodForASite(site) * probas_[i];
278  }
279 
280  return res;
281 }
282 
284 {
285  double x = getLikelihoodForASite(site);
286  if (x < 0) x = 0;
287  return log(x);
288 }
289 
291 {
292  double res = 0;
293  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
294  {
295  res += treeLikelihoodsContainer_[i]->getLikelihoodForASiteForARateClass(site, rateClass) * probas_[i];
296  }
297 
298  return res;
299 }
300 
302 {
303  double x = getLikelihoodForASiteForARateClass(site, rateClass);
304  if (x < 0) x = 0;
305  return log(x);
306 }
307 
308 double DRHomogeneousMixedTreeLikelihood::getLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
309 {
310  double res = 0;
311 
312  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
313  {
314  res += treeLikelihoodsContainer_[i]->getLikelihoodForASiteForARateClassForAState(site, rateClass, state) * probas_[i];
315  }
316 
317  return res;
318 }
319 
320 double DRHomogeneousMixedTreeLikelihood::getLogLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
321 {
322  double x = getLikelihoodForASiteForARateClassForAState(site, rateClass, state);
323  if (x < 0) x = 0;
324  return log(x);
325 }
326 
327 
329 {
330  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
331  {
332  treeLikelihoodsContainer_[i]->computeSubtreeLikelihoodPostfix(node);
333  }
334 }
335 
337 {
338  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
339  {
340  treeLikelihoodsContainer_[i]->computeSubtreeLikelihoodPostfix(node);
341  }
342 }
343 
345 {
346  for (size_t i = 0; i < treeLikelihoodsContainer_.size(); i++)
347  {
348  treeLikelihoodsContainer_[i]->computeRootLikelihood();
349  }
350 }
351 
352 void DRHomogeneousMixedTreeLikelihood::computeLikelihoodAtNode_(const Node* node, VVVdouble& likelihoodArray, const Node* sonNode) const
353 {
354  likelihoodArray.resize(nbDistinctSites_);
355  for (size_t i = 0; i < nbDistinctSites_; i++){
356  VVdouble* likelihoodArray_i = &likelihoodArray[i];
357  likelihoodArray_i->resize(nbClasses_);
358  for (size_t c = 0; c < nbClasses_; c++) {
359  Vdouble* likelihoodArray_i_c = &(*likelihoodArray_i)[c];
360  likelihoodArray_i_c->resize(nbStates_);
361  for (size_t x = 0; x < nbStates_; x++)
362  (*likelihoodArray_i_c)[x] = 0;
363  }
364  }
365 
366  VVVdouble lArray;
367  for (size_t nm = 0; nm < treeLikelihoodsContainer_.size(); nm++)
368  {
369  treeLikelihoodsContainer_[nm]->computeLikelihoodAtNode_(node, lArray, sonNode);
370 
371  for (size_t i = 0; i < nbDistinctSites_; i++)
372  {
373  VVdouble* likelihoodArray_i = &likelihoodArray[i];
374  VVdouble* lArray_i = &lArray[i];
375 
376  for (size_t c = 0; c < nbClasses_; c++)
377  {
378  Vdouble* likelihoodArray_i_c = &(*likelihoodArray_i)[c];
379  Vdouble* lArray_i_c = &(*lArray_i)[c];
380  for (size_t x = 0; x < nbStates_; x++)
381  (*likelihoodArray_i_c)[x] += (*lArray_i_c)[x] * probas_[nm];
382  }
383  }
384 
385  }
386 }
387 
388 /******************************************************************************
389 * First Order Derivatives *
390 ******************************************************************************/
391 
393 {
394  for (size_t i = 0; i < treeLikelihoodsContainer_.size(); i++)
395  {
396  treeLikelihoodsContainer_[i]->computeTreeDLikelihoods();
397  }
398 }
399 
400 double DRHomogeneousMixedTreeLikelihood::getFirstOrderDerivative(const std::string& variable) const
401 throw (Exception)
402 {
403  if (!hasParameter(variable))
404  throw ParameterNotFoundException("DRHomogeneousTreeLikelihood::getFirstOrderDerivative().", variable);
405  if (getRateDistributionParameters().hasParameter(variable))
406  {
407  throw Exception("Derivatives respective to rate distribution parameters are not implemented.");
408  }
409  if (getSubstitutionModelParameters().hasParameter(variable))
410  {
411  throw Exception("Derivatives respective to substitution model parameters are not implemented.");
412  }
413 
414  //
415  // Computation for branch lengths:
416  //
417 
418  // Get the node with the branch whose length must be derivated:
419  unsigned int brI = TextTools::to<unsigned int>(variable.substr(5));
420  const Node* branch = nodes_[brI];
421  vector< Vdouble*> _vdLikelihoods_branch;
422  for (size_t i = 0; i < treeLikelihoodsContainer_.size(); i++)
423  {
424  _vdLikelihoods_branch.push_back(&treeLikelihoodsContainer_[i]->likelihoodData_->getDLikelihoodArray(branch->getId()));
425  }
426 
427  double d = 0;
428  double x;
429  const vector<unsigned int> * w = &likelihoodData_->getWeights();
430  for (unsigned int i = 0; i < nbDistinctSites_; i++)
431  {
432  x = 0;
433  for (unsigned int j = 0; j < treeLikelihoodsContainer_.size(); j++)
434  {
435  x += (*_vdLikelihoods_branch[j])[i] * probas_[j];
436  }
437  d += (*w)[i] * x;
438  }
439 
440  return -d;
441 }
442 
443 
444 /******************************************************************************
445 * Second Order Derivatives *
446 ******************************************************************************/
447 
449 {
450  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
451  {
452  treeLikelihoodsContainer_[i]->computeTreeD2LikelihoodAtNode(node);
453  }
454 }
455 
457 {
458  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
459  {
460  treeLikelihoodsContainer_[i]->computeTreeD2Likelihoods();
461  }
462 }
463 
464 double DRHomogeneousMixedTreeLikelihood::getSecondOrderDerivative(const std::string& variable) const
465 throw (Exception)
466 {
467  if (!hasParameter(variable))
468  throw ParameterNotFoundException("DRHomogeneousTreeLikelihood::getFirstOrderDerivative().", variable);
469  if (getRateDistributionParameters().hasParameter(variable))
470  {
471  throw Exception("Derivatives respective to rate distribution parameters are not implemented.");
472  }
473  if (getSubstitutionModelParameters().hasParameter(variable))
474  {
475  throw Exception("Derivatives respective to substitution model parameters are not implemented.");
476  }
477 
478  //
479  // Computation for branch lengths:
480  //
481 
482  // Get the node with the branch whose length must be derivated:
483  unsigned int brI = TextTools::to<unsigned int>(variable.substr(5));
484  const Node* branch = nodes_[brI];
485  vector< Vdouble*> _vdLikelihoods_branch, _vd2Likelihoods_branch;
486  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
487  {
488  _vdLikelihoods_branch.push_back(&treeLikelihoodsContainer_[i]->likelihoodData_->getDLikelihoodArray(branch->getId()));
489  _vd2Likelihoods_branch.push_back(&treeLikelihoodsContainer_[i]->likelihoodData_->getD2LikelihoodArray(branch->getId()));
490  }
491 
492  double d = 0;
493  double x, x2;
494  const vector<unsigned int> * w = &likelihoodData_->getWeights();
495  for (unsigned int i = 0; i < nbDistinctSites_; i++)
496  {
497  x = 0;
498  x2 = 0;
499  for (unsigned int j = 0; j < treeLikelihoodsContainer_.size(); j++)
500  {
501  x += (*_vdLikelihoods_branch[j])[i] * probas_[j];
502  }
503  for (unsigned int j = 0; j < treeLikelihoodsContainer_.size(); j++)
504  {
505  x2 += (*_vd2Likelihoods_branch[j])[i] * probas_[j];
506  }
507 
508  d += (*w)[i] * (x2 - pow(x, 2));
509  }
510 
511  return -d;
512 }
513 
514 
516 {
517  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
518  {
519  treeLikelihoodsContainer_[i]->displayLikelihood(node);
520  }
521 }
522 
DRHomogeneousMixedTreeLikelihood & operator=(const DRHomogeneousMixedTreeLikelihood &lik)
DRHomogeneousTreeLikelihood & operator=(const DRHomogeneousTreeLikelihood &lik)
double getFirstOrderDerivative(const std::string &variable) const
Interface for all substitution models.
virtual void computeLikelihoodAtNode_(const Node *node, VVVdouble &likelihoodArray, const Node *sonNode=0) const
virtual double getNProbability(size_t i) const =0
Returns the probability of a specific model from the mixture.
double getLikelihoodForASite(size_t site) const
Get the likelihood for a site.
DRASDRTreeLikelihoodData * likelihoodData_
STL namespace.
virtual void computeSubtreeLikelihoodPostfix(const Node *node)
Compute the likelihood for a subtree defined by the Tree::Node node.
std::vector< DRHomogeneousTreeLikelihood * > treeLikelihoodsContainer_
const std::vector< unsigned int > & getWeights() const
Interface for phylogenetic tree objects.
Definition: Tree.h:148
double getLogLikelihoodForASite(size_t site) const
Get the logarithm of the likelihood for a site.
double getLogLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const
Get the logarithm of the likelihood for a site knowing its rate class.
double getSecondOrderDerivative(const std::string &variable) const
void setData(const SiteContainer &sites)
Set the dataset for which the likelihood must be evaluated.
DRHomogeneousMixedTreeLikelihood(const Tree &tree, SubstitutionModel *model, DiscreteDistribution *rDist, bool checkRooted=true, bool verbose=true, bool rootArray=false)
Build a new DRHomogeneousMixedTreeLikelihood object without data.
virtual void computeSubtreeLikelihoodPrefix(const Node *node)
double getLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const
Get the likelihood for a site knowing its rate class.
This class implements the likelihood computation for a tree using the double-recursive algorithm...
double getLikelihood() const
Get the likelihood for the whole dataset.
virtual const std::vector< double > & getProbabilities() const =0
void fireParameterChanged(const ParameterList &params)
virtual void displayLikelihood(const Node *node)
This method is mainly for debugging purpose.
The phylogenetic node class.
Definition: Node.h:90
void setData(const SiteContainer &sites)
Set the dataset for which the likelihood must be evaluated.
double getLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
Get the likelihood for a site knowing its rate class and its ancestral state.
virtual size_t getNumberOfModels() const =0
virtual void applyParameters()
All parameters are stored in a parameter list. This function apply these parameters to the substituti...
A class to compute the average of several DRHomogeneousTreeLikelihood defined from a Mixed Substituti...
double getLogLikelihood() const
Get the logarithm of the likelihood for the whole dataset.
virtual const SubstitutionModel * getNModel(size_t i) const =0
Returns a specific model from the mixture.
double getLogLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
Get the logarithm of the likelihood for a site knowing its rate class and its ancestral state...
Interface for Substitution models, defined as a mixture of "simple" substitution models.