bpp-phyl  2.2.0
DRHomogeneousTreeLikelihood.cpp
Go to the documentation of this file.
1 //
2 // File: DRHomogeneousTreeLikelihood.cpp
3 // Created by: Julien Dutheil
4 // Created on: Fri Oct 17 18:14:51 2003
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
41 #include "../PatternTools.h"
42 
43 // From SeqLib:
44 #include <Bpp/Seq/SiteTools.h>
45 #include <Bpp/Seq/Container/AlignedSequenceContainer.h>
46 #include <Bpp/Seq/Container/SequenceContainerTools.h>
47 
48 #include <Bpp/Text/TextTools.h>
49 #include <Bpp/App/ApplicationTools.h>
50 
51 using namespace bpp;
52 
53 // From the STL:
54 #include <iostream>
55 
56 using namespace std;
57 
58 /******************************************************************************/
59 
61  const Tree& tree,
62  SubstitutionModel* model,
63  DiscreteDistribution* rDist,
64  bool checkRooted,
65  bool verbose)
66 throw (Exception) :
67  AbstractHomogeneousTreeLikelihood(tree, model, rDist, checkRooted, verbose),
68  likelihoodData_(0),
69  minusLogLik_(-1.)
70 {
71  init_();
72 }
73 
74 /******************************************************************************/
75 
77  const Tree& tree,
78  const SiteContainer& data,
79  SubstitutionModel* model,
80  DiscreteDistribution* rDist,
81  bool checkRooted,
82  bool verbose)
83 throw (Exception) :
84  AbstractHomogeneousTreeLikelihood(tree, model, rDist, checkRooted, verbose),
85  likelihoodData_(0),
86  minusLogLik_(-1.)
87 {
88  init_();
89  setData(data);
90 }
91 
92 /******************************************************************************/
93 
94 void DRHomogeneousTreeLikelihood::init_() throw (Exception)
95 {
96  likelihoodData_ = new DRASDRTreeLikelihoodData(
97  tree_,
98  rateDistribution_->getNumberOfCategories());
99 }
100 
101 /******************************************************************************/
102 
105  likelihoodData_(0),
106  minusLogLik_(-1.)
107 {
111 }
112 
113 /******************************************************************************/
114 
116 {
118  if (likelihoodData_)
119  delete likelihoodData_;
123  return *this;
124 }
125 
126 /******************************************************************************/
127 
129 {
130  delete likelihoodData_;
131 }
132 
133 /******************************************************************************/
134 
135 void DRHomogeneousTreeLikelihood::setData(const SiteContainer& sites) throw (Exception)
136 {
137  if (data_)
138  delete data_;
139  data_ = PatternTools::getSequenceSubset(sites, *tree_->getRootNode());
140  if (verbose_)
141  ApplicationTools::displayTask("Initializing data structure");
142  likelihoodData_->initLikelihoods(*data_, *model_);
143  if (verbose_)
144  ApplicationTools::displayTaskDone();
145 
146  nbSites_ = likelihoodData_->getNumberOfSites();
147  nbDistinctSites_ = likelihoodData_->getNumberOfDistinctSites();
148  nbStates_ = likelihoodData_->getNumberOfStates();
149 
150  if (verbose_)
151  ApplicationTools::displayResult("Number of distinct sites",
152  TextTools::toString(nbDistinctSites_));
153  initialized_ = false;
154 }
155 
156 /******************************************************************************/
157 
159 {
160  double l = 1.;
162  const vector<unsigned int>* w = &likelihoodData_->getWeights();
163  for (size_t i = 0; i < nbDistinctSites_; i++)
164  {
165  l *= std::pow((*lik)[i], (int)(*w)[i]);
166  }
167  return l;
168 }
169 
170 /******************************************************************************/
171 
173 {
174  double ll = 0;
176  const vector<unsigned int>* w = &likelihoodData_->getWeights();
177  vector<double> la(nbDistinctSites_);
178  for (size_t i = 0; i < nbDistinctSites_; i++)
179  {
180  la[i] = (*w)[i] * log((*lik)[i]);
181  }
182  sort(la.begin(), la.end());
183  for (size_t i = nbDistinctSites_; i > 0; i--)
184  {
185  ll += la[i - 1];
186  }
187  return ll;
188 }
189 
190 /******************************************************************************/
191 
193 {
195 }
196 
197 /******************************************************************************/
198 
200 {
202 }
203 
204 /******************************************************************************/
205 double DRHomogeneousTreeLikelihood::getLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const
206 {
208 }
209 
210 /******************************************************************************/
211 
213 {
215 }
216 
217 /******************************************************************************/
218 
219 double DRHomogeneousTreeLikelihood::getLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
220 {
221  return likelihoodData_->getRootLikelihoodArray()[likelihoodData_->getRootArrayPosition(site)][rateClass][static_cast<size_t>(state)];
222 }
223 
224 /******************************************************************************/
225 
226 double DRHomogeneousTreeLikelihood::getLogLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
227 {
228  return log(likelihoodData_->getRootLikelihoodArray()[likelihoodData_->getRootArrayPosition(site)][rateClass][static_cast<size_t>(state)]);
229 }
230 
231 /******************************************************************************/
232 
233 void DRHomogeneousTreeLikelihood::setParameters(const ParameterList& parameters)
234 throw (ParameterNotFoundException, ConstraintException)
235 {
236  setParametersValues(parameters);
237 }
238 
239 /******************************************************************************/
240 
241 void DRHomogeneousTreeLikelihood::fireParameterChanged(const ParameterList& params)
242 {
243  applyParameters();
244 
245  if (rateDistribution_->getParameters().getCommonParametersWith(params).size() > 0
246  || model_->getParameters().getCommonParametersWith(params).size() > 0)
247  {
248  // Rate parameter changed, need to recompute all probs:
250  }
251  else if (params.size() > 0)
252  {
253  // We may save some computations:
254  for (size_t i = 0; i < params.size(); i++)
255  {
256  string s = params[i].getName();
257  if (s.substr(0, 5) == "BrLen")
258  {
259  // Branch length parameter:
260  computeTransitionProbabilitiesForNode(nodes_[TextTools::to < size_t > (s.substr(5))]);
261  }
262  }
263  }
264 
267  {
269  }
271  {
273  }
274 
276 }
277 
278 /******************************************************************************/
279 
281 throw (Exception)
282 {
283  if (!isInitialized())
284  throw Exception("DRHomogeneousTreeLikelihood::getValue(). Instance is not initialized.");
285  return minusLogLik_;
286 }
287 
288 /******************************************************************************
289 * First Order Derivatives *
290 ******************************************************************************/
292 {
293  const Node* father = node->getFather();
294  VVVdouble* likelihoods_father_node = &likelihoodData_->getLikelihoodArray(father->getId(), node->getId());
295  Vdouble* dLikelihoods_node = &likelihoodData_->getDLikelihoodArray(node->getId());
296  VVVdouble* dpxy_node = &dpxy_[node->getId()];
297  VVVdouble larray;
298  computeLikelihoodAtNode_(father, larray, node);
299  Vdouble* rootLikelihoodsSR = &likelihoodData_->getRootRateSiteLikelihoodArray();
300 
301  double dLi, dLic, dLicx;
302 
303  for (size_t i = 0; i < nbDistinctSites_; i++)
304  {
305  VVdouble* likelihoods_father_node_i = &(*likelihoods_father_node)[i];
306  VVdouble* larray_i = &larray[i];
307  dLi = 0;
308  for (size_t c = 0; c < nbClasses_; c++)
309  {
310  Vdouble* likelihoods_father_node_i_c = &(*likelihoods_father_node_i)[c];
311  Vdouble* larray_i_c = &(*larray_i)[c];
312  VVdouble* dpxy_node_c = &(*dpxy_node)[c];
313  dLic = 0;
314  for (size_t x = 0; x < nbStates_; x++)
315  {
316  Vdouble* dpxy_node_c_x = &(*dpxy_node_c)[x];
317  dLicx = 0;
318  for (size_t y = 0; y < nbStates_; y++)
319  {
320  dLicx += (*dpxy_node_c_x)[y] * (*likelihoods_father_node_i_c)[y];
321  }
322  dLicx *= (*larray_i_c)[x];
323  dLic += dLicx;
324  }
325  dLi += rateDistribution_->getProbability(c) * dLic;
326  }
327  (*dLikelihoods_node)[i] = dLi / (*rootLikelihoodsSR)[i];
328  // cout << dLi << "\t" << (*rootLikelihoodsSR)[i] << endl;
329  }
330 }
331 
332 /******************************************************************************/
333 
335 {
336  for (size_t k = 0; k < nbNodes_; k++)
337  {
339  }
340 }
341 
342 /******************************************************************************/
343 
344 double DRHomogeneousTreeLikelihood::getFirstOrderDerivative(const std::string& variable) const
345 throw (Exception)
346 {
347  if (!hasParameter(variable))
348  throw ParameterNotFoundException("DRHomogeneousTreeLikelihood::getFirstOrderDerivative().", variable);
349  if (getRateDistributionParameters().hasParameter(variable))
350  {
351  throw Exception("Derivatives respective to rate distribution parameters are not implemented.");
352  }
353  if (getSubstitutionModelParameters().hasParameter(variable))
354  {
355  throw Exception("Derivatives respective to substitution model parameters are not implemented.");
356  }
357 
358  //
359  // Computation for branch lengths:
360  //
361 
362  // Get the node with the branch whose length must be derivated:
363  size_t brI = TextTools::to<size_t>(variable.substr(5));
364  const Node* branch = nodes_[brI];
365  Vdouble* dLikelihoods_branch = &likelihoodData_->getDLikelihoodArray(branch->getId());
366  double d = 0;
367  const vector<unsigned int>* w = &likelihoodData_->getWeights();
368  for (size_t i = 0; i < nbDistinctSites_; i++)
369  {
370  d += (*w)[i] * (*dLikelihoods_branch)[i];
371  }
372  return -d;
373 }
374 
375 /******************************************************************************
376 * Second Order Derivatives *
377 ******************************************************************************/
379 {
380  const Node* father = node->getFather();
381  VVVdouble* likelihoods_father_node = &likelihoodData_->getLikelihoodArray(father->getId(), node->getId());
382  Vdouble* d2Likelihoods_node = &likelihoodData_->getD2LikelihoodArray(node->getId());
383  VVVdouble* d2pxy_node = &d2pxy_[node->getId()];
384  VVVdouble larray;
385  computeLikelihoodAtNode_(father, larray, node);
386  Vdouble* rootLikelihoodsSR = &likelihoodData_->getRootRateSiteLikelihoodArray();
387 
388  double d2Li, d2Lic, d2Licx;
389 
390  for (size_t i = 0; i < nbDistinctSites_; i++)
391  {
392  VVdouble* likelihoods_father_node_i = &(*likelihoods_father_node)[i];
393  VVdouble* larray_i = &larray[i];
394  d2Li = 0;
395  for (size_t c = 0; c < nbClasses_; c++)
396  {
397  Vdouble* likelihoods_father_node_i_c = &(*likelihoods_father_node_i)[c];
398  Vdouble* larray_i_c = &(*larray_i)[c];
399  VVdouble* d2pxy_node_c = &(*d2pxy_node)[c];
400  d2Lic = 0;
401  for (size_t x = 0; x < nbStates_; x++)
402  {
403  Vdouble* d2pxy_node_c_x = &(*d2pxy_node_c)[x];
404  d2Licx = 0;
405  for (size_t y = 0; y < nbStates_; y++)
406  {
407  d2Licx += (*d2pxy_node_c_x)[y] * (*likelihoods_father_node_i_c)[y];
408  }
409  d2Licx *= (*larray_i_c)[x];
410  d2Lic += d2Licx;
411  }
412  d2Li += rateDistribution_->getProbability(c) * d2Lic;
413  }
414  (*d2Likelihoods_node)[i] = d2Li / (*rootLikelihoodsSR)[i];
415  }
416 }
417 
418 /******************************************************************************/
419 
421 {
422  for (size_t k = 0; k < nbNodes_; k++)
423  {
425  }
426 }
427 
428 /******************************************************************************/
429 
430 double DRHomogeneousTreeLikelihood::getSecondOrderDerivative(const std::string& variable) const
431 throw (Exception)
432 {
433  if (!hasParameter(variable))
434  throw ParameterNotFoundException("DRHomogeneousTreeLikelihood::getSecondOrderDerivative().", variable);
435  if (getRateDistributionParameters().hasParameter(variable))
436  {
437  throw Exception("Derivatives respective to rate distribution parameters are not implemented.");
438  }
439  if (getSubstitutionModelParameters().hasParameter(variable))
440  {
441  throw Exception("Derivatives respective to substitution model parameters are not implemented.");
442  }
443 
444  //
445  // Computation for branch lengths:
446  //
447 
448  // Get the node with the branch whose length must be derivated:
449  size_t brI = TextTools::to<size_t>(variable.substr(5));
450  const Node* branch = nodes_[brI];
451  Vdouble* _dLikelihoods_branch = &likelihoodData_->getDLikelihoodArray(branch->getId());
452  Vdouble* _d2Likelihoods_branch = &likelihoodData_->getD2LikelihoodArray(branch->getId());
453  double d2 = 0;
454  const vector<unsigned int>* w = &likelihoodData_->getWeights();
455  for (size_t i = 0; i < nbDistinctSites_; i++)
456  {
457  d2 += (*w)[i] * ((*_d2Likelihoods_branch)[i] - pow((*_dLikelihoods_branch)[i], 2));
458  }
459  return -d2;
460 }
461 
462 /******************************************************************************/
463 
465 {
466  for (size_t n = 0; n < node->getNumberOfSons(); n++)
467  {
468  const Node* subNode = node->getSon(n);
470  }
471  if (node->hasFather())
472  {
473  const Node* father = node->getFather();
475  }
476 }
477 
478 /******************************************************************************/
479 
481 {
482  computeSubtreeLikelihoodPostfix(tree_->getRootNode());
483  computeSubtreeLikelihoodPrefix(tree_->getRootNode());
485 }
486 
487 /******************************************************************************/
488 
490 {
491 // if(node->isLeaf()) return;
492 // cout << node->getId() << "\t" << (node->hasName()?node->getName():"") << endl;
493  if (node->getNumberOfSons() == 0)
494  return;
495 
496  // Set all likelihood arrays to 1 for a start:
497  resetLikelihoodArrays(node);
498 
499  map<int, VVVdouble>* _likelihoods_node = &likelihoodData_->getLikelihoodArrays(node->getId());
500  size_t nbNodes = node->getNumberOfSons();
501  for (size_t l = 0; l < nbNodes; l++)
502  {
503  // For each son node...
504 
505  const Node* son = node->getSon(l);
506  VVVdouble* _likelihoods_node_son = &(*_likelihoods_node)[son->getId()];
507 
508  if (son->isLeaf())
509  {
510  VVdouble* _likelihoods_leaf = &likelihoodData_->getLeafLikelihoods(son->getId());
511  for (size_t i = 0; i < nbDistinctSites_; i++)
512  {
513  // For each site in the sequence,
514  Vdouble* _likelihoods_leaf_i = &(*_likelihoods_leaf)[i];
515  VVdouble* _likelihoods_node_son_i = &(*_likelihoods_node_son)[i];
516  for (size_t c = 0; c < nbClasses_; c++)
517  {
518  // For each rate classe,
519  Vdouble* _likelihoods_node_son_i_c = &(*_likelihoods_node_son_i)[c];
520  for (size_t x = 0; x < nbStates_; x++)
521  {
522  // For each initial state,
523  (*_likelihoods_node_son_i_c)[x] = (*_likelihoods_leaf_i)[x];
524  }
525  }
526  }
527  }
528  else
529  {
530  computeSubtreeLikelihoodPostfix(son); // Recursive method:
531  size_t nbSons = son->getNumberOfSons();
532  map<int, VVVdouble>* _likelihoods_son = &likelihoodData_->getLikelihoodArrays(son->getId());
533 
534  vector<const VVVdouble*> iLik(nbSons);
535  vector<const VVVdouble*> tProb(nbSons);
536  for (size_t n = 0; n < nbSons; n++)
537  {
538  const Node* sonSon = son->getSon(n);
539  tProb[n] = &pxy_[sonSon->getId()];
540  iLik[n] = &(*_likelihoods_son)[sonSon->getId()];
541  }
542  computeLikelihoodFromArrays(iLik, tProb, *_likelihoods_node_son, nbSons, nbDistinctSites_, nbClasses_, nbStates_, false);
543  }
544  }
545 }
546 
547 /******************************************************************************/
548 
550 {
551  if (!node->hasFather())
552  {
553  // 'node' is the root of the tree.
554  // Just call the method on each son node:
555  size_t nbSons = node->getNumberOfSons();
556  for (size_t n = 0; n < nbSons; n++)
557  {
559  }
560  return;
561  }
562  else
563  {
564  const Node* father = node->getFather();
565  map<int, VVVdouble>* _likelihoods_node = &likelihoodData_->getLikelihoodArrays(node->getId());
566  map<int, VVVdouble>* _likelihoods_father = &likelihoodData_->getLikelihoodArrays(father->getId());
567  VVVdouble* _likelihoods_node_father = &(*_likelihoods_node)[father->getId()];
568  if (node->isLeaf())
569  {
570  resetLikelihoodArray(*_likelihoods_node_father);
571  }
572 
573  if (father->isLeaf())
574  {
575  // If the tree is rooted by a leaf
576  VVdouble* _likelihoods_leaf = &likelihoodData_->getLeafLikelihoods(father->getId());
577  for (size_t i = 0; i < nbDistinctSites_; i++)
578  {
579  // For each site in the sequence,
580  Vdouble* _likelihoods_leaf_i = &(*_likelihoods_leaf)[i];
581  VVdouble* _likelihoods_node_father_i = &(*_likelihoods_node_father)[i];
582  for (size_t c = 0; c < nbClasses_; c++)
583  {
584  // For each rate classe,
585  Vdouble* _likelihoods_node_father_i_c = &(*_likelihoods_node_father_i)[c];
586  for (size_t x = 0; x < nbStates_; x++)
587  {
588  // For each initial state,
589  (*_likelihoods_node_father_i_c)[x] = (*_likelihoods_leaf_i)[x];
590  }
591  }
592  }
593  }
594  else
595  {
596  vector<const Node*> nodes;
597  // Add brothers:
598  size_t nbFatherSons = father->getNumberOfSons();
599  for (size_t n = 0; n < nbFatherSons; n++)
600  {
601  const Node* son = father->getSon(n);
602  if (son->getId() != node->getId())
603  nodes.push_back(son); // This is a real brother, not current node!
604  }
605  // Now the real stuff... We've got to compute the likelihoods for the
606  // subtree defined by node 'father'.
607  // This is the same as postfix method, but with different subnodes.
608 
609  size_t nbSons = nodes.size(); // In case of a bifurcating tree, this is equal to 1, excepted for the root.
610 
611  vector<const VVVdouble*> iLik(nbSons);
612  vector<const VVVdouble*> tProb(nbSons);
613  for (size_t n = 0; n < nbSons; n++)
614  {
615  const Node* fatherSon = nodes[n];
616  tProb[n] = &pxy_[fatherSon->getId()];
617  iLik[n] = &(*_likelihoods_father)[fatherSon->getId()];
618  }
619 
620  if (father->hasFather())
621  {
622  const Node* fatherFather = father->getFather();
623  computeLikelihoodFromArrays(iLik, tProb, &(*_likelihoods_father)[fatherFather->getId()], &pxy_[father->getId()], *_likelihoods_node_father, nbSons, nbDistinctSites_, nbClasses_, nbStates_, false);
624  }
625  else
626  {
627  computeLikelihoodFromArrays(iLik, tProb, *_likelihoods_node_father, nbSons, nbDistinctSites_, nbClasses_, nbStates_, false);
628  }
629  }
630 
631  if (!father->hasFather())
632  {
633  // We have to account for the root frequencies:
634  for (size_t i = 0; i < nbDistinctSites_; i++)
635  {
636  VVdouble* _likelihoods_node_father_i = &(*_likelihoods_node_father)[i];
637  for (size_t c = 0; c < nbClasses_; c++)
638  {
639  Vdouble* _likelihoods_node_father_i_c = &(*_likelihoods_node_father_i)[c];
640  for (size_t x = 0; x < nbStates_; x++)
641  {
642  (*_likelihoods_node_father_i_c)[x] *= rootFreqs_[x];
643  }
644  }
645  }
646  }
647 
648  // Call the method on each son node:
649  size_t nbNodeSons = node->getNumberOfSons();
650  for (size_t i = 0; i < nbNodeSons; i++)
651  {
652  computeSubtreeLikelihoodPrefix(node->getSon(i)); // Recursive method.
653  }
654  }
655 }
656 
657 /******************************************************************************/
658 
660 {
661  const Node* root = tree_->getRootNode();
662  VVVdouble* rootLikelihoods = &likelihoodData_->getRootLikelihoodArray();
663  // Set all likelihoods to 1 for a start:
664  if (root->isLeaf())
665  {
666  VVdouble* leavesLikelihoods_root = &likelihoodData_->getLeafLikelihoods(root->getId());
667  for (size_t i = 0; i < nbDistinctSites_; i++)
668  {
669  VVdouble* rootLikelihoods_i = &(*rootLikelihoods)[i];
670  Vdouble* leavesLikelihoods_root_i = &(*leavesLikelihoods_root)[i];
671  for (size_t c = 0; c < nbClasses_; c++)
672  {
673  Vdouble* rootLikelihoods_i_c = &(*rootLikelihoods_i)[c];
674  for (size_t x = 0; x < nbStates_; x++)
675  {
676  (*rootLikelihoods_i_c)[x] = (*leavesLikelihoods_root_i)[x];
677  }
678  }
679  }
680  }
681  else
682  {
683  resetLikelihoodArray(*rootLikelihoods);
684  }
685 
686  map<int, VVVdouble>* likelihoods_root = &likelihoodData_->getLikelihoodArrays(root->getId());
687  size_t nbNodes = root->getNumberOfSons();
688  vector<const VVVdouble*> iLik(nbNodes);
689  vector<const VVVdouble*> tProb(nbNodes);
690  for (size_t n = 0; n < nbNodes; n++)
691  {
692  const Node* son = root->getSon(n);
693  tProb[n] = &pxy_[son->getId()];
694  iLik[n] = &(*likelihoods_root)[son->getId()];
695  }
696  computeLikelihoodFromArrays(iLik, tProb, *rootLikelihoods, nbNodes, nbDistinctSites_, nbClasses_, nbStates_, false);
697 
698  Vdouble p = rateDistribution_->getProbabilities();
699  VVdouble* rootLikelihoodsS = &likelihoodData_->getRootSiteLikelihoodArray();
700  Vdouble* rootLikelihoodsSR = &likelihoodData_->getRootRateSiteLikelihoodArray();
701  for (size_t i = 0; i < nbDistinctSites_; i++)
702  {
703  // For each site in the sequence,
704  VVdouble* rootLikelihoods_i = &(*rootLikelihoods)[i];
705  Vdouble* rootLikelihoodsS_i = &(*rootLikelihoodsS)[i];
706  (*rootLikelihoodsSR)[i] = 0;
707  for (size_t c = 0; c < nbClasses_; c++)
708  {
709  // For each rate classe,
710  Vdouble* rootLikelihoods_i_c = &(*rootLikelihoods_i)[c];
711  double* rootLikelihoodsS_i_c = &(*rootLikelihoodsS_i)[c];
712  (*rootLikelihoodsS_i_c) = 0;
713  for (size_t x = 0; x < nbStates_; x++)
714  {
715  // For each initial state,
716  (*rootLikelihoodsS_i_c) += rootFreqs_[x] * (*rootLikelihoods_i_c)[x];
717  }
718  (*rootLikelihoodsSR)[i] += p[c] * (*rootLikelihoodsS_i_c);
719  }
720 
721  // Final checking (for numerical errors):
722  if ((*rootLikelihoodsSR)[i] < 0)
723  (*rootLikelihoodsSR)[i] = 0.;
724  }
725 }
726 
727 /******************************************************************************/
728 
729 void DRHomogeneousTreeLikelihood::computeLikelihoodAtNode_(const Node* node, VVVdouble& likelihoodArray, const Node* sonNode) const
730 {
731  // const Node * node = tree_->getNode(nodeId);
732  int nodeId = node->getId();
733  likelihoodArray.resize(nbDistinctSites_);
734  map<int, VVVdouble>* likelihoods_node = &likelihoodData_->getLikelihoodArrays(nodeId);
735 
736  // Initialize likelihood array:
737  if (node->isLeaf())
738  {
739  VVdouble* leavesLikelihoods_node = &likelihoodData_->getLeafLikelihoods(nodeId);
740  for (size_t i = 0; i < nbDistinctSites_; i++)
741  {
742  VVdouble* likelihoodArray_i = &likelihoodArray[i];
743  Vdouble* leavesLikelihoods_node_i = &(*leavesLikelihoods_node)[i];
744  likelihoodArray_i->resize(nbClasses_);
745  for (size_t c = 0; c < nbClasses_; c++)
746  {
747  Vdouble* likelihoodArray_i_c = &(*likelihoodArray_i)[c];
748  likelihoodArray_i_c->resize(nbStates_);
749  for (size_t x = 0; x < nbStates_; x++)
750  {
751  (*likelihoodArray_i_c)[x] = (*leavesLikelihoods_node_i)[x];
752  }
753  }
754  }
755  }
756  else
757  {
758  // Otherwise:
759  // Set all likelihoods to 1 for a start:
760  for (size_t i = 0; i < nbDistinctSites_; i++)
761  {
762  VVdouble* likelihoodArray_i = &likelihoodArray[i];
763  likelihoodArray_i->resize(nbClasses_);
764  for (size_t c = 0; c < nbClasses_; c++)
765  {
766  Vdouble* likelihoodArray_i_c = &(*likelihoodArray_i)[c];
767  likelihoodArray_i_c->resize(nbStates_);
768  for (size_t x = 0; x < nbStates_; x++)
769  {
770  (*likelihoodArray_i_c)[x] = 1.;
771  }
772  }
773  }
774  }
775 
776  size_t nbNodes = node->getNumberOfSons();
777 
778  vector<const VVVdouble*> iLik;
779  vector<const VVVdouble*> tProb;
780  bool test = false;
781  for (size_t n = 0; n < nbNodes; n++)
782  {
783  const Node* son = node->getSon(n);
784  if (son != sonNode) {
785  tProb.push_back(&pxy_[son->getId()]);
786  iLik.push_back(&(*likelihoods_node)[son->getId()]);
787  } else {
788  test = true;
789  }
790  }
791  if (sonNode) {
792  if (test)
793  nbNodes--;
794  else
795  throw Exception("DRHomogeneousTreeLikelihood::computeLikelihoodAtNode_(...). 'sonNode' not found as a son of 'node'.");
796  }
797 
798  if (node->hasFather())
799  {
800  const Node* father = node->getFather();
801  computeLikelihoodFromArrays(iLik, tProb, &(*likelihoods_node)[father->getId()], &pxy_[nodeId], likelihoodArray, nbNodes, nbDistinctSites_, nbClasses_, nbStates_, false);
802  }
803  else
804  {
805  computeLikelihoodFromArrays(iLik, tProb, likelihoodArray, nbNodes, nbDistinctSites_, nbClasses_, nbStates_, false);
806 
807  // We have to account for the equilibrium frequencies:
808  for (size_t i = 0; i < nbDistinctSites_; i++)
809  {
810  VVdouble* likelihoodArray_i = &likelihoodArray[i];
811  for (size_t c = 0; c < nbClasses_; c++)
812  {
813  Vdouble* likelihoodArray_i_c = &(*likelihoodArray_i)[c];
814  for (size_t x = 0; x < nbStates_; x++)
815  {
816  (*likelihoodArray_i_c)[x] *= rootFreqs_[x];
817  }
818  }
819  }
820  }
821 }
822 
823 /******************************************************************************/
824 
826  const vector<const VVVdouble*>& iLik,
827  const vector<const VVVdouble*>& tProb,
828  VVVdouble& oLik,
829  size_t nbNodes,
830  size_t nbDistinctSites,
831  size_t nbClasses,
832  size_t nbStates,
833  bool reset)
834 {
835  if (reset)
836  resetLikelihoodArray(oLik);
837 
838  for (size_t n = 0; n < nbNodes; n++)
839  {
840  const VVVdouble* pxy_n = tProb[n];
841  const VVVdouble* iLik_n = iLik[n];
842 
843  for (size_t i = 0; i < nbDistinctSites; i++)
844  {
845  // For each site in the sequence,
846  const VVdouble* iLik_n_i = &(*iLik_n)[i];
847  VVdouble* oLik_i = &(oLik)[i];
848 
849  for (size_t c = 0; c < nbClasses; c++)
850  {
851  // For each rate classe,
852  const Vdouble* iLik_n_i_c = &(*iLik_n_i)[c];
853  Vdouble* oLik_i_c = &(*oLik_i)[c];
854  const VVdouble* pxy_n_c = &(*pxy_n)[c];
855  for (size_t x = 0; x < nbStates; x++)
856  {
857  // For each initial state,
858  const Vdouble* pxy_n_c_x = &(*pxy_n_c)[x];
859  double likelihood = 0;
860  for (size_t y = 0; y < nbStates; y++)
861  {
862  likelihood += (*pxy_n_c_x)[y] * (*iLik_n_i_c)[y];
863  }
864  // We store this conditionnal likelihood into the corresponding array:
865  (*oLik_i_c)[x] *= likelihood;
866  }
867  }
868  }
869  }
870 }
871 
872 /******************************************************************************/
873 
875  const vector<const VVVdouble*>& iLik,
876  const vector<const VVVdouble*>& tProb,
877  const VVVdouble* iLikR,
878  const VVVdouble* tProbR,
879  VVVdouble& oLik,
880  size_t nbNodes,
881  size_t nbDistinctSites,
882  size_t nbClasses,
883  size_t nbStates,
884  bool reset)
885 {
886  if (reset)
887  resetLikelihoodArray(oLik);
888 
889  for (size_t n = 0; n < nbNodes; n++)
890  {
891  const VVVdouble* pxy_n = tProb[n];
892  const VVVdouble* iLik_n = iLik[n];
893 
894  for (size_t i = 0; i < nbDistinctSites; i++)
895  {
896  // For each site in the sequence,
897  const VVdouble* iLik_n_i = &(*iLik_n)[i];
898  VVdouble* oLik_i = &(oLik)[i];
899 
900  for (size_t c = 0; c < nbClasses; c++)
901  {
902  // For each rate classe,
903  const Vdouble* iLik_n_i_c = &(*iLik_n_i)[c];
904  Vdouble* oLik_i_c = &(*oLik_i)[c];
905  const VVdouble* pxy_n_c = &(*pxy_n)[c];
906  for (size_t x = 0; x < nbStates; x++)
907  {
908  // For each initial state,
909  const Vdouble* pxy_n_c_x = &(*pxy_n_c)[x];
910  double likelihood = 0;
911  for (size_t y = 0; y < nbStates; y++)
912  {
913  // cout << "1:" << (* pxy_n_c_x)[y] << endl;
914  // cout << "2:" << (* iLik_n_i_c)[y] << endl;
915  likelihood += (*pxy_n_c_x)[y] * (*iLik_n_i_c)[y];
916  // cout << i << "\t" << c << "\t" << x << "\t" << y << "\t" << (* pxy__son_c_x)[y] << "\t" << (* likelihoods_root_son_i_c)[y] << endl;
917  }
918  // We store this conditionnal likelihood into the corresponding array:
919  (*oLik_i_c)[x] *= likelihood;
920  }
921  }
922  }
923  }
924 
925  // Now deal with the subtree containing the root:
926  for (size_t i = 0; i < nbDistinctSites; i++)
927  {
928  // For each site in the sequence,
929  const VVdouble* iLikR_i = &(*iLikR)[i];
930  VVdouble* oLik_i = &(oLik)[i];
931 
932  for (size_t c = 0; c < nbClasses; c++)
933  {
934  // For each rate classe,
935  const Vdouble* iLikR_i_c = &(*iLikR_i)[c];
936  Vdouble* oLik_i_c = &(*oLik_i)[c];
937  const VVdouble* pxyR_c = &(*tProbR)[c];
938  for (size_t x = 0; x < nbStates; x++)
939  {
940  double likelihood = 0;
941  for (size_t y = 0; y < nbStates; y++)
942  {
943  // For each final state,
944  const Vdouble* pxyR_c_y = &(*pxyR_c)[y];
945  likelihood += (*pxyR_c_y)[x] * (*iLikR_i_c)[y];
946  }
947  // We store this conditionnal likelihood into the corresponding array:
948  (*oLik_i_c)[x] *= likelihood;
949  }
950  }
951  }
952 }
953 
954 /******************************************************************************/
955 
957 {
958  cout << "Likelihoods at node " << node->getId() << ": " << endl;
959  for (size_t n = 0; n < node->getNumberOfSons(); n++)
960  {
961  const Node* subNode = node->getSon(n);
962  cout << "Array for sub-node " << subNode->getId() << endl;
964  }
965  if (node->hasFather())
966  {
967  const Node* father = node->getFather();
968  cout << "Array for father node " << father->getId() << endl;
970  }
971  cout << " ***" << endl;
972 }
973 
974 /*******************************************************************************/
975 
double getFirstOrderDerivative(const std::string &variable) const
DRHomogeneousTreeLikelihood & operator=(const DRHomogeneousTreeLikelihood &lik)
double getLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
Get the likelihood for a site knowing its rate class and its ancestral state.
Interface for all substitution models.
virtual void computeAllTransitionProbabilities()
Fill the pxy_, dpxy_ and d2pxy_ arrays for all nodes.
double getLikelihood() const
Get the likelihood for the whole dataset.
void setParameters(const ParameterList &parameters)
Implements the Function interface.
DRHomogeneousTreeLikelihood(const Tree &tree, SubstitutionModel *model, DiscreteDistribution *rDist, bool checkRooted=true, bool verbose=true)
Build a new DRHomogeneousTreeLikelihood object without data.
AbstractHomogeneousTreeLikelihood & operator=(const AbstractHomogeneousTreeLikelihood &lik)
Assignation operator.
virtual void computeTreeD2LikelihoodAtNode(const Node *node)
virtual void computeSubtreeLikelihoodPostfix(const Node *node)
DRASDRTreeLikelihoodData * likelihoodData_
virtual const Node * getSon(size_t pos) const
Definition: Node.h:395
STL namespace.
virtual void resetLikelihoodArrays(const Node *node)
const std::map< int, VVVdouble > & getLikelihoodArrays(int nodeId) const
const std::vector< unsigned int > & getWeights() const
virtual const Node * getFather() const
Get the father of this node is there is one.
Definition: Node.h:339
Vdouble & getD2LikelihoodArray(int nodeId)
Interface for phylogenetic tree objects.
Definition: Tree.h:148
static void displayLikelihoodArray(const VVVdouble &likelihoodArray)
Print the likelihood array to terminal (debugging tool).
double getLogLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
Get the logarithm of the likelihood for a site knowing its rate class and its ancestral state...
void init_()
Method called by constructors.
virtual bool hasFather() const
Tell if this node has a father node.
Definition: Node.h:379
virtual bool isLeaf() const
Definition: Node.h:692
void setData(const SiteContainer &sites)
Set the dataset for which the likelihood must be evaluated.
double getValue() const
Function and NNISearchable interface.
double getLogLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const
Get the logarithm of the likelihood for a site knowing its rate class.
VVdouble & getLeafLikelihoods(int nodeId)
virtual int getId() const
Get the node&#39;s id.
Definition: Node.h:203
double getLikelihoodForASite(size_t site) const
Get the likelihood for a site.
static SiteContainer * getSequenceSubset(const SiteContainer &sequenceSet, const Node &node)
Extract the sequences corresponding to a given subtree.
virtual void fireParameterChanged(const ParameterList &params)
This class implements the likelihood computation for a tree using the double-recursive algorithm...
static void resetLikelihoodArray(VVVdouble &likelihoodArray)
Set all conditional likelihoods to 1.
double getSecondOrderDerivative(const std::string &variable) const
VVVdouble & getLikelihoodArray(int parentId, int neighborId)
The phylogenetic node class.
Definition: Node.h:90
virtual void displayLikelihood(const Node *node)
This method is mainly for debugging purpose.
double getLogLikelihoodForASite(size_t site) const
Get the logarithm of the likelihood for a site.
virtual void computeTransitionProbabilitiesForNode(const Node *node)
Fill the pxy_, dpxy_ and d2pxy_ arrays for one node.
std::vector< Node * > nodes_
Pointer toward all nodes in the tree.
double getLogLikelihood() const
Get the logarithm of the likelihood for the whole dataset.
virtual size_t getNumberOfSons() const
Definition: Node.h:388
static void computeLikelihoodFromArrays(const std::vector< const VVVdouble *> &iLik, const std::vector< const VVVdouble *> &tProb, VVVdouble &oLik, size_t nbNodes, size_t nbDistinctSites, size_t nbClasses, size_t nbStates, bool reset=true)
Compute conditional likelihoods.
virtual void computeSubtreeLikelihoodPrefix(const Node *node)
Likelihood data structure for rate across sites models, using a double-recursive algorithm.
DRASDRTreeLikelihoodData * clone() const
virtual void applyParameters()
All parameters are stored in a parameter list. This function apply these parameters to the substituti...
virtual void computeLikelihoodAtNode_(const Node *node, VVVdouble &likelihoodArray, const Node *sonNode=0) const
Partial implementation for homogeneous model of the TreeLikelihood interface.
void setTree(const TreeTemplate< Node > *tree)
Set the tree associated to the data.
size_t getRootArrayPosition(const size_t site) const
virtual void computeTreeDLikelihoodAtNode(const Node *node)
double getLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const
Get the likelihood for a site knowing its rate class.