bpp-phyl  2.2.0
AbstractNonHomogeneousTreeLikelihood.cpp
Go to the documentation of this file.
1 //
2 // File: AbstractNonHomogeneousTreeLikelihood.cpp
3 // Created by: Julien Dutheil
4 // Created on: Tue Oct 09 16:07 2007
5 // From file: AbstractHomogeneousTreeLikelihood.cpp
6 //
7 
8 /*
9  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
10 
11  This software is a computer program whose purpose is to provide classes
12  for phylogenetic data analysis.
13 
14  This software is governed by the CeCILL license under French law and
15  abiding by the rules of distribution of free software. You can use,
16  modify and/ or redistribute the software under the terms of the CeCILL
17  license as circulated by CEA, CNRS and INRIA at the following URL
18  "http://www.cecill.info".
19 
20  As a counterpart to the access to the source code and rights to copy,
21  modify and redistribute granted by the license, users are provided only
22  with a limited warranty and the software's author, the holder of the
23  economic rights, and the successive licensors have only limited
24  liability.
25 
26  In this respect, the user's attention is drawn to the risks associated
27  with loading, using, modifying and/or developing or reproducing the
28  software by the user in light of its specific status of free software,
29  that may mean that it is complicated to manipulate, and that also
30  encouraged to load and test the software's suitability as regards their
31  requirements in conditions enabling the security of their systems and/or
32  data to be ensured and, more generally, to use and operate it in the
33  same conditions as regards security.
34 
35  The fact that you are presently reading this means that you have had
36  knowledge of the CeCILL license and that you accept its terms.
37 */
38 
40 #include "../PatternTools.h"
41 
42 //From SeqLib:
43 #include <Bpp/Seq/SiteTools.h>
44 #include <Bpp/Seq/Container/SequenceContainerTools.h>
45 
46 #include <Bpp/Text/TextTools.h>
47 #include <Bpp/App/ApplicationTools.h>
48 
49 using namespace bpp;
50 
51 // From the STL:
52 #include <iostream>
53 
54 using namespace std;
55 
56 /******************************************************************************/
57 
59  const Tree& tree,
60  SubstitutionModelSet* modelSet,
61  DiscreteDistribution* rDist,
62  bool verbose,
63  bool reparametrizeRoot)
64  throw (Exception) :
66  modelSet_(0),
67  brLenParameters_(),
68  pxy_(),
69  dpxy_(),
70  d2pxy_(),
71  rootFreqs_(),
72  nodes_(),
73  idToNode_(),
74  nbSites_(),
75  nbDistinctSites_(),
76  nbClasses_(),
77  nbStates_(),
78  nbNodes_(),
79  verbose_(),
80  minimumBrLen_(),
81  maximumBrLen_(),
82  brLenConstraint_(0),
83  reparametrizeRoot_(reparametrizeRoot),
84  root1_(),
85  root2_()
86 {
87  init_(tree, modelSet, rDist, verbose);
88 }
89 
90 /******************************************************************************/
91 
95  modelSet_(lik.modelSet_),
96  brLenParameters_(lik.brLenParameters_),
97  pxy_(lik.pxy_),
98  dpxy_(lik.dpxy_),
99  d2pxy_(lik.d2pxy_),
100  rootFreqs_(lik.rootFreqs_),
101  nodes_(),
102  idToNode_(),
103  nbSites_(lik.nbSites_),
104  nbDistinctSites_(lik.nbDistinctSites_),
105  nbClasses_(lik.nbClasses_),
106  nbStates_(lik.nbStates_),
107  nbNodes_(lik.nbNodes_),
108  verbose_(lik.verbose_),
109  minimumBrLen_(lik.minimumBrLen_),
110  maximumBrLen_(lik.maximumBrLen_),
111  brLenConstraint_(dynamic_cast<Constraint*>(lik.brLenConstraint_->clone())),
112  reparametrizeRoot_(lik.reparametrizeRoot_),
113  root1_(lik.root1_),
114  root2_(lik.root2_)
115 {
116  nodes_ = tree_->getNodes();
117  nodes_.pop_back(); //Remove the root node (the last added!).
118  //Rebuild nodes index:
119  for (unsigned int i = 0; i < nodes_.size(); i++)
120  {
121  const Node* node = nodes_[i];
122  idToNode_[node->getId()] = node;
123  }
124 }
125 
126 /******************************************************************************/
127 
130 {
132  modelSet_ = lik.modelSet_;
134  pxy_ = lik.pxy_;
135  dpxy_ = lik.dpxy_;
136  d2pxy_ = lik.d2pxy_;
137  rootFreqs_ = lik.rootFreqs_;
138  nodes_ = tree_->getNodes();
139  nodes_.pop_back(); //Remove the root node (the last added!).
140  nbSites_ = lik.nbSites_;
142  nbClasses_ = lik.nbClasses_;
143  nbStates_ = lik.nbStates_;
144  nbNodes_ = lik.nbNodes_;
145  verbose_ = lik.verbose_;
148  if (brLenConstraint_.get()) brLenConstraint_.release();
149  brLenConstraint_.reset(lik.brLenConstraint_->clone());
151  root1_ = lik.root1_;
152  root2_ = lik.root2_;
153  //Rebuild nodes index:
154  for( unsigned int i = 0; i < nodes_.size(); i++)
155  {
156  const Node * node = nodes_[i];
157  idToNode_[node->getId()] = node;
158  }
159  return *this;
160 }
161 
162 /******************************************************************************/
163 
165  const Tree& tree,
166  SubstitutionModelSet* modelSet,
167  DiscreteDistribution* rDist,
168  bool verbose) throw (Exception)
169 {
170  TreeTools::checkIds(tree, true);
171  tree_ = new TreeTemplate<Node>(tree);
172  root1_ = tree_->getRootNode()->getSon(0)->getId();
173  root2_ = tree_->getRootNode()->getSon(1)->getId();
174  nodes_ = tree_->getNodes();
175  nodes_.pop_back(); //Remove the root node (the last added!).
176  nbNodes_ = nodes_.size();
177  //Build nodes index:
178  for (unsigned int i = 0; i < nodes_.size(); i++)
179  {
180  const Node * node = nodes_[i];
181  idToNode_[node->getId()] = node;
182  }
183  nbClasses_ = rateDistribution_->getNumberOfCategories();
184 
185  verbose_ = verbose;
186 
187  minimumBrLen_ = 0.000001;
188  maximumBrLen_ = 10000;
189  brLenConstraint_.reset(new IntervalConstraint(minimumBrLen_, maximumBrLen_, true, true));
190  setSubstitutionModelSet(modelSet);
191 }
192 
193 /******************************************************************************/
194 
196 {
197  //Check:
198  if (data_)
199  {
200  if (modelSet->getAlphabet()->getAlphabetType() != data_->getAlphabet()->getAlphabetType())
201  throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::setSubstitutionModelSet(). Model alphabet do not match existing data.");
202  }
203 
204  modelSet_ = modelSet;
205 
206  if (data_)
207  {
208  if (modelSet->getNumberOfStates() != modelSet_->getNumberOfStates())
209  setData(*data_); //Have to reinitialize the whole data structure.
210  }
211 
212  nbStates_ = modelSet->getNumberOfStates();
213 
214  //Allocate transition probabilities arrays:
215  for (unsigned int l = 0; l < nbNodes_; l++)
216  {
217  //For each son node,i
218  Node* son = nodes_[l];
219 
220  VVVdouble* pxy__son = & pxy_[son->getId()];
221  pxy__son->resize(nbClasses_);
222  for (unsigned int c = 0; c < nbClasses_; c++)
223  {
224  VVdouble * pxy__son_c = & (* pxy__son)[c];
225  pxy__son_c->resize(nbStates_);
226  for(unsigned int x = 0; x < nbStates_; x++)
227  {
228  (*pxy__son_c)[x].resize(nbStates_);
229  }
230  }
231 
232  VVVdouble* dpxy__son = & dpxy_[son->getId()];
233  dpxy__son->resize(nbClasses_);
234  for (unsigned int c = 0; c < nbClasses_; c++)
235  {
236  VVdouble * dpxy__son_c = & (* dpxy__son)[c];
237  dpxy__son_c->resize(nbStates_);
238  for(unsigned int x = 0; x < nbStates_; x++)
239  {
240  (* dpxy__son_c)[x].resize(nbStates_);
241  }
242  }
243 
244  VVVdouble* d2pxy__son = & d2pxy_[son->getId()];
245  d2pxy__son->resize(nbClasses_);
246  for (unsigned int c = 0; c < nbClasses_; c++)
247  {
248  VVdouble * d2pxy__son_c = & (* d2pxy__son)[c];
249  d2pxy__son_c->resize(nbStates_);
250  for(unsigned int x = 0; x < nbStates_; x++)
251  {
252  (* d2pxy__son_c)[x].resize(nbStates_);
253  }
254  }
255  }
256 
257  //We have to reset parameters. If the instance is not initialized, this will be done by the initialize method.
258  if (initialized_)
259  {
260  initParameters();
261  computeAllTransitionProbabilities();
262  fireParameterChanged(getParameters());
263  }
264 }
265 
266 /******************************************************************************/
267 
269 {
270  if (initialized_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::initialize(). Object is already initialized.");
271  if (!data_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::initialize(). Data are no set.");
272  initParameters();
273  initialized_ = true;
275  fireParameterChanged(getParameters());
276 }
277 
278 /******************************************************************************/
279 
281 {
282  if (!initialized_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::getBranchLengthsParameters(). Object is not initialized.");
283  return brLenParameters_.getCommonParametersWith(getParameters());
284 }
285 
286 /******************************************************************************/
287 
289 {
290  if(!initialized_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::getSubstitutionModelParameters(). Object is not initialized.");
291  return modelSet_->getParameters().getCommonParametersWith(getParameters());
292 }
293 
294 /******************************************************************************/
295 
297 {
298  // Reset parameters:
299  resetParameters_();
300 
301  // Branch lengths:
303  addParameters_(brLenParameters_);
304 
305  // Substitution model:
306  addParameters_(modelSet_->getIndependentParameters());
307 
308  // Rate distribution:
309  addParameters_(rateDistribution_->getIndependentParameters());
310 }
311 
312 /******************************************************************************/
313 
315 {
316  if (!initialized_) throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::applyParameters(). Object not initialized.");
317  //Apply branch lengths:
318  for (unsigned int i = 0; i < nbNodes_; i++)
319  {
320  int id = nodes_[i]->getId();
321  if (reparametrizeRoot_ && id == root1_)
322  {
323  const Parameter* rootBrLen = &getParameter("BrLenRoot");
324  const Parameter* rootPos = &getParameter("RootPosition");
325  nodes_[i]->setDistanceToFather(rootBrLen->getValue() * rootPos->getValue());
326  }
327  else if (reparametrizeRoot_ && id == root2_)
328  {
329  const Parameter* rootBrLen = &getParameter("BrLenRoot");
330  const Parameter* rootPos = &getParameter("RootPosition");
331  nodes_[i]->setDistanceToFather(rootBrLen->getValue() * (1. - rootPos->getValue()));
332  }
333  else
334  {
335  const Parameter* brLen = &getParameter(string("BrLen") + TextTools::toString(i));
336  if (brLen) nodes_[i]->setDistanceToFather(brLen->getValue());
337  }
338  }
339  //Apply substitution model parameters:
340 
341  modelSet_->matchParametersValues(getParameters());
342  //Apply rate distribution parameters:
343  rateDistribution_->matchParametersValues(getParameters());
344 }
345 
346 /******************************************************************************/
347 
349 {
350  brLenParameters_.reset();
351  double l1 = 0, l2 = 0;
352  for (unsigned int i = 0; i < nbNodes_; i++)
353  {
354  double d = minimumBrLen_;
355  if (!nodes_[i]->hasDistanceToFather())
356  {
357  ApplicationTools::displayWarning("Missing branch length " + TextTools::toString(i) + ". Value is set to " + TextTools::toString(minimumBrLen_));
358  nodes_[i]->setDistanceToFather(minimumBrLen_);
359  }
360  else
361  {
362  d = nodes_[i]->getDistanceToFather();
363  if (d < minimumBrLen_)
364  {
365  ApplicationTools::displayWarning("Branch length " + TextTools::toString(i) + " is too small: " + TextTools::toString(d) + ". Value is set to " + TextTools::toString(minimumBrLen_));
366  nodes_[i]->setDistanceToFather(minimumBrLen_);
367  d = minimumBrLen_;
368  }
369  if (d > maximumBrLen_)
370  {
371  ApplicationTools::displayWarning("Branch length " + TextTools::toString(i) + " is too big: " + TextTools::toString(d) + ". Value is set to " + TextTools::toString(maximumBrLen_));
372  nodes_[i]->setDistanceToFather(maximumBrLen_);
373  d = maximumBrLen_;
374  }
375  }
376  if (reparametrizeRoot_ && nodes_[i]->getId() == root1_)
377  l1 = d;
378  else if (reparametrizeRoot_ && nodes_[i]->getId() == root2_)
379  l2 = d;
380  else
381  {
382  brLenParameters_.addParameter(Parameter("BrLen" + TextTools::toString(i), d, brLenConstraint_->clone(), true)); //Attach constraint to avoid clonage problems!
383  }
384  }
385  if (reparametrizeRoot_) {
386  brLenParameters_.addParameter(Parameter("BrLenRoot", l1 + l2, brLenConstraint_->clone(), true));
387  brLenParameters_.addParameter(Parameter("RootPosition", l1 / (l1 + l2), &Parameter::PROP_CONSTRAINT_EX));
388  }
389 }
390 
391 /*******************************************************************************/
392 
394 {
395  for(unsigned int l = 0; l < nbNodes_; l++)
396  {
397  //For each node,
398  Node * node = nodes_[l];
400  }
402 }
403 
404 /*******************************************************************************/
405 
407 {
408  const SubstitutionModel* model = modelSet_->getModelForNode(node->getId());
409  double l = node->getDistanceToFather();
410 
411  //Computes all pxy and pyx once for all:
412  VVVdouble * pxy__node = & pxy_[node->getId()];
413  for(unsigned int c = 0; c < nbClasses_; c++)
414  {
415  VVdouble * pxy__node_c = & (* pxy__node)[c];
416  RowMatrix<double> Q = model->getPij_t(l * rateDistribution_->getCategory(c));
417  for(unsigned int x = 0; x < nbStates_; x++)
418  {
419  Vdouble * pxy__node_c_x = & (* pxy__node_c)[x];
420  for(unsigned int y = 0; y < nbStates_; y++)
421  {
422  (* pxy__node_c_x)[y] = Q(x, y);
423  }
424  }
425  }
426 
428  {
429  //Computes all dpxy/dt once for all:
430  VVVdouble * dpxy__node = & dpxy_[node->getId()];
431 
432  for(unsigned int c = 0; c < nbClasses_; c++)
433  {
434  VVdouble * dpxy__node_c = & (* dpxy__node)[c];
435  double rc = rateDistribution_->getCategory(c);
436 
437  RowMatrix<double> dQ = model->getdPij_dt(l * rc);
438 
439  for(unsigned int x = 0; x < nbStates_; x++)
440  {
441  Vdouble * dpxy__node_c_x = & (* dpxy__node_c)[x];
442  for(unsigned int y = 0; y < nbStates_; y++)
443  (* dpxy__node_c_x)[y] = rc * dQ(x, y);
444  }
445  }
446  }
447 
449  {
450  //Computes all d2pxy/dt2 once for all:
451  VVVdouble * d2pxy__node = & d2pxy_[node->getId()];
452  for(unsigned int c = 0; c < nbClasses_; c++)
453  {
454  VVdouble * d2pxy__node_c = & (* d2pxy__node)[c];
455  double rc = rateDistribution_->getCategory(c);
456  RowMatrix<double> d2Q = model->getd2Pij_dt2(l * rc);
457  for(unsigned int x = 0; x < nbStates_; x++)
458  {
459  Vdouble * d2pxy__node_c_x = & (* d2pxy__node_c)[x];
460  for(unsigned int y = 0; y < nbStates_; y++)
461  {
462  (* d2pxy__node_c_x)[y] = rc * rc * d2Q(x, y);
463  }
464  }
465  }
466  }
467 }
468 
469 /*******************************************************************************/
470 
virtual N * getRootNode()
Definition: TreeTemplate.h:403
Substitution models manager for non-homogeneous / non-reversible models of evolution.
AbstractNonHomogeneousTreeLikelihood(const Tree &tree, SubstitutionModelSet *modelSet, DiscreteDistribution *rDist, bool verbose=true, bool reparametrizeRoot=true)
ParameterList getSubstitutionModelParameters() const
Get the parameters associated to substitution model(s).
Interface for all substitution models.
Partial implementation for branch non-homogeneous models of the TreeLikelihood interface.
const SubstitutionModel * getModelForNode(int nodeId) const
Get the model associated to a particular node id.
std::vector< Node * > nodes_
Pointer toward all nodes in the tree.
Partial implementation of the DiscreteRatesAcrossSitesTreeLikelihood interface.
virtual void computeAllTransitionProbabilities()
Fill the pxy_, dpxy_ and d2pxy_ arrays for all nodes.
virtual void applyParameters()
All parameters are stored in a parameter list. This function apply these parameters to the substituti...
std::vector< double > getRootFrequencies() const
STL namespace.
The phylogenetic tree class.
Interface for phylogenetic tree objects.
Definition: Tree.h:148
virtual const Matrix< double > & getd2Pij_dt2(double t) const =0
virtual double getDistanceToFather() const
Get the distance to the father node is there is one, otherwise throw a NodeException.
Definition: Node.h:283
AbstractNonHomogeneousTreeLikelihood & operator=(const AbstractNonHomogeneousTreeLikelihood &lik)
Assignation operator.
static bool checkIds(const Tree &tree, bool throwException)
Check if the ids are uniques.
Definition: TreeTools.cpp:783
virtual const Matrix< double > & getPij_t(double t) const =0
virtual void initParameters()
This builds the parameters list from all parametrizable objects, i.e. substitution model...
virtual int getId() const
Get the node&#39;s id.
Definition: Node.h:203
virtual void computeTransitionProbabilitiesForNode(const Node *node)
Fill the pxy_, dpxy_ and d2pxy_ arrays for one node.
std::map< int, const Node * > idToNode_
An index linking nodes to their id, for faster access than the getNode() method.
AbstractDiscreteRatesAcrossSitesTreeLikelihood & operator=(const AbstractDiscreteRatesAcrossSitesTreeLikelihood &tl)
The phylogenetic node class.
Definition: Node.h:90
ParameterList getBranchLengthsParameters() const
Get the branch lengths parameters.
virtual const Matrix< double > & getdPij_dt(double t) const =0
void init_(const Tree &tree, SubstitutionModelSet *modelSet, DiscreteDistribution *rDist, bool verbose)
Method called by constructor.