bpp-phyl  2.2.0
NNIHomogeneousTreeLikelihood.cpp
Go to the documentation of this file.
1 //
2 // File: DRHomogeneousTreeLikelihood.cpp
3 // Created by: Julien Dutheil
4 // Created on: Fri Oct 17 18:14:51 2003
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
41 
42 #include <Bpp/Text/TextTools.h>
43 #include <Bpp/App/ApplicationTools.h>
44 #include <Bpp/Numeric/AutoParameter.h>
45 
46 using namespace bpp;
47 
48 // From the STL:
49 #include <iostream>
50 
51 using namespace std;
52 
53 /*******************************************************************************/
54 void BranchLikelihood::initModel(const SubstitutionModel* model, const DiscreteDistribution* rDist)
55 {
56  model_ = model;
57  rDist_ = rDist;
58  nbStates_ = model->getNumberOfStates();
59  nbClasses_ = rDist->getNumberOfCategories();
60  pxy_.resize(nbClasses_);
61  for (size_t i = 0; i < nbClasses_; i++)
62  {
63  pxy_[i].resize(nbStates_);
64  for (size_t j = 0; j < nbStates_; j++)
65  {
66  pxy_[i][j].resize(nbStates_);
67  }
68  }
69 }
70 
71 /*******************************************************************************/
73 {
74  double l = getParameterValue("BrLen");
75 
76  // Computes all pxy once for all:
77  for (size_t c = 0; c < nbClasses_; c++)
78  {
79  VVdouble* pxy__c = &pxy_[c];
80  RowMatrix<double> Q = model_->getPij_t(l * rDist_->getCategory(c));
81  for (size_t x = 0; x < nbStates_; x++)
82  {
83  Vdouble* pxy__c_x = &(*pxy__c)[x];
84  for (size_t y = 0; y < nbStates_; y++)
85  {
86  (*pxy__c_x)[y] = Q(x, y);
87  }
88  }
89  }
90 }
91 
92 /*******************************************************************************/
94 {
95  lnL_ = 0;
96 
97  vector<double> la(array1_->size());
98  for (size_t i = 0; i < array1_->size(); i++)
99  {
100  double Li = 0;
101  for (size_t c = 0; c < nbClasses_; c++)
102  {
103  double rc = rDist_->getProbability(c);
104  for (size_t x = 0; x < nbStates_; x++)
105  {
106  for (size_t y = 0; y < nbStates_; y++)
107  {
108  Li += rc * (*array1_)[i][c][x] * pxy_[c][x][y] * (*array2_)[i][c][y];
109  }
110  }
111  }
112  la[i] = weights_[i] * log(Li);
113  }
114 
115  sort(la.begin(), la.end());
116  for (size_t i = array1_->size(); i > 0; i--)
117  {
118  lnL_ -= la[i - 1];
119  }
120 }
121 
122 /******************************************************************************/
123 
125  const Tree& tree,
126  SubstitutionModel* model,
127  DiscreteDistribution* rDist,
128  bool checkRooted,
129  bool verbose)
130 throw (Exception) :
131  DRHomogeneousTreeLikelihood(tree, model, rDist, checkRooted, verbose),
132  brLikFunction_(0),
133  brentOptimizer_(0),
134  brLenNNIValues_(),
135  brLenNNIParams_()
136 {
137  brentOptimizer_ = new BrentOneDimension();
138  brentOptimizer_->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
139  brentOptimizer_->setProfiler(0);
140  brentOptimizer_->setMessageHandler(0);
141  brentOptimizer_->setVerbose(0);
142 }
143 
144 /******************************************************************************/
145 
147  const Tree& tree,
148  const SiteContainer& data,
149  SubstitutionModel* model,
150  DiscreteDistribution* rDist,
151  bool checkRooted,
152  bool verbose)
153 throw (Exception) :
154  DRHomogeneousTreeLikelihood(tree, data, model, rDist, checkRooted, verbose),
155  brLikFunction_(0),
156  brentOptimizer_(0),
157  brLenNNIValues_(),
158  brLenNNIParams_()
159 {
160  brentOptimizer_ = new BrentOneDimension();
161  brentOptimizer_->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
162  brentOptimizer_->setProfiler(0);
163  brentOptimizer_->setMessageHandler(0);
164  brentOptimizer_->setVerbose(0);
165  // We have to do this since the DRHomogeneousTreeLikelihood constructor will not call the overloaded setData method:
166  brLikFunction_ = new BranchLikelihood(getLikelihoodData()->getWeights());
167 }
168 
169 /******************************************************************************/
170 
173  brLikFunction_(0),
174  brentOptimizer_(0),
175  brLenNNIValues_(),
176  brLenNNIParams_()
177 {
178  brLikFunction_ = dynamic_cast<BranchLikelihood*>(lik.brLikFunction_->clone());
179  brentOptimizer_ = dynamic_cast<BrentOneDimension*>(lik.brentOptimizer_->clone());
182 }
183 
184 /******************************************************************************/
185 
187 {
189  if (brLikFunction_) delete brLikFunction_;
190  brLikFunction_ = dynamic_cast<BranchLikelihood*>(lik.brLikFunction_->clone());
191  if (brentOptimizer_) delete brentOptimizer_;
192  brentOptimizer_ = dynamic_cast<BrentOneDimension*>(lik.brentOptimizer_->clone());
195  return *this;
196 }
197 
198 /******************************************************************************/
199 
201 {
202  if (brLikFunction_) delete brLikFunction_;
203  delete brentOptimizer_;
204 }
205 
206 /******************************************************************************/
207 double NNIHomogeneousTreeLikelihood::testNNI(int nodeId) const throw (NodeException)
208 {
209  const Node* son = tree_->getNode(nodeId);
210  if (!son->hasFather()) throw NodePException("DRHomogeneousTreeLikelihood::testNNI(). Node 'son' must not be the root node.", son);
211  const Node* parent = son->getFather();
212  if (!parent->hasFather()) throw NodePException("DRHomogeneousTreeLikelihood::testNNI(). Node 'parent' must not be the root node.", parent);
213  const Node* grandFather = parent->getFather();
214  // From here: Bifurcation assumed.
215  // In case of multifurcation, an arbitrary uncle is chosen.
216  // If we are at root node with a trifurcation, this does not matter, since 2 NNI are possible (see doc of the NNISearchable interface).
217  size_t parentPosition = grandFather->getSonPosition(parent);
218  // const Node * uncle = grandFather->getSon(parentPosition > 1 ? parentPosition - 1 : 1 - parentPosition);
219  const Node* uncle = grandFather->getSon(parentPosition > 1 ? 0 : 1 - parentPosition);
220 
221  // Retrieving arrays of interest:
222  const DRASDRTreeLikelihoodNodeData* parentData = &getLikelihoodData()->getNodeData(parent->getId());
223  const VVVdouble* sonArray = &parentData->getLikelihoodArrayForNeighbor(son->getId());
224  vector<const Node*> parentNeighbors = TreeTemplateTools::getRemainingNeighbors(parent, grandFather, son);
225  size_t nbParentNeighbors = parentNeighbors.size();
226  vector<const VVVdouble*> parentArrays(nbParentNeighbors);
227  vector<const VVVdouble*> parentTProbs(nbParentNeighbors);
228  for (size_t k = 0; k < nbParentNeighbors; k++)
229  {
230  const Node* n = parentNeighbors[k]; // This neighbor
231  parentArrays[k] = &parentData->getLikelihoodArrayForNeighbor(n->getId());
232  // if(n != grandFather) parentTProbs[k] = & pxy_[n->getId()];
233  // else parentTProbs[k] = & pxy_[parent->getId()];
234  parentTProbs[k] = &pxy_[n->getId()];
235  }
236 
237  const DRASDRTreeLikelihoodNodeData* grandFatherData = &getLikelihoodData()->getNodeData(grandFather->getId());
238  const VVVdouble* uncleArray = &grandFatherData->getLikelihoodArrayForNeighbor(uncle->getId());
239  vector<const Node*> grandFatherNeighbors = TreeTemplateTools::getRemainingNeighbors(grandFather, parent, uncle);
240  size_t nbGrandFatherNeighbors = grandFatherNeighbors.size();
241  vector<const VVVdouble*> grandFatherArrays;
242  vector<const VVVdouble*> grandFatherTProbs;
243  for (size_t k = 0; k < nbGrandFatherNeighbors; k++)
244  {
245  const Node* n = grandFatherNeighbors[k]; // This neighbor
246  if (grandFather->getFather() == NULL || n != grandFather->getFather())
247  {
248  grandFatherArrays.push_back(&grandFatherData->getLikelihoodArrayForNeighbor(n->getId()));
249  grandFatherTProbs.push_back(&pxy_[n->getId()]);
250  }
251  }
252 
253  // Compute array 1: grand father array
254  VVVdouble array1 = *sonArray;
255  resetLikelihoodArray(array1);
256  grandFatherArrays.push_back(sonArray);
257  grandFatherTProbs.push_back(&pxy_[son->getId()]);
258  if (grandFather->hasFather())
259  {
260  computeLikelihoodFromArrays(grandFatherArrays, grandFatherTProbs, &grandFatherData->getLikelihoodArrayForNeighbor(grandFather->getFather()->getId()), &pxy_[grandFather->getId()], array1, nbGrandFatherNeighbors, nbDistinctSites_, nbClasses_, nbStates_, false);
261  }
262  else
263  {
264  computeLikelihoodFromArrays(grandFatherArrays, grandFatherTProbs, array1, nbGrandFatherNeighbors + 1, nbDistinctSites_, nbClasses_, nbStates_, false);
265 
266  // This is the root node, we have to account for the ancestral frequencies:
267  for (size_t i = 0; i < nbDistinctSites_; i++)
268  {
269  for (size_t j = 0; j < nbClasses_; j++)
270  {
271  for (size_t x = 0; x < nbStates_; x++)
272  {
273  array1[i][j][x] *= rootFreqs_[x];
274  }
275  }
276  }
277  }
278 
279  // Compute array 2: parent array
280  VVVdouble array2 = *uncleArray;
281  resetLikelihoodArray(array2);
282  parentArrays.push_back(uncleArray);
283  parentTProbs.push_back(&pxy_[uncle->getId()]);
284  computeLikelihoodFromArrays(parentArrays, parentTProbs, array2, nbParentNeighbors + 1, nbDistinctSites_, nbClasses_, nbStates_, false);
285 
286  // Initialize BranchLikelihood:
287  brLikFunction_->initModel(model_, rateDistribution_);
288  brLikFunction_->initLikelihoods(&array1, &array2);
289  ParameterList parameters;
290  size_t pos = 0;
291  while (pos < nodes_.size() && nodes_[pos]->getId() != parent->getId()) pos++;
292  if (pos == nodes_.size()) throw Exception("NNIHomogeneousTreeLikelihood::testNNI. Unvalid node id.");
293  Parameter brLen = getParameter("BrLen" + TextTools::toString(pos));
294  brLen.setName("BrLen");
295  parameters.addParameter(brLen);
296  brLikFunction_->setParameters(parameters);
297 
298  // Re-estimate branch length:
299  brentOptimizer_->setFunction(brLikFunction_);
300  brentOptimizer_->getStopCondition()->setTolerance(0.1);
301  brentOptimizer_->setInitialInterval(brLen.getValue(), brLen.getValue() + 0.01);
302  brentOptimizer_->init(parameters);
303  brentOptimizer_->optimize();
304  // brLenNNIValues_[nodeId] = brLikFunction_->getParameterValue("BrLen");
305  brLenNNIValues_[nodeId] = brentOptimizer_->getParameters().getParameter("BrLen").getValue();
306  brLikFunction_->resetLikelihoods(); // Array1 and Array2 will be destroyed after this function call.
307  // We should not keep pointers towards them...
308 
309  // Return the resulting likelihood:
310  return brLikFunction_->getValue() - getValue();
311 }
312 
313 /*******************************************************************************/
315 {
316  // Perform the topological move, the likelihood array will have to be recomputed...
317  Node* son = tree_->getNode(nodeId);
318  if (!son->hasFather()) throw NodePException("DRHomogeneousTreeLikelihood::testNNI(). Node 'son' must not be the root node.", son);
319  Node* parent = son->getFather();
320  if (!parent->hasFather()) throw NodePException("DRHomogeneousTreeLikelihood::testNNI(). Node 'parent' must not be the root node.", parent);
321  Node* grandFather = parent->getFather();
322  // From here: Bifurcation assumed.
323  // In case of multifurcation, an arbitrary uncle is chosen.
324  // If we are at root node with a trifurcation, this does not matter, since 2 NNI are possible (see doc of the NNISearchable interface).
325  size_t parentPosition = grandFather->getSonPosition(parent);
326  Node* uncle = grandFather->getSon(parentPosition > 1 ? 0 : 1 - parentPosition);
327  // Swap nodes:
328  parent->removeSon(son);
329  grandFather->removeSon(uncle);
330  parent->addSon(uncle);
331  grandFather->addSon(son);
332  size_t pos = 0;
333  while (pos < nodes_.size() && nodes_[pos]->getId() != parent->getId()) pos++;
334  if (pos == nodes_.size()) throw Exception("NNIHomogeneousTreeLikelihood::doNNI. Unvalid node id.");
335 
336  string name = "BrLen" + TextTools::toString(pos);
337  if (brLenNNIValues_.find(nodeId) != brLenNNIValues_.end())
338  {
339  double length = brLenNNIValues_[nodeId];
340  brLenParameters_.setParameterValue(name, length);
341  getParameter_(name).setValue(length);
342  parent->setDistanceToFather(length);
343  }
344  else cerr << "ERROR, branch not found: " << nodeId << endl;
345  try
346  {
347  brLenNNIParams_.addParameter(brLenParameters_.getParameter(name));
348  }
349  catch (ParameterException& ex)
350  {
351  StdErr errout;
352  cerr << "DEBUG:" << endl;
353  brLenNNIParams_.printParameters(errout);
354  cerr << "DEBUG:" << name << endl;
355  }
356  // In case of copy of this object, we must remove the constraint associated to this stored parameter:
357  // (It should be also possible to update the pointer in the copy constructor,
358  // but we do not need the constraint info here...).
359  brLenNNIParams_[brLenNNIParams_.size() - 1].removeConstraint();
360 }
361 
362 /*******************************************************************************/
363 
NNIHomogeneousTreeLikelihood & operator=(const NNIHomogeneousTreeLikelihood &lik)
DRHomogeneousTreeLikelihood & operator=(const DRHomogeneousTreeLikelihood &lik)
virtual void addSon(size_t pos, Node *node)
Definition: Node.h:407
Interface for all substitution models.
static std::vector< const Node * > getRemainingNeighbors(const Node *node1, const Node *node2, const Node *node3)
Get a subset of node neighbors.
Likelihood data structure for a node.
virtual const Node * getSon(size_t pos) const
Definition: Node.h:395
STL namespace.
VVVdouble & getLikelihoodArrayForNeighbor(int neighborId)
void initModel(const SubstitutionModel *model, const DiscreteDistribution *rDist)
virtual const Node * getFather() const
Get the father of this node is there is one.
Definition: Node.h:339
This class adds support for NNI topology estimation to the DRHomogeneousTreeLikelihood class...
Interface for phylogenetic tree objects.
Definition: Tree.h:148
virtual bool hasFather() const
Tell if this node has a father node.
Definition: Node.h:379
BrentOneDimension * brentOptimizer_
Optimizer used for testing NNI.
BranchLikelihood * clone() const
NNIHomogeneousTreeLikelihood(const Tree &tree, SubstitutionModel *model, DiscreteDistribution *rDist, bool checkRooted=true, bool verbose=true)
Build a new NNIHomogeneousTreeLikelihood object.
virtual int getId() const
Get the node&#39;s id.
Definition: Node.h:203
std::map< int, double > brLenNNIValues_
Hash used for backing up branch lengths when testing NNIs.
This class implements the likelihood computation for a tree using the double-recursive algorithm...
virtual Node * removeSon(size_t pos)
Definition: Node.h:444
virtual size_t getSonPosition(const Node *son) const
Definition: Node.cpp:130
The phylogenetic node class.
Definition: Node.h:90
virtual void setDistanceToFather(double distance)
Set or update the distance toward the father node.
Definition: Node.h:299
Compute likelihood for a 4-tree.
void doNNI(int nodeId)
Perform a NNI movement.
virtual size_t getNumberOfStates() const =0
Get the number of states.
General exception thrown when something is wrong with a particular node.
General exception thrown when something is wrong with a particular node.
double testNNI(int nodeId) const
Send the score of a NNI movement, without performing it.