bpp-phyl  2.2.0
NonHomogeneousSequenceSimulator.cpp
Go to the documentation of this file.
1 //
2 // File: NonHomogeneousSequenceSimulator.cpp
3 // (previously SequenceSimulator.cpp, then HomogeneousSequenceSimulator.cpp)
4 // Created by: Julien Dutheil
5 // Bastien Boussau
6 // Created on: Wed Feb 4 16:30:51 2004
7 //
8 
9 /*
10  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
11 
12  This software is a computer program whose purpose is to provide classes
13  for phylogenetic data analysis.
14 
15  This software is governed by the CeCILL license under French law and
16  abiding by the rules of distribution of free software. You can use,
17  modify and/ or redistribute the software under the terms of the CeCILL
18  license as circulated by CEA, CNRS and INRIA at the following URL
19  "http://www.cecill.info".
20 
21  As a counterpart to the access to the source code and rights to copy,
22  modify and redistribute granted by the license, users are provided only
23  with a limited warranty and the software's author, the holder of the
24  economic rights, and the successive licensors have only limited
25  liability.
26 
27  In this respect, the user's attention is drawn to the risks associated
28  with loading, using, modifying and/or developing or reproducing the
29  software by the user in light of its specific status of free software,
30  that may mean that it is complicated to manipulate, and that also
31  therefore means that it is reserved for developers and experienced
32  professionals having in-depth computer knowledge. Users are therefore
33  encouraged to load and test the software's suitability as regards their
34  requirements in conditions enabling the security of their systems and/or
35  data to be ensured and, more generally, to use and operate it in the
36  same conditions as regards security.
37 
38  The fact that you are presently reading this means that you have had
39  knowledge of the CeCILL license and that you accept its terms.
40  */
41 
43 #include "../Model/SubstitutionModelSetTools.h"
44 
45 #include <Bpp/App/ApplicationTools.h>
46 #include <Bpp/Numeric/VectorTools.h>
47 #include <Bpp/Numeric/Matrix/MatrixTools.h>
48 
49 // From SeqLib:
50 #include <Bpp/Seq/Container/VectorSiteContainer.h>
51 
52 using namespace bpp;
53 using namespace std;
54 
55 /******************************************************************************/
56 
58  const SubstitutionModelSet* modelSet,
59  const DiscreteDistribution* rate,
60  const Tree* tree) throw (Exception) :
61  modelSet_(modelSet),
62  alphabet_(modelSet_->getAlphabet()),
63  supportedStates_(modelSet_->getAlphabetStates()),
64  rate_(rate),
65  templateTree_(tree),
66  tree_(*tree),
67  ownModelSet_(false),
68  leaves_(tree_.getLeaves()),
69  seqNames_(),
70  nbNodes_(),
71  nbClasses_(rate_->getNumberOfCategories()),
72  nbStates_(modelSet_->getNumberOfStates()),
73  continuousRates_(false)
74 {
75  if (!modelSet->isFullySetUpFor(*tree))
76  throw Exception("NonHomogeneousSequenceSimulator(constructor). Model set is not fully specified.");
77  init();
78 }
79 
80 /******************************************************************************/
81 
83  const SubstitutionModel* model,
84  const DiscreteDistribution* rate,
85  const Tree* tree) :
86  modelSet_(0),
87  alphabet_(model->getAlphabet()),
88  supportedStates_(model->getAlphabetStates()),
89  rate_(rate),
90  templateTree_(tree),
91  tree_(*tree),
92  ownModelSet_(true),
93  leaves_(tree_.getLeaves()),
94  seqNames_(),
95  nbNodes_(),
96  nbClasses_(rate_->getNumberOfCategories()),
97  nbStates_(model->getNumberOfStates()),
98  continuousRates_(false)
99 {
100  FixedFrequenciesSet* fSet = new FixedFrequenciesSet(model->getStateMap().clone(), model->getFrequencies());
101  fSet->setNamespace("anc.");
102  modelSet_ = SubstitutionModelSetTools::createHomogeneousModelSet(dynamic_cast<SubstitutionModel*>(model->clone()), fSet, templateTree_);
103  init();
104 }
105 
106 /******************************************************************************/
107 
109 {
110  seqNames_.resize(leaves_.size());
111  for (size_t i = 0; i < seqNames_.size(); i++)
112  {
113  seqNames_[i] = leaves_[i]->getName();
114  }
115  // Initialize cumulative pxy:
116  vector<SNode*> nodes = tree_.getNodes();
117  nodes.pop_back(); // remove root
118  nbNodes_ = nodes.size();
119 
120  for (size_t i = 0; i < nodes.size(); i++)
121  {
122  SNode* node = nodes[i];
123  node->getInfos().model = modelSet_->getModelForNode(node->getId());
124  double d = node->getDistanceToFather();
125  VVVdouble* cumpxy_node_ = &node->getInfos().cumpxy;
126  cumpxy_node_->resize(nbClasses_);
127  for (size_t c = 0; c < nbClasses_; c++)
128  {
129  VVdouble* cumpxy_node_c_ = &(*cumpxy_node_)[c];
130  cumpxy_node_c_->resize(nbStates_);
131  RowMatrix<double> P = node->getInfos().model->getPij_t(d * rate_->getCategory(c));
132  for (size_t x = 0; x < nbStates_; x++)
133  {
134  Vdouble* cumpxy_node_c_x_ = &(*cumpxy_node_c_)[x];
135  cumpxy_node_c_x_->resize(nbStates_);
136  (*cumpxy_node_c_x_)[0] = P(x, 0);
137  for (size_t y = 1; y < nbStates_; y++)
138  {
139  (*cumpxy_node_c_x_)[y] = (*cumpxy_node_c_x_)[y - 1] + P(x, y);
140  }
141  }
142  }
143  }
144 }
145 
146 /******************************************************************************/
147 
149 {
150  // Draw an initial state randomly according to equilibrum frequencies:
151  size_t initialStateIndex = 0;
152  double r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
153  double cumprob = 0;
154  vector<double> freqs = modelSet_->getRootFrequencies();
155  for (size_t i = 0; i < nbStates_; i++)
156  {
157  cumprob += freqs[i];
158  if (r <= cumprob)
159  {
160  initialStateIndex = i;
161  break;
162  }
163  }
164  return simulateSite(initialStateIndex);
165 }
166 
167 /******************************************************************************/
168 
169 Site* NonHomogeneousSequenceSimulator::simulateSite(size_t ancestralStateIndex) const
170 {
171  if (continuousRates_)
172  {
173  // Draw a random rate:
174  double rate = rate_->randC();
175  // Make this state evolve:
176  return simulateSite(ancestralStateIndex, rate);
177  }
178  else
179  {
180  // Draw a random rate:
181  size_t rateClass = RandomTools::giveIntRandomNumberBetweenZeroAndEntry<size_t>(rate_->getNumberOfCategories());
182  // Make this state evolve:
183  return simulateSite(ancestralStateIndex, rateClass);
184  }
185 }
186 
187 /******************************************************************************/
188 
189 Site* NonHomogeneousSequenceSimulator::simulateSite(size_t ancestralStateIndex, size_t rateClass) const
190 {
191  // Launch recursion:
192  SNode* root = tree_.getRootNode();
193  root->getInfos().state = ancestralStateIndex;
194  for (size_t i = 0; i < root->getNumberOfSons(); ++i)
195  {
196  evolveInternal(root->getSon(i), rateClass);
197  }
198  // Now create a Site object:
199  Vint site(leaves_.size());
200  for (size_t i = 0; i < leaves_.size(); ++i)
201  {
202  site[i] = leaves_[i]->getInfos().model->getAlphabetStateAsInt(leaves_[i]->getInfos().state);
203  }
204  return new Site(site, alphabet_);
205 }
206 
207 /******************************************************************************/
208 
209 Site* NonHomogeneousSequenceSimulator::simulateSite(size_t ancestralStateIndex, double rate) const
210 {
211  // Launch recursion:
212  SNode* root = tree_.getRootNode();
213  root->getInfos().state = ancestralStateIndex;
214  for (size_t i = 0; i < root->getNumberOfSons(); i++)
215  {
216  evolveInternal(root->getSon(i), rate);
217  }
218  // Now create a Site object:
219  Vint site(leaves_.size());
220  for (size_t i = 0; i < leaves_.size(); i++)
221  {
222  site[i] = leaves_[i]->getInfos().model->getAlphabetStateAsInt(leaves_[i]->getInfos().state);
223  }
224  return new Site(site, alphabet_);
225 }
226 
227 /******************************************************************************/
228 
230 {
231  // Draw an initial state randomly according to equilibrum frequencies:
232  size_t ancestralStateIndex = 0;
233  double r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
234  double cumprob = 0;
235  vector<double> freqs = modelSet_->getRootFrequencies();
236  for (size_t i = 0; i < nbStates_; i++)
237  {
238  cumprob += freqs[i];
239  if (r <= cumprob)
240  {
241  ancestralStateIndex = i;
242  break;
243  }
244  }
245  // Make this state evolve:
246  return simulateSite(ancestralStateIndex, rate);
247 }
248 
249 /******************************************************************************/
250 
251 SiteContainer* NonHomogeneousSequenceSimulator::simulate(size_t numberOfSites) const
252 {
253  vector<size_t> ancestralStateIndices(numberOfSites, 0);
254  for (size_t j = 0; j < numberOfSites; j++)
255  {
256  double r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
257  double cumprob = 0;
258  vector<double> freqs = modelSet_->getRootFrequencies();
259  for (size_t i = 0; i < nbStates_; i++)
260  {
261  cumprob += freqs[i];
262  if (r <= cumprob)
263  {
264  ancestralStateIndices[j] = i;
265  break;
266  }
267  }
268  }
269  if (continuousRates_)
270  {
271  VectorSiteContainer* sites = new VectorSiteContainer(seqNames_.size(), alphabet_);
272  sites->setSequencesNames(seqNames_);
273  for (size_t j = 0; j < numberOfSites; j++)
274  {
275  Site* site = simulateSite();
276  site->setPosition(static_cast<int>(j));
277  sites->addSite(*site);
278  delete site;
279  }
280  return sites;
281  }
282  else
283  {
284  // More efficient to do site this way:
285  // Draw random rates:
286  vector<size_t> rateClasses(numberOfSites);
287  size_t nCat = rate_->getNumberOfCategories();
288  for (size_t j = 0; j < numberOfSites; j++)
289  {
290  rateClasses[j] = RandomTools::giveIntRandomNumberBetweenZeroAndEntry<size_t>(nCat);
291  }
292  // Make these states evolve:
293  SiteContainer* sites = multipleEvolve(ancestralStateIndices, rateClasses);
294  return sites;
295  }
296 }
297 
298 /******************************************************************************/
299 
301 {
302  // Draw an initial state randomly according to equilibrum frequencies:
303  size_t ancestralStateIndex = 0;
304  double r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
305  double cumprob = 0;
306  vector<double> freqs = modelSet_->getRootFrequencies();
307  for (size_t i = 0; i < nbStates_; i++)
308  {
309  cumprob += freqs[i];
310  if (r <= cumprob)
311  {
312  ancestralStateIndex = i;
313  break;
314  }
315  }
316 
317  return dSimulateSite(ancestralStateIndex);
318 }
319 
320 /******************************************************************************/
321 
323 {
324  // Draw a random rate:
325  if (continuousRates_)
326  {
327  double rate = rate_->randC();
328  return dSimulateSite(ancestralStateIndex, rate);
329  }
330  else
331  {
332  size_t rateClass = RandomTools::giveIntRandomNumberBetweenZeroAndEntry<size_t>(rate_->getNumberOfCategories());
333  return dSimulateSite(ancestralStateIndex, rateClass);
334  // NB: this is more efficient than dSimulate(initialState, rDist_->rand())
335  }
336 }
337 
338 /******************************************************************************/
339 
340 RASiteSimulationResult* NonHomogeneousSequenceSimulator::dSimulateSite(size_t ancestralStateIndex, double rate) const
341 {
342  // Make this state evolve:
343  RASiteSimulationResult* hssr = new RASiteSimulationResult(templateTree_, modelSet_->getAlphabet(), ancestralStateIndex, rate);
344  dEvolve(ancestralStateIndex, rate, *hssr);
345  return hssr;
346 }
347 
348 /******************************************************************************/
349 
350 RASiteSimulationResult* NonHomogeneousSequenceSimulator::dSimulateSite(size_t ancestralStateIndex, size_t rateClass) const
351 {
352  return dSimulateSite(ancestralStateIndex, rate_->getCategory(rateClass));
353 }
354 
355 /******************************************************************************/
356 
358 {
359  // Draw an initial state randomly according to equilibrum frequencies:
360  size_t ancestralStateIndex = 0;
361  double r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
362  double cumprob = 0;
363  vector<double> freqs = modelSet_->getRootFrequencies();
364  for (size_t i = 0; i < nbStates_; i++)
365  {
366  cumprob += freqs[i];
367  if (r <= cumprob)
368  {
369  ancestralStateIndex = i;
370  break;
371  }
372  }
373  return dSimulateSite(ancestralStateIndex, rate);
374 }
375 
376 /******************************************************************************/
377 
378 size_t NonHomogeneousSequenceSimulator::evolve(const SNode* node, size_t initialStateIndex, size_t rateClass) const
379 {
380  const Vdouble* cumpxy_node_c_x_ = &node->getInfos().cumpxy[rateClass][initialStateIndex];
381  double rand = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
382  for (size_t y = 0; y < nbStates_; y++)
383  {
384  if (rand < (*cumpxy_node_c_x_)[y]) return y;
385  }
386  throw Exception("HomogeneousSequenceSimulator::evolve. The impossible happened! rand = " + TextTools::toString(rand) + ".");
387 }
388 
389 /******************************************************************************/
390 
391 size_t NonHomogeneousSequenceSimulator::evolve(const SNode* node, size_t initialStateIndex, double rate) const
392 {
393  double cumpxy = 0;
394  double rand = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
395  double l = rate * node->getDistanceToFather();
396  const SubstitutionModel* model = node->getInfos().model;
397  for (size_t y = 0; y < nbStates_; y++)
398  {
399  cumpxy += model->Pij_t(initialStateIndex, y, l);
400  if (rand < cumpxy) return y;
401  }
402  MatrixTools::print(model->getPij_t(l));
403  throw Exception("HomogeneousSequenceSimulator::evolve. The impossible happened! rand = " + TextTools::toString(rand) + ".");
404 }
405 
406 /******************************************************************************/
407 
409  const SNode* node,
410  const std::vector<size_t>& initialStateIndices,
411  const vector<size_t>& rateClasses,
412  std::vector<size_t>& finalStateIndices) const
413 {
414  const VVVdouble* cumpxy_node_ = &node->getInfos().cumpxy;
415  for (size_t i = 0; i < initialStateIndices.size(); i++)
416  {
417  const Vdouble* cumpxy_node_c_x_ = &(*cumpxy_node_)[rateClasses[i]][initialStateIndices[i]];
418  double rand = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
419  for (size_t y = 0; y < nbStates_; y++)
420  {
421  if (rand < (*cumpxy_node_c_x_)[y])
422  {
423  finalStateIndices[i] = y;
424  break;
425  }
426  }
427  }
428 }
429 
430 /******************************************************************************/
431 
432 void NonHomogeneousSequenceSimulator::evolveInternal(SNode* node, size_t rateClass) const
433 {
434  if (!node->hasFather())
435  {
436  cerr << "DEBUG: NonHomogeneousSequenceSimulator::evolveInternal. Forbidden call of method on root node." << endl;
437  return;
438  }
439  node->getInfos().state = evolve(node, node->getFather()->getInfos().state, rateClass);
440  for (size_t i = 0; i < node->getNumberOfSons(); i++)
441  {
442  evolveInternal(node->getSon(i), rateClass);
443  }
444 }
445 
446 /******************************************************************************/
447 
449 {
450  if (!node->hasFather())
451  {
452  cerr << "DEBUG: NonHomogeneousSequenceSimulator::evolveInternal. Forbidden call of method on root node." << endl;
453  return;
454  }
455  node->getInfos().state = evolve(node, node->getFather()->getInfos().state, rate);
456  for (size_t i = 0; i < node->getNumberOfSons(); i++)
457  {
458  evolveInternal(node->getSon(i), rate);
459  }
460 }
461 
462 /******************************************************************************/
463 
464 void NonHomogeneousSequenceSimulator::multipleEvolveInternal(SNode* node, const vector<size_t>& rateClasses) const
465 {
466  if (!node->hasFather())
467  {
468  cerr << "DEBUG: NonHomogeneousSequenceSimulator::multipleEvolveInternal. Forbidden call of method on root node." << endl;
469  return;
470  }
471  const vector<size_t>* initialStates = &node->getFather()->getInfos().states;
472  size_t n = initialStates->size();
473  node->getInfos().states.resize(n); // allocation.
474  multipleEvolve(node, node->getFather()->getInfos().states, rateClasses, node->getInfos().states);
475  for (size_t i = 0; i < node->getNumberOfSons(); i++)
476  {
477  multipleEvolveInternal(node->getSon(i), rateClasses);
478  }
479 }
480 
481 /******************************************************************************/
482 
484  const std::vector<size_t>& initialStateIndices,
485  const vector<size_t>& rateClasses) const
486 {
487  // Launch recursion:
488  SNode* root = tree_.getRootNode();
489  root->getInfos().states = initialStateIndices;
490  for (size_t i = 0; i < root->getNumberOfSons(); i++)
491  {
492  multipleEvolveInternal(root->getSon(i), rateClasses);
493  }
494  // Now create a SiteContainer object:
495  AlignedSequenceContainer* sites = new AlignedSequenceContainer(alphabet_);
496  size_t n = leaves_.size();
497  size_t nbSites = initialStateIndices.size();
498  const SubstitutionModel* model = 0;
499  for (size_t i = 0; i < n; i++)
500  {
501  vector<int> content(nbSites);
502  vector<size_t>& states = leaves_[i]->getInfos().states;
503  model = leaves_[i]->getInfos().model;
504  for (size_t j = 0; j < nbSites; j++)
505  {
506  content[j] = model->getAlphabetStateAsInt(states[j]);
507  }
508  sites->addSequence(BasicSequence(leaves_[i]->getName(), content, alphabet_), false);
509  }
510  return sites;
511 }
512 
513 /******************************************************************************/
514 
515 void NonHomogeneousSequenceSimulator::dEvolve(size_t initialState, double rate, RASiteSimulationResult& rassr) const
516 {
517  // Launch recursion:
518  SNode* root = tree_.getRootNode();
519  root->getInfos().state = initialState;
520  for (size_t i = 0; i < root->getNumberOfSons(); i++)
521  {
522  dEvolveInternal(root->getSon(i), rate, rassr);
523  }
524 }
525 
526 /******************************************************************************/
527 
529 {
530  if (!node->hasFather())
531  {
532  cerr << "DEBUG: NonHomogeneousSequenceSimulator::evolveInternal. Forbidden call of method on root node." << endl;
533  return;
534  }
535  SimpleMutationProcess process(node->getInfos().model);
536  MutationPath mp = process.detailedEvolve(node->getFather()->getInfos().state, node->getDistanceToFather() * rate);
537  node->getInfos().state = mp.getFinalState();
538 
539  // Now append infos in rassr:
540  rassr.addNode(node->getId(), mp);
541 
542  // Now jump to son nodes:
543  for (size_t i = 0; i < node->getNumberOfSons(); i++)
544  {
545  dEvolveInternal(node->getSon(i), rate, rassr);
546  }
547 }
548 
549 /******************************************************************************/
550 
const NodeTemplate< NodeInfos > * getFather() const
Get the father of this node is there is one.
Definition: NodeTemplate.h:141
Substitution models manager for non-homogeneous / non-reversible models of evolution.
Interface for all substitution models.
const SubstitutionModel * getModelForNode(int nodeId) const
Get the model associated to a particular node id.
virtual int getAlphabetStateAsInt(size_t index) const =0
FrequenciesSet useful for homogeneous and stationary models.
const NodeTemplate< NodeInfos > * getSon(size_t i) const
Definition: NodeTemplate.h:147
virtual const StateMap & getStateMap() const =0
NonHomogeneousSequenceSimulator(const SubstitutionModelSet *modelSet, const DiscreteDistribution *rate, const Tree *tree)
This class is used by MutationProcess to store detailed results of simulations.
size_t evolve(const SNode *node, size_t initialStateIndex, size_t rateClass) const
Evolve from an initial state along a branch, knowing the evolutionary rate class. ...
virtual void addNode(int nodeId, MutationPath path)
std::vector< double > getRootFrequencies() const
STL namespace.
virtual StateMap * clone() const =0
Interface for phylogenetic tree objects.
Definition: Tree.h:148
SiteContainer * simulate(size_t numberOfSites) const
virtual const Vdouble & getFrequencies() const =0
virtual bool hasFather() const
Tell if this node has a father node.
Definition: Node.h:379
void multipleEvolveInternal(SNode *node, const std::vector< size_t > &rateClasses) const
virtual double getDistanceToFather() const
Get the distance to the father node is there is one, otherwise throw a NodeException.
Definition: Node.h:283
const Alphabet * getAlphabet() const
virtual const Matrix< double > & getPij_t(double t) const =0
virtual int getId() const
Get the node&#39;s id.
Definition: Node.h:203
virtual double Pij_t(size_t i, size_t j, double t) const =0
void multipleEvolve(const SNode *node, const std::vector< size_t > &initialStateIndices, const std::vector< size_t > &rateClasses, std::vector< size_t > &finalStates) const
The same as the evolve(initialState, rateClass) function, but for several sites at a time...
void evolveInternal(SNode *node, size_t rateClass) const
RASiteSimulationResult * dSimulateSite() const
Get a detailed simulation result for one site.
std::vector< SNode * > leaves_
This stores once for all all leaves in a given order. This order will be used during site creation...
void dEvolveInternal(SNode *node, double rate, RASiteSimulationResult &rassr) const
virtual size_t getNumberOfSons() const
Definition: Node.h:388
void dEvolve(size_t initialState, double rate, RASiteSimulationResult &rassr) const
SubstitutionModel * clone() const =0
static SubstitutionModelSet * createHomogeneousModelSet(SubstitutionModel *model, FrequenciesSet *rootFreqs, const Tree *tree)
Create a SubstitutionModelSet object, corresponding to the homogeneous case.
Data structure to store the result of a DetailedSiteSimulator.
virtual const NodeInfos & getInfos() const
Definition: NodeTemplate.h:179
The NodeTemplate class.
Definition: NodeTemplate.h:73
Generally used mutation process model.