bpp-phyl  2.2.0
SubstitutionMappingTools.cpp
Go to the documentation of this file.
1 //
2 // File: SubstitutionMappingTools.cpp
3 // Created by: Julien Dutheil
4 // Created on: Wed Apr 5 13:04 2006
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004, 2005, 2006)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
42 #include "DecompositionReward.h"
44 #include "RewardMappingTools.h"
45 #include "../Likelihood/DRTreeLikelihoodTools.h"
46 #include "../Likelihood/MarginalAncestralStateReconstruction.h"
47 
48 #include <Bpp/Text/TextTools.h>
49 #include <Bpp/App/ApplicationTools.h>
50 #include <Bpp/Numeric/Matrix/MatrixTools.h>
51 #include <Bpp/Numeric/DataTable.h>
52 #include <Bpp/Seq/AlphabetIndex/UserAlphabetIndex1.h>
53 
54 using namespace bpp;
55 
56 // From the STL:
57 #include <iomanip>
58 
59 using namespace std;
60 
61 /******************************************************************************/
62 
64  const DRTreeLikelihood& drtl,
65  const vector<int>& nodeIds,
66  SubstitutionCount& substitutionCount,
67  bool verbose) throw (Exception)
68 {
69  // Preamble:
70  if (!drtl.isInitialized())
71  throw Exception("SubstitutionMappingTools::computeSubstitutionVectors(). Likelihood object is not initialized.");
72 
73  // A few variables we'll need:
74 
75  const TreeTemplate<Node> tree(drtl.getTree());
76  const SiteContainer* sequences = drtl.getData();
77  const DiscreteDistribution* rDist = drtl.getRateDistribution();
78 
79  size_t nbSites = sequences->getNumberOfSites();
80  size_t nbDistinctSites = drtl.getLikelihoodData()->getNumberOfDistinctSites();
81  size_t nbStates = sequences->getAlphabet()->getSize();
82  size_t nbClasses = rDist->getNumberOfCategories();
83  size_t nbTypes = substitutionCount.getNumberOfSubstitutionTypes();
84  vector<const Node*> nodes = tree.getNodes();
85  const vector<size_t>* rootPatternLinks
86  = &drtl.getLikelihoodData()->getRootArrayPositions();
87  nodes.pop_back(); // Remove root node.
88  size_t nbNodes = nodes.size();
89 
90  // We create a new ProbabilisticSubstitutionMapping object:
91  ProbabilisticSubstitutionMapping* substitutions = new ProbabilisticSubstitutionMapping(tree, &substitutionCount, nbSites);
92 
93  // Store likelihood for each rate for each site:
94  VVVdouble lik;
95  drtl.computeLikelihoodAtNode(tree.getRootId(), lik);
96  Vdouble Lr(nbDistinctSites, 0);
97  Vdouble rcProbs = rDist->getProbabilities();
98  Vdouble rcRates = rDist->getCategories();
99  for (size_t i = 0; i < nbDistinctSites; i++)
100  {
101  VVdouble* lik_i = &lik[i];
102  for (size_t c = 0; c < nbClasses; c++)
103  {
104  Vdouble* lik_i_c = &(*lik_i)[c];
105  double rc = rDist->getProbability(c);
106  for (size_t s = 0; s < nbStates; s++)
107  {
108  Lr[i] += (*lik_i_c)[s] * rc;
109  }
110  }
111  }
112 
113  // Compute the number of substitutions for each class and each branch in the tree:
114  if (verbose)
115  ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);
116 
117  for (size_t l = 0; l < nbNodes; ++l)
118  {
119  // For each node,
120  const Node* currentNode = nodes[l];
121  if (nodeIds.size() > 0 && !VectorTools::contains(nodeIds, currentNode->getId()))
122  continue;
123 
124  const Node* father = currentNode->getFather();
125 
126  double d = currentNode->getDistanceToFather();
127 
128  if (verbose)
129  ApplicationTools::displayGauge(l, nbNodes - 1);
130  VVdouble substitutionsForCurrentNode(nbDistinctSites);
131  for (size_t i = 0; i < nbDistinctSites; ++i)
132  {
133  substitutionsForCurrentNode[i].resize(nbTypes);
134  }
135 
136  // Now we've got to compute likelihoods in a smart manner... ;)
137  VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
138  for (size_t i = 0; i < nbDistinctSites; i++)
139  {
140  VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
141  likelihoodsFatherConstantPart_i->resize(nbClasses);
142  for (size_t c = 0; c < nbClasses; c++)
143  {
144  Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
145  likelihoodsFatherConstantPart_i_c->resize(nbStates);
146  double rc = rDist->getProbability(c);
147  for (size_t s = 0; s < nbStates; s++)
148  {
149  // (* likelihoodsFatherConstantPart_i_c)[s] = rc * model->freq(s);
150  // freq is already accounted in the array
151  (*likelihoodsFatherConstantPart_i_c)[s] = rc;
152  }
153  }
154  }
155 
156  // First, what will remain constant:
157  size_t nbSons = father->getNumberOfSons();
158  for (size_t n = 0; n < nbSons; n++)
159  {
160  const Node* currentSon = father->getSon(n);
161  if (currentSon->getId() != currentNode->getId())
162  {
163  const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());
164 
165  // Now iterate over all site partitions:
166  auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentSon->getId()));
167  VVVdouble pxy;
168  bool first;
169  while (mit->hasNext())
170  {
172  auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
173  first = true;
174  while (sit->hasNext())
175  {
176  size_t i = sit->next();
177  // We retrieve the transition probabilities for this site partition:
178  if (first)
179  {
180  pxy = drtl.getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
181  first = false;
182  }
183  const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
184  VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
185  for (size_t c = 0; c < nbClasses; c++)
186  {
187  const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
188  Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
189  VVdouble* pxy_c = &pxy[c];
190  for (size_t x = 0; x < nbStates; x++)
191  {
192  Vdouble* pxy_c_x = &(*pxy_c)[x];
193  double likelihood = 0.;
194  for (size_t y = 0; y < nbStates; y++)
195  {
196  likelihood += (*pxy_c_x)[y] * (*likelihoodsFather_son_i_c)[y];
197  }
198  (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
199  }
200  }
201  }
202  }
203  }
204  }
205  if (father->hasFather())
206  {
207  const Node* currentSon = father->getFather();
208  const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());
209  // Now iterate over all site partitions:
210  auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(father->getId()));
211  VVVdouble pxy;
212  bool first;
213  while (mit->hasNext())
214  {
216  auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
217  first = true;
218  while (sit->hasNext())
219  {
220  size_t i = sit->next();
221  // We retrieve the transition probabilities for this site partition:
222  if (first)
223  {
224  pxy = drtl.getTransitionProbabilitiesPerRateClass(father->getId(), i);
225  first = false;
226  }
227  const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
228  VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
229  for (size_t c = 0; c < nbClasses; c++)
230  {
231  const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
232  Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
233  VVdouble* pxy_c = &pxy[c];
234  for (size_t x = 0; x < nbStates; x++)
235  {
236  double likelihood = 0.;
237  for (size_t y = 0; y < nbStates; y++)
238  {
239  Vdouble* pxy_c_x = &(*pxy_c)[y];
240  likelihood += (*pxy_c_x)[x] * (*likelihoodsFather_son_i_c)[y];
241  }
242  (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
243  }
244  }
245  }
246  }
247  }
248  else
249  {
250  // Account for root frequencies:
251  for (size_t i = 0; i < nbDistinctSites; i++)
252  {
253  vector<double> freqs = drtl.getRootFrequencies(i);
254  VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
255  for (size_t c = 0; c < nbClasses; c++)
256  {
257  Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
258  for (size_t x = 0; x < nbStates; x++)
259  {
260  (*likelihoodsFatherConstantPart_i_c)[x] *= freqs[x];
261  }
262  }
263  }
264  }
265 
266 
267  // Then, we deal with the node of interest.
268  // We first average upon 'y' to save computations, and then upon 'x'.
269  // ('y' is the state at 'node' and 'x' the state at 'father'.)
270 
271  // Iterate over all site partitions:
272  const VVVdouble* likelihoodsFather_node = &(drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentNode->getId()));
273  auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentNode->getId()));
274  VVVdouble pxy;
275  bool first;
276  while (mit->hasNext())
277  {
279  substitutionCount.setSubstitutionModel(bmd->getModel());
280  // compute all nxy first:
281  VVVVdouble nxy(nbClasses);
282  for (size_t c = 0; c < nbClasses; ++c)
283  {
284  VVVdouble* nxy_c = &nxy[c];
285  double rc = rcRates[c];
286  nxy_c->resize(nbTypes);
287  for (size_t t = 0; t < nbTypes; ++t)
288  {
289  VVdouble* nxy_c_t = &(*nxy_c)[t];
290  Matrix<double>* nijt = substitutionCount.getAllNumbersOfSubstitutions(d * rc, t + 1);
291  nxy_c_t->resize(nbStates);
292  for (size_t x = 0; x < nbStates; ++x)
293  {
294  Vdouble* nxy_c_t_x = &(*nxy_c_t)[x];
295  nxy_c_t_x->resize(nbStates);
296  for (size_t y = 0; y < nbStates; ++y)
297  {
298  (*nxy_c_t_x)[y] = (*nijt)(x, y);
299  }
300  }
301  delete nijt;
302  }
303  }
304 
305  // Now loop over sites:
306  auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
307  first = true;
308  while (sit->hasNext())
309  {
310  size_t i = sit->next();
311  // We retrieve the transition probabilities and substitution counts for this site partition:
312  if (first)
313  {
314  pxy = drtl.getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
315  first = false;
316  }
317  const VVdouble* likelihoodsFather_node_i = &(*likelihoodsFather_node)[i];
318  VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
319  for (size_t c = 0; c < nbClasses; ++c)
320  {
321  const Vdouble* likelihoodsFather_node_i_c = &(*likelihoodsFather_node_i)[c];
322  Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
323  const VVdouble* pxy_c = &pxy[c];
324  VVVdouble* nxy_c = &nxy[c];
325  for (size_t x = 0; x < nbStates; ++x)
326  {
327  double* likelihoodsFatherConstantPart_i_c_x = &(*likelihoodsFatherConstantPart_i_c)[x];
328  const Vdouble* pxy_c_x = &(*pxy_c)[x];
329  for (size_t y = 0; y < nbStates; ++y)
330  {
331  double likelihood_cxy = (*likelihoodsFatherConstantPart_i_c_x)
332  * (*pxy_c_x)[y]
333  * (*likelihoodsFather_node_i_c)[y];
334 
335  for (size_t t = 0; t < nbTypes; ++t)
336  {
337  // Now the vector computation:
338  substitutionsForCurrentNode[i][t] += likelihood_cxy * (*nxy_c)[t][x][y];
339  // <------------> <--------------->
340  // Posterior probability | |
341  // for site i and rate class c * | |
342  // likelihood for this site----------------+ |
343  // |
344  // Substitution function for site i and rate class c----------+
345  }
346  }
347  }
348  }
349  }
350  }
351 
352  // Now we just have to copy the substitutions into the result vector:
353  for (size_t i = 0; i < nbSites; ++i)
354  {
355  for (size_t t = 0; t < nbTypes; ++t)
356  {
357  (*substitutions)(l, i, t) = substitutionsForCurrentNode[(*rootPatternLinks)[i]][t] / Lr[(*rootPatternLinks)[i]];
358  }
359  }
360  }
361  if (verbose)
362  {
363  if (ApplicationTools::message)
364  *ApplicationTools::message << " ";
365  ApplicationTools::displayTaskDone();
366  }
367 
368  return substitutions;
369 }
370 
371 /******************************************************************************/
372 
374  const DRTreeLikelihood& drtl,
375  const SubstitutionModelSet& modelSet,
376  const vector<int>& nodeIds,
377  SubstitutionCount& substitutionCount,
378  bool verbose) throw (Exception)
379 {
380  // Preamble:
381  if (!drtl.isInitialized())
382  throw Exception("SubstitutionMappingTools::computeSubstitutionVectors(). Likelihood object is not initialized.");
383 
384  // A few variables we'll need:
385 
386  const TreeTemplate<Node> tree(drtl.getTree());
387  const SiteContainer* sequences = drtl.getData();
388  const DiscreteDistribution* rDist = drtl.getRateDistribution();
389 
390  size_t nbSites = sequences->getNumberOfSites();
391  size_t nbDistinctSites = drtl.getLikelihoodData()->getNumberOfDistinctSites();
392  size_t nbStates = sequences->getAlphabet()->getSize();
393  size_t nbClasses = rDist->getNumberOfCategories();
394  size_t nbTypes = substitutionCount.getNumberOfSubstitutionTypes();
395  vector<const Node*> nodes = tree.getNodes();
396  const vector<size_t>* rootPatternLinks
397  = &drtl.getLikelihoodData()->getRootArrayPositions();
398  nodes.pop_back(); // Remove root node.
399  size_t nbNodes = nodes.size();
400 
401  // We create a new ProbabilisticSubstitutionMapping object:
402  ProbabilisticSubstitutionMapping* substitutions = new ProbabilisticSubstitutionMapping(tree, &substitutionCount, nbSites);
403 
404  // Store likelihood for each rate for each site:
405  VVVdouble lik;
406  drtl.computeLikelihoodAtNode(tree.getRootId(), lik);
407  Vdouble Lr(nbDistinctSites, 0);
408  Vdouble rcProbs = rDist->getProbabilities();
409  Vdouble rcRates = rDist->getCategories();
410  for (size_t i = 0; i < nbDistinctSites; i++)
411  {
412  VVdouble* lik_i = &lik[i];
413  for (size_t c = 0; c < nbClasses; c++)
414  {
415  Vdouble* lik_i_c = &(*lik_i)[c];
416  double rc = rDist->getProbability(c);
417  for (size_t s = 0; s < nbStates; s++)
418  {
419  Lr[i] += (*lik_i_c)[s] * rc;
420  }
421  }
422  }
423 
424  // Compute the number of substitutions for each class and each branch in the tree:
425  if (verbose)
426  ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);
427 
428  for (size_t l = 0; l < nbNodes; ++l)
429  {
430  // For each node,
431  const Node* currentNode = nodes[l];
432  if (nodeIds.size() > 0 && !VectorTools::contains(nodeIds, currentNode->getId()))
433  continue;
434 
435  const Node* father = currentNode->getFather();
436 
437  double d = currentNode->getDistanceToFather();
438 
439  if (verbose)
440  ApplicationTools::displayGauge(l, nbNodes - 1);
441  VVdouble substitutionsForCurrentNode(nbDistinctSites);
442  for (size_t i = 0; i < nbDistinctSites; ++i)
443  {
444  substitutionsForCurrentNode[i].resize(nbTypes);
445  }
446 
447  // Now we've got to compute likelihoods in a smart manner... ;)
448  VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
449  for (size_t i = 0; i < nbDistinctSites; i++)
450  {
451  VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
452  likelihoodsFatherConstantPart_i->resize(nbClasses);
453  for (size_t c = 0; c < nbClasses; c++)
454  {
455  Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
456  likelihoodsFatherConstantPart_i_c->resize(nbStates);
457  double rc = rDist->getProbability(c);
458  for (size_t s = 0; s < nbStates; s++)
459  {
460  // (* likelihoodsFatherConstantPart_i_c)[s] = rc * model->freq(s);
461  // freq is already accounted in the array
462  (*likelihoodsFatherConstantPart_i_c)[s] = rc;
463  }
464  }
465  }
466 
467  // First, what will remain constant:
468  size_t nbSons = father->getNumberOfSons();
469  for (size_t n = 0; n < nbSons; n++)
470  {
471  const Node* currentSon = father->getSon(n);
472  if (currentSon->getId() != currentNode->getId())
473  {
474  const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());
475 
476  // Now iterate over all site partitions:
477  auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentSon->getId()));
478  VVVdouble pxy;
479  bool first;
480  while (mit->hasNext())
481  {
483  auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
484  first = true;
485  while (sit->hasNext())
486  {
487  size_t i = sit->next();
488  // We retrieve the transition probabilities for this site partition:
489  if (first)
490  {
491  pxy = drtl.getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
492  first = false;
493  }
494  const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
495  VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
496  for (size_t c = 0; c < nbClasses; c++)
497  {
498  const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
499  Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
500  VVdouble* pxy_c = &pxy[c];
501  for (size_t x = 0; x < nbStates; x++)
502  {
503  Vdouble* pxy_c_x = &(*pxy_c)[x];
504  double likelihood = 0.;
505  for (size_t y = 0; y < nbStates; y++)
506  {
507  likelihood += (*pxy_c_x)[y] * (*likelihoodsFather_son_i_c)[y];
508  }
509  (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
510  }
511  }
512  }
513  }
514  }
515  }
516  if (father->hasFather())
517  {
518  const Node* currentSon = father->getFather();
519  const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());
520  // Now iterate over all site partitions:
521  auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(father->getId()));
522  VVVdouble pxy;
523  bool first;
524  while (mit->hasNext())
525  {
527  auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
528  first = true;
529  while (sit->hasNext())
530  {
531  size_t i = sit->next();
532  // We retrieve the transition probabilities for this site partition:
533  if (first)
534  {
535  pxy = drtl.getTransitionProbabilitiesPerRateClass(father->getId(), i);
536  first = false;
537  }
538  const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
539  VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
540  for (size_t c = 0; c < nbClasses; c++)
541  {
542  const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
543  Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
544  VVdouble* pxy_c = &pxy[c];
545  for (size_t x = 0; x < nbStates; x++)
546  {
547  double likelihood = 0.;
548  for (size_t y = 0; y < nbStates; y++)
549  {
550  Vdouble* pxy_c_x = &(*pxy_c)[y];
551  likelihood += (*pxy_c_x)[x] * (*likelihoodsFather_son_i_c)[y];
552  }
553  (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
554  }
555  }
556  }
557  }
558  }
559  else
560  {
561  // Account for root frequencies:
562  for (size_t i = 0; i < nbDistinctSites; i++)
563  {
564  vector<double> freqs = drtl.getRootFrequencies(i);
565  VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
566  for (size_t c = 0; c < nbClasses; c++)
567  {
568  Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
569  for (size_t x = 0; x < nbStates; x++)
570  {
571  (*likelihoodsFatherConstantPart_i_c)[x] *= freqs[x];
572  }
573  }
574  }
575  }
576 
577 
578  // Then, we deal with the node of interest.
579  // We first average upon 'y' to save computations, and then upon 'x'.
580  // ('y' is the state at 'node' and 'x' the state at 'father'.)
581 
582  // Iterate over all site partitions:
583  const VVVdouble* likelihoodsFather_node = &(drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentNode->getId()));
584  auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentNode->getId()));
585  VVVdouble pxy;
586  bool first;
587  while (mit->hasNext())
588  {
590  substitutionCount.setSubstitutionModel(modelSet.getModelForNode(currentNode->getId()));
591 
592  // compute all nxy first:
593  VVVVdouble nxy(nbClasses);
594  for (size_t c = 0; c < nbClasses; ++c)
595  {
596  VVVdouble* nxy_c = &nxy[c];
597  double rc = rcRates[c];
598  nxy_c->resize(nbTypes);
599  for (size_t t = 0; t < nbTypes; ++t)
600  {
601  VVdouble* nxy_c_t = &(*nxy_c)[t];
602  Matrix<double>* nijt = substitutionCount.getAllNumbersOfSubstitutions(d * rc, t + 1);
603  nxy_c_t->resize(nbStates);
604  for (size_t x = 0; x < nbStates; ++x)
605  {
606  Vdouble* nxy_c_t_x = &(*nxy_c_t)[x];
607  nxy_c_t_x->resize(nbStates);
608  for (size_t y = 0; y < nbStates; ++y)
609  {
610  (*nxy_c_t_x)[y] = (*nijt)(x, y);
611  }
612  }
613  delete nijt;
614  }
615  }
616 
617  // Now loop over sites:
618  auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
619  first = true;
620  while (sit->hasNext())
621  {
622  size_t i = sit->next();
623  // We retrieve the transition probabilities and substitution counts for this site partition:
624  if (first)
625  {
626  pxy = drtl.getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
627  first = false;
628  }
629  const VVdouble* likelihoodsFather_node_i = &(*likelihoodsFather_node)[i];
630  VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
631  for (size_t c = 0; c < nbClasses; ++c)
632  {
633  const Vdouble* likelihoodsFather_node_i_c = &(*likelihoodsFather_node_i)[c];
634  Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
635  const VVdouble* pxy_c = &pxy[c];
636  VVVdouble* nxy_c = &nxy[c];
637  for (size_t x = 0; x < nbStates; ++x)
638  {
639  double* likelihoodsFatherConstantPart_i_c_x = &(*likelihoodsFatherConstantPart_i_c)[x];
640  const Vdouble* pxy_c_x = &(*pxy_c)[x];
641  for (size_t y = 0; y < nbStates; ++y)
642  {
643  double likelihood_cxy = (*likelihoodsFatherConstantPart_i_c_x)
644  * (*pxy_c_x)[y]
645  * (*likelihoodsFather_node_i_c)[y];
646 
647  for (size_t t = 0; t < nbTypes; ++t)
648  {
649  // Now the vector computation:
650  substitutionsForCurrentNode[i][t] += likelihood_cxy * (*nxy_c)[t][x][y];
651  // <------------> <--------------->
652  // Posterior probability | |
653  // for site i and rate class c * | |
654  // likelihood for this site----------------+ |
655  // |
656  // Substitution function for site i and rate class c----------+
657  }
658  }
659  }
660  }
661  }
662  }
663 
664  // Now we just have to copy the substitutions into the result vector:
665  for (size_t i = 0; i < nbSites; ++i)
666  {
667  for (size_t t = 0; t < nbTypes; ++t)
668  {
669  (*substitutions)(l, i, t) = substitutionsForCurrentNode[(*rootPatternLinks)[i]][t] / Lr[(*rootPatternLinks)[i]];
670  }
671  }
672  }
673  if (verbose)
674  {
675  if (ApplicationTools::message)
676  *ApplicationTools::message << " ";
677  ApplicationTools::displayTaskDone();
678  }
679 
680  return substitutions;
681 }
682 
683 /**************************************************************************************************/
684 
686  const DRTreeLikelihood& drtl,
687  SubstitutionCount& substitutionCount,
688  bool verbose) throw (Exception)
689 {
690  // Preamble:
691  if (!drtl.isInitialized())
692  throw Exception("SubstitutionMappingTools::computeSubstitutionVectorsNoAveraging(). Likelihood object is not initialized.");
693 
694  // A few variables we'll need:
695  const TreeTemplate<Node> tree(drtl.getTree());
696  const SiteContainer* sequences = drtl.getData();
697  const DiscreteDistribution* rDist = drtl.getRateDistribution();
698 
699  size_t nbSites = sequences->getNumberOfSites();
700  size_t nbDistinctSites = drtl.getLikelihoodData()->getNumberOfDistinctSites();
701  size_t nbStates = sequences->getAlphabet()->getSize();
702  size_t nbClasses = rDist->getNumberOfCategories();
703  size_t nbTypes = substitutionCount.getNumberOfSubstitutionTypes();
704  vector<const Node*> nodes = tree.getNodes();
705  const vector<size_t>* rootPatternLinks
706  = &drtl.getLikelihoodData()->getRootArrayPositions();
707  nodes.pop_back(); // Remove root node.
708  size_t nbNodes = nodes.size();
709 
710  // We create a new ProbabilisticSubstitutionMapping object:
711  ProbabilisticSubstitutionMapping* substitutions = new ProbabilisticSubstitutionMapping(tree, &substitutionCount, nbSites);
712 
713  Vdouble rcRates = rDist->getCategories();
714 
715  // Compute the number of substitutions for each class and each branch in the tree:
716  if (verbose)
717  ApplicationTools::displayTask("Compute joint node-pairs likelihood", true);
718 
719  for (size_t l = 0; l < nbNodes; ++l)
720  {
721  // For each node,
722  const Node* currentNode = nodes[l];
723 
724  const Node* father = currentNode->getFather();
725 
726  double d = currentNode->getDistanceToFather();
727 
728  if (verbose)
729  ApplicationTools::displayGauge(l, nbNodes - 1);
730  VVdouble substitutionsForCurrentNode(nbDistinctSites);
731  for (size_t i = 0; i < nbDistinctSites; ++i)
732  {
733  substitutionsForCurrentNode[i].resize(nbTypes);
734  }
735 
736  // Now we've got to compute likelihoods in a smart manner... ;)
737  VVVdouble likelihoodsFatherConstantPart(nbDistinctSites);
738  for (size_t i = 0; i < nbDistinctSites; ++i)
739  {
740  VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
741  likelihoodsFatherConstantPart_i->resize(nbClasses);
742  for (size_t c = 0; c < nbClasses; ++c)
743  {
744  Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
745  likelihoodsFatherConstantPart_i_c->resize(nbStates);
746  double rc = rDist->getProbability(c);
747  for (size_t s = 0; s < nbStates; ++s)
748  {
749  // (* likelihoodsFatherConstantPart_i_c)[s] = rc * model->freq(s);
750  // freq is already accounted in the array
751  (*likelihoodsFatherConstantPart_i_c)[s] = rc;
752  }
753  }
754  }
755 
756  // First, what will remain constant:
757  size_t nbSons = father->getNumberOfSons();
758  for (size_t n = 0; n < nbSons; ++n)
759  {
760  const Node* currentSon = father->getSon(n);
761  if (currentSon->getId() != currentNode->getId())
762  {
763  const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());
764 
765  // Now iterate over all site partitions:
766  auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentSon->getId()));
767  VVVdouble pxy;
768  bool first;
769  while (mit->hasNext())
770  {
772  auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
773  first = true;
774  while (sit->hasNext())
775  {
776  size_t i = sit->next();
777  // We retrieve the transition probabilities for this site partition:
778  if (first)
779  {
780  pxy = drtl.getTransitionProbabilitiesPerRateClass(currentSon->getId(), i);
781  first = false;
782  }
783  const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
784  VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
785  for (size_t c = 0; c < nbClasses; ++c)
786  {
787  const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
788  Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
789  VVdouble* pxy_c = &pxy[c];
790  for (size_t x = 0; x < nbStates; ++x)
791  {
792  Vdouble* pxy_c_x = &(*pxy_c)[x];
793  double likelihood = 0.;
794  for (size_t y = 0; y < nbStates; ++y)
795  {
796  likelihood += (*pxy_c_x)[y] * (*likelihoodsFather_son_i_c)[y];
797  }
798  (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
799  }
800  }
801  }
802  }
803  }
804  }
805  if (father->hasFather())
806  {
807  const Node* currentSon = father->getFather();
808  const VVVdouble* likelihoodsFather_son = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentSon->getId());
809  // Now iterate over all site partitions:
810  auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(father->getId()));
811  VVVdouble pxy;
812  bool first;
813  while (mit->hasNext())
814  {
816  auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
817  first = true;
818  while (sit->hasNext())
819  {
820  size_t i = sit->next();
821  // We retrieve the transition probabilities for this site partition:
822  if (first)
823  {
824  pxy = drtl.getTransitionProbabilitiesPerRateClass(father->getId(), i);
825  first = false;
826  }
827  const VVdouble* likelihoodsFather_son_i = &(*likelihoodsFather_son)[i];
828  VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
829  for (size_t c = 0; c < nbClasses; ++c)
830  {
831  const Vdouble* likelihoodsFather_son_i_c = &(*likelihoodsFather_son_i)[c];
832  Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
833  VVdouble* pxy_c = &pxy[c];
834  for (size_t x = 0; x < nbStates; ++x)
835  {
836  double likelihood = 0.;
837  for (size_t y = 0; y < nbStates; ++y)
838  {
839  Vdouble* pxy_c_x = &(*pxy_c)[y];
840  likelihood += (*pxy_c_x)[x] * (*likelihoodsFather_son_i_c)[y];
841  }
842  (*likelihoodsFatherConstantPart_i_c)[x] *= likelihood;
843  }
844  }
845  }
846  }
847  }
848  else
849  {
850  // Account for root frequencies:
851  for (size_t i = 0; i < nbDistinctSites; ++i)
852  {
853  vector<double> freqs = drtl.getRootFrequencies(i);
854  VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
855  for (size_t c = 0; c < nbClasses; ++c)
856  {
857  Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
858  for (size_t x = 0; x < nbStates; ++x)
859  {
860  (*likelihoodsFatherConstantPart_i_c)[x] *= freqs[x];
861  }
862  }
863  }
864  }
865 
866  // Then, we deal with the node of interest.
867  // We first average uppon 'y' to save computations, and then uppon 'x'.
868  // ('y' is the state at 'node' and 'x' the state at 'father'.)
869 
870  // Iterate over all site partitions:
871  const VVVdouble* likelihoodsFather_node = &drtl.getLikelihoodData()->getLikelihoodArray(father->getId(), currentNode->getId());
872  auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentNode->getId()));
873  VVVdouble pxy;
874  bool first;
875  while (mit->hasNext())
876  {
878  substitutionCount.setSubstitutionModel(bmd->getModel());
879  // compute all nxy first:
880  VVVVdouble nxy(nbClasses);
881  for (size_t c = 0; c < nbClasses; ++c)
882  {
883  double rc = rcRates[c];
884  VVVdouble* nxy_c = &nxy[c];
885  nxy_c->resize(nbTypes);
886  for (size_t t = 0; t < nbTypes; ++t)
887  {
888  VVdouble* nxy_c_t = &(*nxy_c)[t];
889  nxy_c_t->resize(nbStates);
890  Matrix<double>* nijt = substitutionCount.getAllNumbersOfSubstitutions(d * rc, t + 1);
891  for (size_t x = 0; x < nbStates; ++x)
892  {
893  Vdouble* nxy_c_t_x = &(*nxy_c_t)[x];
894  nxy_c_t_x->resize(nbStates);
895  for (size_t y = 0; y < nbStates; ++y)
896  {
897  (*nxy_c_t_x)[y] = (*nijt)(x, y);
898  }
899  }
900  delete nijt;
901  }
902  }
903 
904  // Now loop over sites:
905  auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
906  first = true;
907  while (sit->hasNext())
908  {
909  size_t i = sit->next();
910  // We retrieve the transition probabilities and substitution counts for this site partition:
911  if (first)
912  {
913  pxy = drtl.getTransitionProbabilitiesPerRateClass(currentNode->getId(), i);
914  first = false;
915  }
916  const VVdouble* likelihoodsFather_node_i = &(*likelihoodsFather_node)[i];
917  VVdouble* likelihoodsFatherConstantPart_i = &likelihoodsFatherConstantPart[i];
918  RowMatrix<double> pairProbabilities(nbStates, nbStates);
919  MatrixTools::fill(pairProbabilities, 0.);
920  VVVdouble subsCounts(nbStates);
921  for (size_t j = 0; j < nbStates; ++j)
922  {
923  subsCounts[j].resize(nbStates);
924  for (size_t k = 0; k < nbStates; ++k)
925  {
926  subsCounts[j][k].resize(nbTypes);
927  }
928  }
929  for (size_t c = 0; c < nbClasses; ++c)
930  {
931  const Vdouble* likelihoodsFather_node_i_c = &(*likelihoodsFather_node_i)[c];
932  Vdouble* likelihoodsFatherConstantPart_i_c = &(*likelihoodsFatherConstantPart_i)[c];
933  const VVdouble* pxy_c = &pxy[c];
934  VVVdouble* nxy_c = &nxy[c];
935  for (size_t x = 0; x < nbStates; ++x)
936  {
937  double* likelihoodsFatherConstantPart_i_c_x = &(*likelihoodsFatherConstantPart_i_c)[x];
938  const Vdouble* pxy_c_x = &(*pxy_c)[x];
939  for (size_t y = 0; y < nbStates; ++y)
940  {
941  double likelihood_cxy = (*likelihoodsFatherConstantPart_i_c_x)
942  * (*pxy_c_x)[y]
943  * (*likelihoodsFather_node_i_c)[y];
944  pairProbabilities(x, y) += likelihood_cxy; // Sum over all rate classes.
945  for (size_t t = 0; t < nbTypes; ++t)
946  {
947  subsCounts[x][y][t] += likelihood_cxy * (*nxy_c)[t][x][y];
948  }
949  }
950  }
951  }
952  // Now the vector computation:
953  // Here we do not average over all possible pair of ancestral states,
954  // We only consider the one with max likelihood:
955  vector<size_t> xy = MatrixTools::whichMax(pairProbabilities);
956  for (size_t t = 0; t < nbTypes; ++t)
957  {
958  substitutionsForCurrentNode[i][t] += subsCounts[xy[0]][xy[1]][t] / pairProbabilities(xy[0], xy[1]);
959  }
960  }
961  }
962  // Now we just have to copy the substitutions into the result vector:
963  for (size_t i = 0; i < nbSites; i++)
964  {
965  for (size_t t = 0; t < nbTypes; t++)
966  {
967  (*substitutions)(l, i, t) = substitutionsForCurrentNode[(*rootPatternLinks)[i]][t];
968  }
969  }
970  }
971  if (verbose)
972  {
973  if (ApplicationTools::message)
974  *ApplicationTools::message << " ";
975  ApplicationTools::displayTaskDone();
976  }
977  return substitutions;
978 }
979 
980 /**************************************************************************************************/
981 
983  const DRTreeLikelihood& drtl,
984  SubstitutionCount& substitutionCount,
985  bool verbose) throw (Exception)
986 {
987  // Preamble:
988  if (!drtl.isInitialized())
989  throw Exception("SubstitutionMappingTools::computeSubstitutionVectorsNoAveragingMarginal(). Likelihood object is not initialized.");
990 
991  // A few variables we'll need:
992 
993  const TreeTemplate<Node> tree(drtl.getTree());
994  const SiteContainer* sequences = drtl.getData();
995  const DiscreteDistribution* rDist = drtl.getRateDistribution();
996  const Alphabet* alpha = sequences->getAlphabet();
997 
998  size_t nbSites = sequences->getNumberOfSites();
999  size_t nbDistinctSites = drtl.getLikelihoodData()->getNumberOfDistinctSites();
1000  size_t nbStates = alpha->getSize();
1001  size_t nbTypes = substitutionCount.getNumberOfSubstitutionTypes();
1002  vector<const Node*> nodes = tree.getNodes();
1003  const vector<size_t>* rootPatternLinks
1004  = &drtl.getLikelihoodData()->getRootArrayPositions();
1005  nodes.pop_back(); // Remove root node.
1006  size_t nbNodes = nodes.size();
1007 
1008  // We create a new ProbabilisticSubstitutionMapping object:
1009  ProbabilisticSubstitutionMapping* substitutions = new ProbabilisticSubstitutionMapping(tree, &substitutionCount, nbSites);
1010 
1011  // Compute the whole likelihood of the tree according to the specified model:
1012 
1013  Vdouble rcRates = rDist->getCategories();
1014 
1015  // Compute the number of substitutions for each class and each branch in the tree:
1016  if (verbose)
1017  ApplicationTools::displayTask("Compute marginal ancestral states");
1019  map<int, vector<size_t> > ancestors = masr.getAllAncestralStates();
1020  if (verbose)
1021  ApplicationTools::displayTaskDone();
1022 
1023  // Now we just have to compute the substitution vectors:
1024  if (verbose)
1025  ApplicationTools::displayTask("Compute substitution vectors", true);
1026 
1027  for (size_t l = 0; l < nbNodes; l++)
1028  {
1029  const Node* currentNode = nodes[l];
1030 
1031  const Node* father = currentNode->getFather();
1032 
1033  double d = currentNode->getDistanceToFather();
1034 
1035  vector<size_t> nodeStates = ancestors[currentNode->getId()]; // These are not 'true' ancestors ;)
1036  vector<size_t> fatherStates = ancestors[father->getId()];
1037 
1038  // For each node,
1039  if (verbose)
1040  ApplicationTools::displayGauge(l, nbNodes - 1);
1041  VVdouble substitutionsForCurrentNode(nbDistinctSites);
1042  for (size_t i = 0; i < nbDistinctSites; ++i)
1043  {
1044  substitutionsForCurrentNode[i].resize(nbTypes);
1045  }
1046 
1047  // Here, we have no likelihood computation to do!
1048 
1049  // Then, we deal with the node of interest.
1050  // ('y' is the state at 'node' and 'x' the state at 'father'.)
1051  // Iterate over all site partitions:
1052  auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentNode->getId()));
1053  while (mit->hasNext())
1054  {
1056  substitutionCount.setSubstitutionModel(bmd->getModel());
1057  // compute all nxy first:
1058  VVVdouble nxyt(nbTypes);
1059  for (size_t t = 0; t < nbTypes; ++t)
1060  {
1061  nxyt[t].resize(nbStates);
1062  Matrix<double>* nxy = substitutionCount.getAllNumbersOfSubstitutions(d, t + 1);
1063  for (size_t x = 0; x < nbStates; ++x)
1064  {
1065  nxyt[t][x].resize(nbStates);
1066  for (size_t y = 0; y < nbStates; ++y)
1067  {
1068  nxyt[t][x][y] = (*nxy)(x, y);
1069  }
1070  }
1071  delete nxy;
1072  }
1073  // Now loop over sites:
1074  auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
1075  while (sit->hasNext())
1076  {
1077  size_t i = sit->next();
1078  size_t fatherState = fatherStates[i];
1079  size_t nodeState = nodeStates[i];
1080  if (fatherState >= nbStates || nodeState >= nbStates)
1081  for (size_t t = 0; t < nbTypes; ++t)
1082  {
1083  substitutionsForCurrentNode[i][t] = 0;
1084  } // To be conservative! Only in case there are generic characters.
1085  else
1086  for (size_t t = 0; t < nbTypes; ++t)
1087  {
1088  substitutionsForCurrentNode[i][t] = nxyt[t][fatherState][nodeState];
1089  }
1090  }
1091  }
1092 
1093  // Now we just have to copy the substitutions into the result vector:
1094  for (size_t i = 0; i < nbSites; i++)
1095  {
1096  for (size_t t = 0; t < nbTypes; t++)
1097  {
1098  (*substitutions)(l, i, t) = substitutionsForCurrentNode[(*rootPatternLinks)[i]][t];
1099  }
1100  }
1101  }
1102  if (verbose)
1103  {
1104  if (ApplicationTools::message)
1105  *ApplicationTools::message << " ";
1106  ApplicationTools::displayTaskDone();
1107  }
1108  return substitutions;
1109 }
1110 
1111 /**************************************************************************************************/
1112 
1114  const DRTreeLikelihood& drtl,
1115  SubstitutionCount& substitutionCount,
1116  bool verbose) throw (Exception)
1117 {
1118  // Preamble:
1119  if (!drtl.isInitialized())
1120  throw Exception("SubstitutionMappingTools::computeSubstitutionVectorsMarginal(). Likelihood object is not initialized.");
1121 
1122  // A few variables we'll need:
1123 
1124  const TreeTemplate<Node> tree(drtl.getTree());
1125  const SiteContainer* sequences = drtl.getData();
1126  const DiscreteDistribution* rDist = drtl.getRateDistribution();
1127 
1128  size_t nbSites = sequences->getNumberOfSites();
1129  size_t nbDistinctSites = drtl.getLikelihoodData()->getNumberOfDistinctSites();
1130  size_t nbStates = sequences->getAlphabet()->getSize();
1131  size_t nbClasses = rDist->getNumberOfCategories();
1132  size_t nbTypes = substitutionCount.getNumberOfSubstitutionTypes();
1133  vector<const Node*> nodes = tree.getNodes();
1134  const vector<size_t>* rootPatternLinks
1135  = &drtl.getLikelihoodData()->getRootArrayPositions();
1136  nodes.pop_back(); // Remove root node.
1137  size_t nbNodes = nodes.size();
1138 
1139  // We create a new ProbabilisticSubstitutionMapping object:
1140  ProbabilisticSubstitutionMapping* substitutions = new ProbabilisticSubstitutionMapping(tree, &substitutionCount, nbSites);
1141 
1142  // Compute the whole likelihood of the tree according to the specified model:
1143 
1144  Vdouble rcProbs = rDist->getProbabilities();
1145  Vdouble rcRates = rDist->getCategories();
1146 
1147  // II) Compute the number of substitutions for each class and each branch in the tree:
1148  if (verbose)
1149  ApplicationTools::displayTask("Compute marginal node-pairs likelihoods", true);
1150 
1151  for (size_t l = 0; l < nbNodes; l++)
1152  {
1153  const Node* currentNode = nodes[l];
1154 
1155  const Node* father = currentNode->getFather();
1156 
1157  double d = currentNode->getDistanceToFather();
1158 
1159  // For each node,
1160  if (verbose)
1161  ApplicationTools::displayGauge(l, nbNodes - 1);
1162  VVdouble substitutionsForCurrentNode(nbDistinctSites);
1163  for (size_t i = 0; i < nbDistinctSites; ++i)
1164  {
1165  substitutionsForCurrentNode[i].resize(nbTypes);
1166  }
1167 
1168  // Then, we deal with the node of interest.
1169  // ('y' is the state at 'node' and 'x' the state at 'father'.)
1170  VVVdouble probsNode = DRTreeLikelihoodTools::getPosteriorProbabilitiesForEachStateForEachRate(drtl, currentNode->getId());
1171  VVVdouble probsFather = DRTreeLikelihoodTools::getPosteriorProbabilitiesForEachStateForEachRate(drtl, father->getId());
1172 
1173  // Iterate over all site partitions:
1174  auto_ptr<TreeLikelihood::ConstBranchModelIterator> mit(drtl.getNewBranchModelIterator(currentNode->getId()));
1175  while (mit->hasNext())
1176  {
1178  substitutionCount.setSubstitutionModel(bmd->getModel());
1179  // compute all nxy first:
1180  VVVVdouble nxy(nbClasses);
1181  for (size_t c = 0; c < nbClasses; ++c)
1182  {
1183  VVVdouble* nxy_c = &nxy[c];
1184  double rc = rcRates[c];
1185  nxy_c->resize(nbTypes);
1186  for (size_t t = 0; t < nbTypes; ++t)
1187  {
1188  VVdouble* nxy_c_t = &(*nxy_c)[t];
1189  Matrix<double>* nijt = substitutionCount.getAllNumbersOfSubstitutions(d * rc, t + 1);
1190  nxy_c_t->resize(nbStates);
1191  for (size_t x = 0; x < nbStates; ++x)
1192  {
1193  Vdouble* nxy_c_t_x = &(*nxy_c_t)[x];
1194  nxy_c_t_x->resize(nbStates);
1195  for (size_t y = 0; y < nbStates; ++y)
1196  {
1197  (*nxy_c_t_x)[y] = (*nijt)(x, y);
1198  }
1199  }
1200  delete nijt;
1201  }
1202  }
1203 
1204  // Now loop over sites:
1205  auto_ptr<TreeLikelihood::SiteIterator> sit(bmd->getNewSiteIterator());
1206  while (sit->hasNext())
1207  {
1208  size_t i = sit->next();
1209  VVdouble* probsNode_i = &probsNode[i];
1210  VVdouble* probsFather_i = &probsFather[i];
1211  for (size_t c = 0; c < nbClasses; ++c)
1212  {
1213  Vdouble* probsNode_i_c = &(*probsNode_i)[c];
1214  Vdouble* probsFather_i_c = &(*probsFather_i)[c];
1215  VVVdouble* nxy_c = &nxy[c];
1216  for (size_t x = 0; x < nbStates; ++x)
1217  {
1218  for (size_t y = 0; y < nbStates; ++y)
1219  {
1220  double prob_cxy = (*probsFather_i_c)[x] * (*probsNode_i_c)[y];
1221  // Now the vector computation:
1222  for (size_t t = 0; t < nbTypes; ++t)
1223  {
1224  substitutionsForCurrentNode[i][t] += prob_cxy * (*nxy_c)[t][x][y];
1225  // <------> <--------------->
1226  // Posterior probability | |
1227  // for site i and rate class c * | |
1228  // likelihood for this site--------------+ |
1229  // |
1230  // Substitution function for site i and rate class c-------+
1231  }
1232  }
1233  }
1234  }
1235  }
1236  }
1237 
1238  // Now we just have to copy the substitutions into the result vector:
1239  for (size_t i = 0; i < nbSites; ++i)
1240  {
1241  for (size_t t = 0; t < nbTypes; ++t)
1242  {
1243  (*substitutions)(l, i, t) = substitutionsForCurrentNode[(*rootPatternLinks)[i]][t];
1244  }
1245  }
1246  }
1247  if (verbose)
1248  {
1249  if (ApplicationTools::message)
1250  *ApplicationTools::message << " ";
1251  ApplicationTools::displayTaskDone();
1252  }
1253  return substitutions;
1254 }
1255 
1256 /**************************************************************************************************/
1257 
1259  const ProbabilisticSubstitutionMapping& substitutions,
1260  const SiteContainer& sites,
1261  size_t type,
1262  ostream& out)
1263 throw (IOException)
1264 {
1265  if (!out)
1266  throw IOException("SubstitutionMappingTools::writeToFile. Can't write to stream.");
1267  out << "Branches";
1268  out << "\tMean";
1269  for (size_t i = 0; i < substitutions.getNumberOfSites(); i++)
1270  {
1271  out << "\tSite" << sites.getSite(i).getPosition();
1272  }
1273  out << endl;
1274 
1275  for (size_t j = 0; j < substitutions.getNumberOfBranches(); j++)
1276  {
1277  out << substitutions.getNode(j)->getId() << "\t" << substitutions.getNode(j)->getDistanceToFather();
1278  for (size_t i = 0; i < substitutions.getNumberOfSites(); i++)
1279  {
1280  out << "\t" << substitutions(j, i, type);
1281  }
1282  out << endl;
1283  }
1284 }
1285 
1286 /**************************************************************************************************/
1287 
1289 throw (IOException)
1290 {
1291  try
1292  {
1293  DataTable* data = DataTable::read(in, "\t", true, -1);
1294  vector<string> ids = data->getColumn(0);
1295  data->deleteColumn(0); // Remove ids
1296  data->deleteColumn(0); // Remove means
1297  // Now parse the table:
1298  size_t nbSites = data->getNumberOfColumns();
1299  substitutions.setNumberOfSites(nbSites);
1300  size_t nbBranches = data->getNumberOfRows();
1301  for (size_t i = 0; i < nbBranches; i++)
1302  {
1303  int id = TextTools::toInt(ids[i]);
1304  size_t br = substitutions.getNodeIndex(id);
1305  for (size_t j = 0; j < nbSites; j++)
1306  {
1307  substitutions(br, j, type) = TextTools::toDouble((*data)(i, j));
1308  }
1309  }
1310  // Parse the header:
1311  for (size_t i = 0; i < nbSites; i++)
1312  {
1313  string siteTxt = data->getColumnName(i);
1314  int site = 0;
1315  if (siteTxt.substr(0, 4) == "Site")
1316  site = TextTools::to<int>(siteTxt.substr(4));
1317  else
1318  site = TextTools::to<int>(siteTxt);
1319  substitutions.setSitePosition(i, site);
1320  }
1321 
1322  delete data;
1323  }
1324  catch (Exception& e)
1325  {
1326  throw IOException(string("Bad input file. ") + e.what());
1327  }
1328 }
1329 
1330 /**************************************************************************************************/
1331 
1333 {
1334  size_t nbBranches = smap.getNumberOfBranches();
1335  size_t nbTypes = smap.getNumberOfSubstitutionTypes();
1336  Vdouble v(nbBranches);
1337  for (size_t l = 0; l < nbBranches; ++l)
1338  {
1339  v[l] = 0;
1340  for (size_t t = 0; t < nbTypes; ++t)
1341  {
1342  v[l] += smap(l, siteIndex, t);
1343  }
1344  }
1345  return v;
1346 }
1347 
1348 /**************************************************************************************************/
1349 
1351 {
1352  double sumSquare = 0;
1353  for (size_t l = 0; l < smap.getNumberOfBranches(); ++l)
1354  {
1355  double sum = 0;
1356  for (size_t t = 0; t < smap.getNumberOfSubstitutionTypes(); ++t)
1357  {
1358  sum += smap(l, siteIndex, t);
1359  }
1360  sumSquare += sum * sum;
1361  }
1362  return sqrt(sumSquare);
1363 }
1364 
1365 /**************************************************************************************************/
1366 
1367 vector<double> SubstitutionMappingTools::computeSumForBranch(const SubstitutionMapping& smap, size_t branchIndex)
1368 {
1369  size_t nbSites = smap.getNumberOfSites();
1370  size_t nbTypes = smap.getNumberOfSubstitutionTypes();
1371  Vdouble v(nbTypes, 0);
1372  for (size_t i = 0; i < nbSites; ++i)
1373  {
1374  for (size_t t = 0; t < nbTypes; ++t)
1375  {
1376  v[t] += smap(branchIndex, i, t);
1377  }
1378  }
1379  return v;
1380 }
1381 
1382 /**************************************************************************************************/
1383 
1384 vector<double> SubstitutionMappingTools::computeSumForSite(const SubstitutionMapping& smap, size_t siteIndex)
1385 {
1386  size_t nbBranches = smap.getNumberOfBranches();
1387  size_t nbTypes = smap.getNumberOfSubstitutionTypes();
1388  Vdouble v(nbTypes, 0);
1389  for (size_t i = 0; i < nbBranches; ++i)
1390  {
1391  for (size_t t = 0; t < nbTypes; ++t)
1392  {
1393  v[t] += smap(i, siteIndex, t);
1394  }
1395  }
1396  return v;
1397 }
1398 
1399 /**************************************************************************************************/
1401  DRTreeLikelihood& drtl,
1402  const vector<int>& ids,
1403  SubstitutionModel* model,
1404  const SubstitutionRegister& reg,
1405  double threshold,
1406  bool verbose)
1407 {
1408  SubstitutionRegister* reg2 = reg.clone();
1409 
1410  auto_ptr<SubstitutionCount> count(new UniformizationSubstitutionCount(model, reg2));
1411 
1412  auto_ptr<ProbabilisticSubstitutionMapping> mapping(SubstitutionMappingTools::computeSubstitutionVectors(drtl, ids, *count, false));
1413 
1414  vector< vector<double> > counts(ids.size());
1415  size_t nbSites = mapping->getNumberOfSites();
1416  size_t nbTypes = mapping->getNumberOfSubstitutionTypes();
1417 
1418  for (size_t k = 0; k < ids.size(); ++k)
1419  {
1420  vector<double> countsf(nbTypes, 0);
1421  vector<double> tmp(nbTypes, 0);
1422  size_t nbIgnored = 0;
1423  bool error = false;
1424  for (size_t i = 0; !error && i < nbSites; ++i)
1425  {
1426  double s = 0;
1427  for (size_t t = 0; t < nbTypes; ++t)
1428  {
1429  tmp[t] = (*mapping)(mapping->getNodeIndex(ids[k]), i, t);
1430  error = isnan(tmp[t]);
1431  if (error)
1432  goto ERROR;
1433  s += tmp[t];
1434  }
1435  if (threshold >= 0)
1436  {
1437  if (s <= threshold)
1438  countsf += tmp;
1439  else
1440  {
1441  nbIgnored++;
1442  }
1443  }
1444  else
1445  {
1446  countsf += tmp;
1447  }
1448  }
1449 
1450 ERROR:
1451  if (error)
1452  {
1453  // We do nothing. This happens for small branches.
1454  if (verbose)
1455  ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", counts could not be computed.");
1456  for (size_t t = 0; t < nbTypes; ++t)
1457  {
1458  countsf[t] = 0;
1459  }
1460  }
1461  else
1462  {
1463  if (nbIgnored > 0)
1464  {
1465  if (verbose)
1466  ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", " + TextTools::toString(nbIgnored) + " sites (" + TextTools::toString(ceil(static_cast<double>(nbIgnored * 100) / static_cast<double>(nbSites))) + "%) have been ignored because they are presumably saturated.");
1467  }
1468  }
1469 
1470  counts[k].resize(countsf.size());
1471  for (size_t j = 0; j < countsf.size(); ++j)
1472  {
1473  counts[k][j] = countsf[j];
1474  }
1475  }
1476 
1477  return counts;
1478 }
1479 
1480 /**************************************************************************************************/
1481 
1483  DRTreeLikelihood& drtl,
1484  const vector<int>& ids,
1485  const SubstitutionModelSet& modelSet,
1486  const SubstitutionRegister& reg,
1487  double threshold,
1488  bool verbose)
1489 {
1490  SubstitutionRegister* reg2 = reg.clone();
1491 
1492  auto_ptr<SubstitutionCount> count(new UniformizationSubstitutionCount(modelSet.getModel(0), reg2));
1493 
1494  auto_ptr<ProbabilisticSubstitutionMapping> mapping(SubstitutionMappingTools::computeSubstitutionVectors(drtl, modelSet, ids, *count, false));
1495 
1496  vector< vector<double> > counts(ids.size());
1497  size_t nbSites = mapping->getNumberOfSites();
1498  size_t nbTypes = mapping->getNumberOfSubstitutionTypes();
1499 
1500  for (size_t k = 0; k < ids.size(); ++k)
1501  {
1502  vector<double> countsf(nbTypes, 0);
1503  vector<double> tmp(nbTypes, 0);
1504  size_t nbIgnored = 0;
1505  bool error = false;
1506  for (size_t i = 0; !error && i < nbSites; ++i)
1507  {
1508  double s = 0;
1509  for (size_t t = 0; t < nbTypes; ++t)
1510  {
1511  tmp[t] = (*mapping)(mapping->getNodeIndex(ids[k]), i, t);
1512  error = isnan(tmp[t]);
1513  if (error)
1514  goto ERROR;
1515  s += tmp[t];
1516  }
1517  if (threshold >= 0)
1518  {
1519  if (s <= threshold)
1520  countsf += tmp;
1521  else
1522  {
1523  nbIgnored++;
1524  }
1525  }
1526  else
1527  {
1528  countsf += tmp;
1529  }
1530  }
1531 
1532 ERROR:
1533  if (error)
1534  {
1535  // We do nothing. This happens for small branches.
1536  if (verbose)
1537  ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", counts could not be computed.");
1538  for (size_t t = 0; t < nbTypes; ++t)
1539  {
1540  countsf[t] = 0;
1541  }
1542  }
1543  else
1544  {
1545  if (nbIgnored > 0)
1546  {
1547  if (verbose)
1548  ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", " + TextTools::toString(nbIgnored) + " sites (" + TextTools::toString(ceil(static_cast<double>(nbIgnored * 100) / static_cast<double>(nbSites))) + "%) have been ignored because they are presumably saturated.");
1549  }
1550  }
1551 
1552  counts[k].resize(countsf.size());
1553  for (size_t j = 0; j < countsf.size(); ++j)
1554  {
1555  counts[k][j] = countsf[j];
1556  }
1557  }
1558 
1559  return counts;
1560 }
1561 
1562 /**************************************************************************************************/
1563 
1565  DRTreeLikelihood& drtl,
1566  const vector<int>& ids,
1567  const SubstitutionModel* nullModel,
1568  const SubstitutionRegister& reg,
1569  bool verbose)
1570 {
1571  size_t nbTypes = reg.getNumberOfSubstitutionTypes();
1572  size_t nbStates = nullModel->getAlphabet()->getSize();
1573  size_t nbSites = drtl.getNumberOfSites();
1574  vector<int> supportedStates = nullModel->getAlphabetStates();
1575 
1576  // compute the AlphabetIndex for each substitutionType
1577  vector<UserAlphabetIndex1 > usai(nbTypes, UserAlphabetIndex1(nullModel->getAlphabet()));
1578 
1579  for (size_t i = 0; i < nbStates; i++)
1580  {
1581  for (size_t j = 0; j < nbStates; j++)
1582  {
1583  if (i != j)
1584  {
1585  size_t nbt = reg.getType(i, j);
1586  if (nbt != 0)
1587  usai[nbt - 1].setIndex(supportedStates[i], usai[nbt - 1].getIndex(supportedStates[i]) + nullModel->Qij(i, j));
1588  }
1589  }
1590  }
1591 
1592  // compute the normalization for each substitutionType
1593  vector< vector<double> > rewards(ids.size());
1594 
1595  for (size_t k = 0; k < ids.size(); ++k)
1596  {
1597  rewards[k].resize(nbTypes);
1598  }
1599 
1600  for (size_t nbt = 0; nbt < nbTypes; nbt++)
1601  {
1602  auto_ptr<Reward> reward(new DecompositionReward(nullModel, &usai[nbt]));
1603 
1604  auto_ptr<ProbabilisticRewardMapping> mapping(RewardMappingTools::computeRewardVectors(drtl, ids, *reward, false));
1605 
1606  for (size_t k = 0; k < ids.size(); ++k)
1607  {
1608  double s = 0;
1609  for (size_t i = 0; i < nbSites; ++i)
1610  {
1611  double tmp = (*mapping)(k, i);
1612  if (isnan(tmp))
1613  {
1614  if (verbose)
1615  ApplicationTools::displayWarning("On branch " + TextTools::toString(ids[k]) + ", reward for type " + reg.getTypeName(nbt + 1) + " could not be computed.");
1616  s = 0;
1617  break;
1618  }
1619  s += tmp;
1620  }
1621  rewards[k][nbt] = s;
1622  }
1623  reward.release();
1624  mapping.release();
1625  }
1626  return rewards;
1627 }
1628 
1629 /**************************************************************************************************/
1630 
1632  DRTreeLikelihood& drtl,
1633  const vector<int>& ids,
1634  const SubstitutionModelSet* nullModelSet,
1635  const SubstitutionRegister& reg,
1636  bool verbose)
1637 {
1638  size_t nbTypes = reg.getNumberOfSubstitutionTypes();
1639  size_t nbStates = nullModelSet->getAlphabet()->getSize();
1640  size_t nbSites = drtl.getNumberOfSites();
1641  size_t nbModels = nullModelSet->getNumberOfModels();
1642 
1643  // compute the AlphabetIndex for each substitutionType
1644  // compute the normalization for each substitutionType
1645  vector< vector<double> > rewards(ids.size());
1646 
1647  for (size_t k = 0; k < ids.size(); ++k)
1648  {
1649  rewards[k].resize(nbTypes);
1650  }
1651 
1652  vector<UserAlphabetIndex1 > usai(nbTypes, UserAlphabetIndex1(nullModelSet->getAlphabet()));
1653 
1654  for (size_t nbm = 0; nbm < nbModels; nbm++)
1655  {
1656  vector<int> mids = VectorTools::vectorIntersection(ids, nullModelSet->getNodesWithModel(nbm));
1657 
1658  if (mids.size()>0)
1659  {
1660  const SubstitutionModel* modn = nullModelSet->getModel(nbm);
1661  vector<int> supportedStates = modn->getAlphabetStates();
1662 
1663  for (size_t nbt = 0; nbt < nbTypes; nbt++)
1664  for (size_t i = 0; i < nbStates; i++)
1665  usai[nbt].setIndex(supportedStates[i], 0);
1666 
1667  for (size_t i = 0; i < nbStates; i++)
1668  {
1669  for (size_t j = 0; j < nbStates; j++)
1670  {
1671  if (i != j)
1672  {
1673  size_t nbt = reg.getType(i, j);
1674  if (nbt != 0)
1675  usai[nbt - 1].setIndex(supportedStates[i], usai[nbt - 1].getIndex(supportedStates[i]) + modn->Qij(i, j));
1676  }
1677  }
1678  }
1679 
1680  for (size_t nbt = 0; nbt < nbTypes; nbt++)
1681  {
1682  auto_ptr<Reward> reward(new DecompositionReward(nullModelSet->getModel(nbm), &usai[nbt]));
1683 
1684  auto_ptr<ProbabilisticRewardMapping> mapping(RewardMappingTools::computeRewardVectors(drtl, mids, *reward, false));
1685 
1686  for (size_t k = 0; k < mids.size(); k++)
1687  {
1688  double s = 0;
1689  for (size_t i = 0; i < nbSites; ++i)
1690  {
1691  double tmp = (*mapping)(mapping->getNodeIndex(mids[k]), i);
1692  if (isnan(tmp))
1693  {
1694  if (verbose)
1695  ApplicationTools::displayWarning("On branch " + TextTools::toString(mids[k]) + ", reward for type " + reg.getTypeName(nbt + 1) + " could not be computed.");
1696  s = 0;
1697  break;
1698  }
1699  else
1700  s += tmp;
1701  }
1702 
1703  rewards[VectorTools::which(ids, mids[k])][nbt] = s;
1704  }
1705  reward.release();
1706  mapping.release();
1707  }
1708  }
1709  }
1710 
1711  return rewards;
1712 }
1713 
1714 /**************************************************************************************************/
1715 
1717  DRTreeLikelihood& drtl,
1718  const vector<int>& ids,
1719  SubstitutionModel* model,
1720  SubstitutionModel* nullModel,
1721  const SubstitutionRegister& reg,
1722  bool verbose)
1723 {
1724  vector< vector<double> > counts;
1725  vector< vector<double> > factors;
1726 
1727  counts = getCountsPerBranch(drtl, ids, model, reg, -1, verbose);
1728  factors = getNormalizationsPerBranch(drtl, ids, nullModel, reg, verbose);
1729 
1730  size_t nbTypes = counts[0].size();
1731 
1732  for (size_t k = 0; k < ids.size(); ++k)
1733  {
1734  for (size_t t = 0; t < nbTypes; ++t)
1735  {
1736  if (factors[k][t] != 0)
1737  counts[k][t] /= factors[k][t];
1738  }
1739  }
1740 
1741  // Multiply by the lengths of the branches of the input tree
1742 
1743  const TreeTemplate<Node> tree(drtl.getTree());
1744 
1745  for (size_t k = 0; k < ids.size(); ++k)
1746  {
1747  double l = tree.getNode(ids[k])->getDistanceToFather();
1748  for (size_t t = 0; t < nbTypes; ++t)
1749  {
1750  counts[k][t] *= l;
1751  }
1752  }
1753 
1754  return counts;
1755 }
1756 
1757 /**************************************************************************************************/
1758 
1760  DRTreeLikelihood& drtl,
1761  const vector<int>& ids,
1762  SubstitutionModelSet* modelSet,
1763  SubstitutionModelSet* nullModelSet,
1764  const SubstitutionRegister& reg,
1765  bool verbose)
1766 {
1767  vector< vector<double> > counts;
1768  vector< vector<double> > factors;
1769 
1770  counts = getCountsPerBranch(drtl, ids, modelSet->getModel(0), reg, -1, verbose);
1771  factors = getNormalizationsPerBranch(drtl, ids, nullModelSet, reg, verbose);
1772 
1773  size_t nbTypes = counts[0].size();
1774 
1775  for (size_t k = 0; k < ids.size(); ++k)
1776  {
1777  for (size_t t = 0; t < nbTypes; ++t)
1778  {
1779  if (factors[k][t] != 0)
1780  counts[k][t] /= factors[k][t];
1781  }
1782  }
1783 
1784  // Multiply by the lengths of the branches of the input tree
1785 
1786  const TreeTemplate<Node> tree(drtl.getTree());
1787 
1788  for (size_t k = 0; k < ids.size(); ++k)
1789  {
1790  double l = tree.getNode(ids[k])->getDistanceToFather();
1791  for (size_t t = 0; t < nbTypes; ++t)
1792  {
1793  counts[k][t] *= l;
1794  }
1795  }
1796 
1797  return counts;
1798 }
1799 
1800 /**************************************************************************************************/
1801 
1803  DRTreeLikelihood& drtl,
1804  const vector<int>& ids,
1805  SubstitutionModel* model,
1806  const SubstitutionRegister& reg,
1807  bool stationarity,
1808  double threshold)
1809 {
1810  vector< vector<double> > counts = getCountsPerBranch(drtl, ids, model, reg, threshold);
1811 
1812  const CategorySubstitutionRegister* creg;
1813  if (!stationarity)
1814  {
1815  try
1816  {
1817  creg = &dynamic_cast<const CategorySubstitutionRegister&>(reg);
1818  }
1819  catch (Exception& ex)
1820  {
1821  throw Exception("The stationarity option can only be used with a category substitution register.");
1822  }
1823 
1824  size_t nbTypes = counts[0].size();
1825 
1826  for (size_t k = 0; k < ids.size(); ++k)
1827  {
1828  vector<double> freqs = DRTreeLikelihoodTools::getPosteriorStateFrequencies(drtl, ids[k]);
1829  // Compute frequencies for types:
1830  vector<double> freqsTypes(creg->getNumberOfCategories());
1831  for (size_t i = 0; i < freqs.size(); ++i)
1832  {
1833  size_t c = creg->getCategory(i);
1834  freqsTypes[c - 1] += freqs[i];
1835  }
1836 
1837  // We devide the counts by the frequencies and rescale:
1838  double s = VectorTools::sum(counts[k]);
1839  for (size_t t = 0; t < nbTypes; ++t)
1840  {
1841  counts[k][t] /= freqsTypes[creg->getCategoryFrom(t + 1) - 1];
1842  }
1843 
1844  double s2 = VectorTools::sum(counts[k]);
1845  // Scale:
1846  counts[k] = (counts[k] / s2) * s;
1847  }
1848  }
1849 
1850  return counts;
1851 }
1852 
1853 /**************************************************************************************************/
1854 
1856  string& filename,
1857  DRTreeLikelihood& drtl,
1858  const vector<int>& ids,
1859  SubstitutionModel* model,
1860  const SubstitutionRegister& reg)
1861 {
1862  auto_ptr<SubstitutionCount> count(new UniformizationSubstitutionCount(model, reg.clone()));
1863  auto_ptr<ProbabilisticSubstitutionMapping> smap(SubstitutionMappingTools::computeSubstitutionVectors(drtl, ids, *count, false));
1864 
1865  ofstream file;
1866  file.open(filename.c_str());
1867 
1868  size_t nbSites = smap->getNumberOfSites();
1869  size_t nbBr = ids.size();
1870 
1871  vector<size_t> sdi(nbBr); // reverse of ids
1872  for (size_t i = 0; i < nbBr; ++i)
1873  {
1874  sdi[i] = smap->getNodeIndex(ids[i]);
1875  }
1876 
1877  file << "sites";
1878  for (size_t i = 0; i < nbBr; ++i)
1879  {
1880  file << "\t" << i;
1881  }
1882  file << endl;
1883 
1884  for (size_t k = 0; k < nbSites; ++k)
1885  {
1886  vector<double> countsf = SubstitutionMappingTools::computeTotalSubstitutionVectorForSite(*smap, k);
1887  file << k;
1888  for (size_t i = 0; i < nbBr; ++i)
1889  {
1890  file << "\t" << countsf[sdi[i]];
1891  }
1892  file << endl;
1893  }
1894  file.close();
1895 }
1896 
1897 /**************************************************************************************************/
1898 
1900  const string& filenamePrefix,
1901  DRTreeLikelihood& drtl,
1902  const vector<int>& ids,
1903  SubstitutionModel* model,
1904  const SubstitutionRegister& reg)
1905 {
1906  auto_ptr<SubstitutionCount> count(new UniformizationSubstitutionCount(model, reg.clone()));
1907  auto_ptr<ProbabilisticSubstitutionMapping> smap(SubstitutionMappingTools::computeSubstitutionVectors(drtl, ids, *count, false));
1908 
1909  ofstream file;
1910 
1911  size_t nbSites = smap->getNumberOfSites();
1912  size_t nbBr = ids.size();
1913 
1914  for (size_t i = 0; i < reg.getNumberOfSubstitutionTypes(); ++i)
1915  {
1916  string path = filenamePrefix + TextTools::toString(i + 1) + string(".count");
1917  ApplicationTools::displayResult(string("Output counts of type ") + TextTools::toString(i + 1) + string(" to file"), path);
1918  file.open(path.c_str());
1919 
1920  file << "sites";
1921  for (size_t k = 0; k < nbBr; ++k)
1922  {
1923  file << "\t" << k;
1924  }
1925  file << endl;
1926 
1927  for (size_t j = 0; j < nbSites; ++j)
1928  {
1929  file << j;
1930  for (size_t k = 0; k < nbBr; ++k)
1931  {
1932  file << "\t" << (*smap)(smap->getNodeIndex(ids[k]), j, i);
1933  }
1934  file << endl;
1935  }
1936  file.close();
1937  }
1938 }
1939 
1940 /**************************************************************************************************/
static ProbabilisticSubstitutionMapping * computeSubstitutionVectorsNoAveraging(const DRTreeLikelihood &drtl, SubstitutionCount &substitutionCount, bool verbose=true)
Compute the substitutions vectors for a particular dataset using the double-recursive likelihood comp...
A pair of SubstitutionModel / SiteIterator.
Likelihood ancestral states reconstruction: marginal method.
Substitution models manager for non-homogeneous / non-reversible models of evolution.
Interface for all substitution models.
static std::vector< double > computeSumForBranch(const SubstitutionMapping &smap, size_t branchIndex)
Sum all substitutions for each type of a given branch (specified by its index).
virtual const SubstitutionModel * getModel() const =0
const SubstitutionModel * getModel(size_t i) const
Get one model from the set knowing its index.
virtual const std::vector< int > & getAlphabetStates() const =0
std::map< int, std::vector< size_t > > getAllAncestralStates() const
Get all ancestral states for all nodes.
virtual SiteIterator * getNewSiteIterator() const =0
static double computeNormForSite(const SubstitutionMapping &smap, size_t siteIndex)
Compute the norm of a substitution vector for a given position (specified by its index).
virtual SubstitutionRegister * clone() const =0
Data storage class for probabilistic substitution mappings.
General interface for storing mapping data.
The SubstitutionRegister interface.
virtual const Node * getSon(size_t pos) const
Definition: Node.h:395
virtual const Alphabet * getAlphabet() const =0
STL namespace.
Analytical reward using the eigen decomposition method.
The phylogenetic tree class.
static ProbabilisticSubstitutionMapping * computeSubstitutionVectorsMarginal(const DRTreeLikelihood &drtl, SubstitutionCount &substitutionCount, bool verbose=true)
Compute the substitutions vectors for a particular dataset using the double-recursive likelihood comp...
virtual size_t getCategoryFrom(size_t type) const
static ProbabilisticRewardMapping * computeRewardVectors(const DRTreeLikelihood &drtl, const std::vector< int > &nodeIds, Reward &reward, bool verbose=true)
Compute the reward vectors for a particular dataset using the double-recursive likelihood computation...
virtual const Node * getFather() const
Get the father of this node is there is one.
Definition: Node.h:339
virtual size_t getNumberOfBranches() const =0
static std::vector< std::vector< double > > getRelativeCountsPerBranch(DRTreeLikelihood &drtl, const std::vector< int > &ids, SubstitutionModel *model, const SubstitutionRegister &reg, bool stationarity=true, double threshold=-1)
Returns the counts relative to the frequency of the states in case of non-stationarity.
virtual bool hasFather() const
Tell if this node has a father node.
Definition: Node.h:379
Gather states into defined categories, and count the changes between categories.
virtual double getDistanceToFather() const
Get the distance to the father node is there is one, otherwise throw a NodeException.
Definition: Node.h:283
const Alphabet * getAlphabet() const
virtual size_t getNumberOfSites() const =0
Get the number of sites in the dataset.
virtual int getId() const
Get the node&#39;s id.
Definition: Node.h:203
static ProbabilisticSubstitutionMapping * computeSubstitutionVectorsNoAveragingMarginal(const DRTreeLikelihood &drtl, SubstitutionCount &substitutionCount, bool verbose=true)
Compute the substitutions vectors for a particular dataset using the double-recursive likelihood comp...
static void outputIndividualCountsPerBranchPerSite(const std::string &filenamePrefix, DRTreeLikelihood &drtl, const std::vector< int > &ids, SubstitutionModel *model, const SubstitutionRegister &reg)
Output individual counts par branch per site, in files.
virtual double Qij(size_t i, size_t j) const =0
virtual const Tree & getTree() const =0
Get the tree (topology and branch lengths).
static std::vector< std::vector< double > > getNormalizedCountsPerBranch(DRTreeLikelihood &drtl, const std::vector< int > &ids, SubstitutionModel *model, SubstitutionModel *nullModel, const SubstitutionRegister &reg, bool verbose=true)
Returns the counts normalized by a null model.
virtual ConstBranchModelDescription * next()=0
Analytical (weighted) substitution count using the uniformization method.
The SubstitutionsCount interface.
static std::vector< double > computeSumForSite(const SubstitutionMapping &smap, size_t siteIndex)
Sum all substitutions for each type of a given site (specified by its index).
virtual size_t getCategory(size_t state) const
The phylogenetic node class.
Definition: Node.h:90
virtual size_t getNumberOfSubstitutionTypes() const =0
static std::vector< double > computeTotalSubstitutionVectorForSite(const SubstitutionMapping &smap, size_t siteIndex)
Sum all type of substitutions for each branch of a given position (specified by its index)...
virtual size_t getType(size_t fromState, size_t toState) const =0
Get the substitution type far a given pair of model states.
virtual std::string getTypeName(size_t type) const =0
Get the name of a given substitution type.
const std::vector< int > & getNodesWithModel(size_t i) const
Get a list of nodes id for which the given model is associated.
static std::vector< std::vector< double > > getNormalizationsPerBranch(DRTreeLikelihood &drtl, const std::vector< int > &ids, const SubstitutionModel *nullModel, const SubstitutionRegister &reg, bool verbose=true)
Returns the normalization factors due to the null model on each branch, for each register.
virtual size_t getNodeIndex(int nodeId) const
Definition: Mapping.h:230
virtual size_t getNumberOfSons() const
Definition: Node.h:388
Interface for double-recursive (DR) implementation of the likelihood computation. ...
virtual size_t getNumberOfSites() const =0
size_t getNumberOfSites() const
Definition: Mapping.h:208
static void readFromStream(std::istream &in, ProbabilisticSubstitutionMapping &substitutions, size_t type)
Read the substitutions vectors from a stream.
static Vdouble getPosteriorStateFrequencies(const DRTreeLikelihood &drl, int nodeId)
Compute the posterior probabilities for each state for a given node.
static VVVdouble getPosteriorProbabilitiesForEachStateForEachRate(const DRTreeLikelihood &drl, int nodeId)
Compute the posterior probabilities for each state and each rate of each distinct site...
static std::vector< std::vector< double > > getCountsPerBranch(DRTreeLikelihood &drtl, const std::vector< int > &ids, SubstitutionModel *model, const SubstitutionRegister &reg, double threshold=-1, bool verbose=true)
Returns the counts on each branch.
virtual size_t getNumberOfSubstitutionTypes() const =0
virtual N * getNode(int id, bool checkId=false)
Definition: TreeTemplate.h:419
static void outputTotalCountsPerBranchPerSite(std::string &filename, DRTreeLikelihood &drtl, const std::vector< int > &ids, SubstitutionModel *model, const SubstitutionRegister &reg)
Output the sum of the counts par branch per site, in a file.
static void writeToStream(const ProbabilisticSubstitutionMapping &substitutions, const SiteContainer &sites, size_t type, std::ostream &out)
Write the substitutions vectors to a stream.
virtual std::vector< const N * > getNodes() const
Definition: TreeTemplate.h:411
static ProbabilisticSubstitutionMapping * computeSubstitutionVectors(const DRTreeLikelihood &drtl, SubstitutionCount &substitutionCount, bool verbose=true)
Compute the substitutions vectors for a particular dataset using the double-recursive likelihood comp...