bpp-phyl  2.2.0
MarginalAncestralStateReconstruction.cpp
Go to the documentation of this file.
1 //
2 // File: MarginalAncestralStateReconstruction.cpp
3 // Created by: Julien Dutheil
4 // Created on: Fri Jul 08 13:32 2005
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
41 #include <Bpp/Numeric/VectorTools.h>
42 #include <Bpp/Numeric/Random/RandomTools.h>
43 
44 using namespace bpp;
45 using namespace std;
46 
47 vector<size_t> MarginalAncestralStateReconstruction::getAncestralStatesForNode(int nodeId, VVdouble& probs, bool sample) const
48 {
49  vector<size_t> ancestors(nbDistinctSites_);
50  probs.resize(nbDistinctSites_);
51  double cumProb = 0;
52  double r;
53  if (likelihood_->getTree().isLeaf(nodeId))
54  {
55  VVdouble larray = likelihood_->getLikelihoodData()->getLeafLikelihoods(nodeId);
56  for (size_t i = 0; i < nbDistinctSites_; ++i)
57  {
58  Vdouble* probs_i = &probs[i];
59  probs_i->resize(nbStates_);
60  size_t j = VectorTools::whichMax(larray[i]);
61  ancestors[i] = j;
62  (*probs_i)[j] = 1.;
63  }
64  }
65  else
66  {
67  VVVdouble larray;
68 
69  likelihood_->computeLikelihoodAtNode(nodeId, larray);
70  for (size_t i = 0; i < nbDistinctSites_; i++)
71  {
72  VVdouble* larray_i = &larray[i];
73  Vdouble* probs_i = &probs[i];
74  probs_i->resize(nbStates_);
75  for (size_t c = 0; c < nbClasses_; c++)
76  {
77  Vdouble* larray_i_c = &(*larray_i)[c];
78  for (size_t x = 0; x < nbStates_; x++)
79  {
80  (*probs_i)[x] += (*larray_i_c)[x] * r_[c] / l_[i];
81  }
82  }
83  if (sample)
84  {
85  cumProb = 0;
86  r = RandomTools::giveRandomNumberBetweenZeroAndEntry(1.);
87  for (size_t j = 0; j < nbStates_; j++)
88  {
89  cumProb += (*probs_i)[j];
90  if (r <= cumProb)
91  {
92  ancestors[i] = j;
93  break;
94  }
95  }
96  }
97  else
98  ancestors[i] = VectorTools::whichMax(*probs_i);
99  }
100  }
101  return ancestors;
102 }
103 
105 {
106  map<int, vector<size_t> > ancestors;
107  // Clone the data into a AlignedSequenceContainer for more efficiency:
108  AlignedSequenceContainer* data = new AlignedSequenceContainer(*likelihood_->getLikelihoodData()->getShrunkData());
109  recursiveMarginalAncestralStates(tree_.getRootNode(), ancestors, *data);
110  delete data;
111  return ancestors;
112 }
113 
114 Sequence* MarginalAncestralStateReconstruction::getAncestralSequenceForNode(int nodeId, VVdouble* probs, bool sample) const
115 {
116  string name = tree_.hasNodeName(nodeId) ? tree_.getNodeName(nodeId) : ("" + TextTools::toString(nodeId));
117  const vector<size_t>* rootPatternLinks = &likelihood_->getLikelihoodData()->getRootArrayPositions();
118  const SubstitutionModel* model = likelihood_->getSubstitutionModel(tree_.getNodesId()[0], 0); // We assume all nodes have a model with the same number of states.
119  vector<size_t> states;
120  vector<int> allStates(nbSites_);
121  VVdouble patternedProbs;
122  if (probs)
123  {
124  states = getAncestralStatesForNode(nodeId, patternedProbs, sample);
125  probs->resize(nbSites_);
126  for (size_t i = 0; i < nbSites_; i++)
127  {
128  allStates[i] = model->getAlphabetStateAsInt(states[(*rootPatternLinks)[i]]);
129  (*probs)[i] = patternedProbs[(*rootPatternLinks)[i]];
130  }
131  }
132  else
133  {
134  states = getAncestralStatesForNode(nodeId, patternedProbs, sample);
135  for (size_t i = 0; i < nbSites_; i++)
136  {
137  allStates[i] = model->getAlphabetStateAsInt(states[(*rootPatternLinks)[i]]);
138  }
139  }
140  return new BasicSequence(name, allStates, alphabet_);
141 }
142 
144  const Node* node,
145  map<int, vector<size_t> >& ancestors,
146  AlignedSequenceContainer& data) const
147 {
148  if (node->isLeaf())
149  {
150  vector<int> content = data.getContent(node->getName());
151  vector<size_t>* v = &ancestors[node->getId()];
152  v->resize(content.size());
153  // This is a tricky way to store the real sequence as an ancestral one...
154  // In case of Markov Modulated models, we consider that the real sequences
155  // Are all in the first category.
156  const SubstitutionModel* model = likelihood_->getSubstitutionModel(tree_.getNodesId()[0], 0); // We assume all nodes have a model with the same number of states.
157  for (size_t i = 0; i < content.size(); i++)
158  {
159  (*v)[i] = model->getModelStates(content[i])[0];
160  }
161  }
162  else
163  {
164  ancestors[node->getId()] = getAncestralStatesForNode(node->getId());
165  for (size_t i = 0; i < node->getNumberOfSons(); i++)
166  {
167  recursiveMarginalAncestralStates(node->getSon(i), ancestors, data);
168  }
169  }
170 }
171 
172 AlignedSequenceContainer* MarginalAncestralStateReconstruction::getAncestralSequences(bool sample) const
173 {
174  AlignedSequenceContainer* asc = new AlignedSequenceContainer(alphabet_);
175  vector<int> ids = tree_.getInnerNodesId();
176  for (size_t i = 0; i < ids.size(); i++)
177  {
178  Sequence* seq = getAncestralSequenceForNode(ids[i], NULL, sample);
179  asc->addSequence(*seq);
180  delete seq;
181  }
182  return asc;
183 }
184 
Interface for all substitution models.
virtual std::vector< size_t > getModelStates(int code) const =0
Get the state in the model corresponding to a particular state in the alphabet.
virtual int getAlphabetStateAsInt(size_t index) const =0
virtual std::string getName() const
Get the name associated to this node, if there is one, otherwise throw a NodeException.
Definition: Node.h:236
std::map< int, std::vector< size_t > > getAllAncestralStates() const
Get all ancestral states for all nodes.
virtual const Node * getSon(size_t pos) const
Definition: Node.h:395
STL namespace.
void recursiveMarginalAncestralStates(const Node *node, std::map< int, std::vector< size_t > > &ancestors, AlignedSequenceContainer &data) const
AlignedSequenceContainer * getAncestralSequences() const
Get all the ancestral sequences for all nodes.
virtual bool isLeaf() const
Definition: Node.h:692
virtual int getId() const
Get the node&#39;s id.
Definition: Node.h:203
The phylogenetic node class.
Definition: Node.h:90
virtual size_t getNumberOfSons() const
Definition: Node.h:388
Sequence * getAncestralSequenceForNode(int nodeId, VVdouble *probs, bool sample) const
Get the ancestral sequence for a given node.
std::vector< size_t > getAncestralStatesForNode(int nodeId, VVdouble &probs, bool sample) const
Get ancestral states for a given node as a vector of int.