bpp-phyl  2.2.0
DRASRTreeLikelihoodData.cpp
Go to the documentation of this file.
1 //
2 // File: DRASRTreeLikelihoodData.cpp
3 // Created by: Julien Dutheil
4 // Created on: Sat Dec 30 14:20 2006
5 // From file HomogeneousTreeLikelihood.cpp
6 //
7 
8 /*
9  Copyright or © or Copr. CNRS, (November 16, 2004)
10 
11  This software is a computer program whose purpose is to provide classes
12  for phylogenetic data analysis.
13 
14  This software is governed by the CeCILL license under French law and
15  abiding by the rules of distribution of free software. You can use,
16  modify and/ or redistribute the software under the terms of the CeCILL
17  license as circulated by CEA, CNRS and INRIA at the following URL
18  "http://www.cecill.info".
19 
20  As a counterpart to the access to the source code and rights to copy,
21  modify and redistribute granted by the license, users are provided only
22  with a limited warranty and the software's author, the holder of the
23  economic rights, and the successive licensors have only limited
24  liability.
25 
26  In this respect, the user's attention is drawn to the risks associated
27  with loading, using, modifying and/or developing or reproducing the
28  software by the user in light of its specific status of free software,
29  that may mean that it is complicated to manipulate, and that also
30  therefore means that it is reserved for developers and experienced
31  professionals having in-depth computer knowledge. Users are therefore
32  encouraged to load and test the software's suitability as regards their
33  requirements in conditions enabling the security of their systems and/or
34  data to be ensured and, more generally, to use and operate it in the
35  same conditions as regards security.
36 
37  The fact that you are presently reading this means that you have had
38  knowledge of the CeCILL license and that you accept its terms.
39  */
40 
42 #include "../PatternTools.h"
43 
44 // From SeqLib:
45 #include <Bpp/Seq/SiteTools.h>
46 #include <Bpp/Seq/Container/AlignedSequenceContainer.h>
47 #include <Bpp/Seq/Container/SequenceContainerTools.h>
48 #include <Bpp/Seq/Container/VectorSiteContainer.h>
49 
50 using namespace bpp;
51 
52 /******************************************************************************/
53 
54 void DRASRTreeLikelihoodData::initLikelihoods(const SiteContainer& sites, const SubstitutionModel& model)
55 throw (Exception)
56 {
57  if (sites.getNumberOfSequences() == 1)
58  throw Exception("Error, only 1 sequence!");
59  if (sites.getNumberOfSequences() == 0)
60  throw Exception("Error, no sequence!");
61  if (sites.getAlphabet()->getAlphabetType()
62  != model.getAlphabet()->getAlphabetType())
63  throw AlphabetMismatchException("DRASDRTreeLikelihoodData::initLikelihoods. Data and model must have the same alphabet type.",
64  sites.getAlphabet(),
65  model.getAlphabet());
66  alphabet_ = sites.getAlphabet();
67  nbStates_ = model.getNumberOfStates();
68  nbSites_ = sites.getNumberOfSites();
69  if (shrunkData_)
70  delete shrunkData_;
71  SitePatterns* patterns;
72  if (usePatterns_)
73  {
74  patterns = initLikelihoodsWithPatterns(tree_->getRootNode(), sites, model);
75  shrunkData_ = patterns->getSites();
76  rootWeights_ = patterns->getWeights();
77  rootPatternLinks_ = patterns->getIndices();
78  nbDistinctSites_ = shrunkData_->getNumberOfSites();
79  }
80  else
81  {
82  patterns = new SitePatterns(&sites);
83  shrunkData_ = patterns->getSites();
84  rootWeights_ = patterns->getWeights();
85  rootPatternLinks_ = patterns->getIndices();
86  nbDistinctSites_ = shrunkData_->getNumberOfSites();
87  initLikelihoods(tree_->getRootNode(), *shrunkData_, model);
88  }
89  delete patterns;
90 }
91 
92 /******************************************************************************/
93 
94 void DRASRTreeLikelihoodData::initLikelihoods(const Node* node, const SiteContainer& sequences, const SubstitutionModel& model) throw (Exception)
95 {
96  // Initialize likelihood vector:
97  DRASRTreeLikelihoodNodeData* nodeData = &nodeData_[node->getId()];
98  nodeData->setNode(node);
99  VVVdouble* _likelihoods_node = &nodeData->getLikelihoodArray();
100  VVVdouble* _dLikelihoods_node = &nodeData->getDLikelihoodArray();
101  VVVdouble* _d2Likelihoods_node = &nodeData->getD2LikelihoodArray();
102 
103  _likelihoods_node->resize(nbDistinctSites_);
104  _dLikelihoods_node->resize(nbDistinctSites_);
105  _d2Likelihoods_node->resize(nbDistinctSites_);
106 
107  for (size_t i = 0; i < nbDistinctSites_; i++)
108  {
109  VVdouble* _likelihoods_node_i = &(*_likelihoods_node)[i];
110  VVdouble* _dLikelihoods_node_i = &(*_dLikelihoods_node)[i];
111  VVdouble* _d2Likelihoods_node_i = &(*_d2Likelihoods_node)[i];
112  _likelihoods_node_i->resize(nbClasses_);
113  _dLikelihoods_node_i->resize(nbClasses_);
114  _d2Likelihoods_node_i->resize(nbClasses_);
115  for (size_t c = 0; c < nbClasses_; c++)
116  {
117  Vdouble* _likelihoods_node_i_c = &(*_likelihoods_node_i)[c];
118  Vdouble* _dLikelihoods_node_i_c = &(*_dLikelihoods_node_i)[c];
119  Vdouble* _d2Likelihoods_node_i_c = &(*_d2Likelihoods_node_i)[c];
120  _likelihoods_node_i_c->resize(nbStates_);
121  _dLikelihoods_node_i_c->resize(nbStates_);
122  _d2Likelihoods_node_i_c->resize(nbStates_);
123  for (size_t s = 0; s < nbStates_; s++)
124  {
125  (*_likelihoods_node_i_c)[s] = 1; // All likelihoods are initialized to 1.
126  (*_dLikelihoods_node_i_c)[s] = 0; // All dLikelihoods are initialized to 0.
127  (*_d2Likelihoods_node_i_c)[s] = 0; // All d2Likelihoods are initialized to 0.
128  }
129  }
130  }
131 
132  // Now initialize likelihood values and pointers:
133 
134  if (node->isLeaf())
135  {
136  const Sequence* seq;
137  try
138  {
139  seq = &sequences.getSequence(node->getName());
140  }
141  catch (SequenceNotFoundException snfe)
142  {
143  throw SequenceNotFoundException("DRASRTreeLikelihoodData::initTreelikelihoods. Leaf name in tree not found in site conainer: ", (node->getName()));
144  }
145  for (size_t i = 0; i < nbDistinctSites_; i++)
146  {
147  VVdouble* _likelihoods_node_i = &(*_likelihoods_node)[i];
148  int state = seq->getValue(i);
149  for (size_t c = 0; c < nbClasses_; c++)
150  {
151  Vdouble* _likelihoods_node_i_c = &(*_likelihoods_node_i)[c];
152  double test = 0.;
153  for (size_t s = 0; s < nbStates_; s++)
154  {
155  // Leaves likelihood are set to 1 if the char correspond to the site in the sequence,
156  // otherwise value set to 0:
157  // cout << "i=" << i << "\tc=" << c << "\ts=" << s << endl;
158  (*_likelihoods_node_i_c)[s] = model.getInitValue(s, state);
159  test += (*_likelihoods_node_i_c)[s];
160  }
161  if (test < 0.000001)
162  std::cerr << "WARNING!!! Likelihood will be 0 for this site." << std::endl;
163  }
164  }
165  }
166  else
167  {
168  // 'node' is an internal node.
169  std::map<int, std::vector<size_t> >* patternLinks__node = &patternLinks_[node->getId()];
170  size_t nbSonNodes = node->getNumberOfSons();
171  for (size_t l = 0; l < nbSonNodes; l++)
172  {
173  // For each son node,
174  const Node* son = (*node)[static_cast<int>(l)];
175  initLikelihoods(son, sequences, model);
176  std::vector<size_t>* patternLinks__node_son = &(*patternLinks__node)[son->getId()];
177 
178  // Init map:
179  patternLinks__node_son->resize(nbDistinctSites_);
180 
181  for (size_t i = 0; i < nbDistinctSites_; i++)
182  {
183  (*patternLinks__node_son)[i] = i;
184  }
185  }
186  }
187 }
188 
189 /******************************************************************************/
190 
191 SitePatterns* DRASRTreeLikelihoodData::initLikelihoodsWithPatterns(const Node* node, const SiteContainer& sequences, const SubstitutionModel& model) throw (Exception)
192 {
193  SiteContainer* tmp = PatternTools::getSequenceSubset(sequences, *node);
194  SitePatterns* patterns = new SitePatterns(tmp, true);
195  SiteContainer* subSequences = patterns->getSites();
196 
197  size_t nbSites = subSequences->getNumberOfSites();
198 
199  // Initialize likelihood vector:
200  DRASRTreeLikelihoodNodeData* nodeData = &nodeData_[node->getId()];
201  nodeData->setNode(node);
202  VVVdouble* _likelihoods_node = &nodeData->getLikelihoodArray();
203  VVVdouble* _dLikelihoods_node = &nodeData->getDLikelihoodArray();
204  VVVdouble* _d2Likelihoods_node = &nodeData->getD2LikelihoodArray();
205  _likelihoods_node->resize(nbSites);
206  _dLikelihoods_node->resize(nbSites);
207  _d2Likelihoods_node->resize(nbSites);
208 
209  for (size_t i = 0; i < nbSites; i++)
210  {
211  VVdouble* _likelihoods_node_i = &(*_likelihoods_node)[i];
212  VVdouble* _dLikelihoods_node_i = &(*_dLikelihoods_node)[i];
213  VVdouble* _d2Likelihoods_node_i = &(*_d2Likelihoods_node)[i];
214  _likelihoods_node_i->resize(nbClasses_);
215  _dLikelihoods_node_i->resize(nbClasses_);
216  _d2Likelihoods_node_i->resize(nbClasses_);
217  for (size_t c = 0; c < nbClasses_; c++)
218  {
219  Vdouble* _likelihoods_node_i_c = &(*_likelihoods_node_i)[c];
220  Vdouble* _dLikelihoods_node_i_c = &(*_dLikelihoods_node_i)[c];
221  Vdouble* _d2Likelihoods_node_i_c = &(*_d2Likelihoods_node_i)[c];
222  _likelihoods_node_i_c->resize(nbStates_);
223  _dLikelihoods_node_i_c->resize(nbStates_);
224  _d2Likelihoods_node_i_c->resize(nbStates_);
225  for (size_t s = 0; s < nbStates_; s++)
226  {
227  (*_likelihoods_node_i_c)[s] = 1; // All likelihoods are initialized to 1.
228  (*_dLikelihoods_node_i_c)[s] = 0; // All dLikelihoods are initialized to 0.
229  (*_d2Likelihoods_node_i_c)[s] = 0; // All d2Likelihoods are initialized to 0.
230  }
231  }
232  }
233 
234  // Now initialize likelihood values and pointers:
235 
236  if (node->isLeaf())
237  {
238  const Sequence* seq;
239  try
240  {
241  seq = &subSequences->getSequence(node->getName());
242  }
243  catch (SequenceNotFoundException snfe)
244  {
245  throw SequenceNotFoundException("HomogeneousTreeLikelihood::initTreelikelihoodsWithPatterns. Leaf name in tree not found in site conainer: ", (node->getName()));
246  }
247  for (size_t i = 0; i < nbSites; i++)
248  {
249  VVdouble* _likelihoods_node_i = &(*_likelihoods_node)[i];
250  int state = seq->getValue(i);
251  for (size_t c = 0; c < nbClasses_; c++)
252  {
253  Vdouble* _likelihoods_node_i_c = &(*_likelihoods_node_i)[c];
254  double test = 0.;
255  for (size_t s = 0; s < nbStates_; s++)
256  {
257  // Leaves likelihood are set to 1 if the char correspond to the site in the sequence,
258  // otherwise value set to 0:
259  // cout << "i=" << i << "\tc=" << c << "\ts=" << s << endl;
260  (*_likelihoods_node_i_c)[s] = model.getInitValue(s, state);
261  test += (*_likelihoods_node_i_c)[s];
262  }
263  if (test < 0.000001)
264  std::cerr << "WARNING!!! Likelihood will be 0 for this site." << std::endl;
265  }
266  }
267  }
268  else
269  {
270  // 'node' is an internal node.
271  std::map<int, std::vector<size_t> >* patternLinks__node = &patternLinks_[node->getId()];
272 
273  // Now initialize pattern links:
274  size_t nbSonNodes = node->getNumberOfSons();
275  for (int l = 0; l < static_cast<int>(nbSonNodes); l++)
276  {
277  // For each son node,
278  const Node* son = (*node)[l];
279 
280  std::vector<size_t>* patternLinks__node_son = &(*patternLinks__node)[son->getId()];
281 
282  // Initialize subtree 'l' and retrieves corresponding subSequences:
283  SitePatterns* subPatterns = initLikelihoodsWithPatterns(son, *subSequences, model);
284  (*patternLinks__node_son) = subPatterns->getIndices();
285  delete subPatterns;
286  }
287  }
288  delete subSequences;
289  return patterns;
290 }
291 
292 /******************************************************************************/
293 
Interface for all substitution models.
const std::vector< size_t > & getIndices() const
Definition: SitePatterns.h:175
void setNode(const Node *node)
Set the node associated to this data.
const std::vector< unsigned int > & getWeights() const
Definition: SitePatterns.h:171
virtual int getId() const
Get the node&#39;s id.
Definition: Node.h:203
static SiteContainer * getSequenceSubset(const SiteContainer &sequenceSet, const Node &node)
Extract the sequences corresponding to a given subtree.
SiteContainer * getSites() const
void initLikelihoods(const SiteContainer &sites, const SubstitutionModel &model)
The phylogenetic node class.
Definition: Node.h:90
Data structure for site patterns.
Definition: SitePatterns.h:69
Likelihood data structure for a node.
virtual SitePatterns * initLikelihoodsWithPatterns(const Node *node, const SiteContainer &sequences, const SubstitutionModel &model)
This method initializes the leaves according to a sequence file.