bpp-phyl  2.2.0
DRASDRTreeLikelihoodData.cpp
Go to the documentation of this file.
1 //
2 // File: DRASDRTreeLikelihoodData.cpp
3 // Created by: Julien Dutheil
4 // Created on: Sat Dec 30 14:20 2006
5 // From file DRHomogeneousTreeLikelihood.cpp
6 //
7 
8 /*
9  Copyright or © or Copr. CNRS, (November 16, 2004)
10 
11  This software is a computer program whose purpose is to provide classes
12  for phylogenetic data analysis.
13 
14  This software is governed by the CeCILL license under French law and
15  abiding by the rules of distribution of free software. You can use,
16  modify and/ or redistribute the software under the terms of the CeCILL
17  license as circulated by CEA, CNRS and INRIA at the following URL
18  "http://www.cecill.info".
19 
20  As a counterpart to the access to the source code and rights to copy,
21  modify and redistribute granted by the license, users are provided only
22  with a limited warranty and the software's author, the holder of the
23  economic rights, and the successive licensors have only limited
24  liability.
25 
26  In this respect, the user's attention is drawn to the risks associated
27  with loading, using, modifying and/or developing or reproducing the
28  software by the user in light of its specific status of free software,
29  that may mean that it is complicated to manipulate, and that also
30  therefore means that it is reserved for developers and experienced
31  professionals having in-depth computer knowledge. Users are therefore
32  encouraged to load and test the software's suitability as regards their
33  requirements in conditions enabling the security of their systems and/or
34  data to be ensured and, more generally, to use and operate it in the
35  same conditions as regards security.
36 
37  The fact that you are presently reading this means that you have had
38  knowledge of the CeCILL license and that you accept its terms.
39  */
40 
42 #include "../PatternTools.h"
43 
44 // From SeqLib:
45 #include <Bpp/Seq/SiteTools.h>
46 
47 using namespace bpp;
48 
49 /******************************************************************************/
50 
51 void DRASDRTreeLikelihoodData::initLikelihoods(const SiteContainer& sites, const SubstitutionModel& model) throw (Exception)
52 {
53  if (sites.getNumberOfSequences() == 1)
54  throw Exception("Error, only 1 sequence!");
55  if (sites.getNumberOfSequences() == 0)
56  throw Exception("Error, no sequence!");
57  if (sites.getAlphabet()->getAlphabetType()
58  != model.getAlphabet()->getAlphabetType())
59  throw AlphabetMismatchException("DRASDRTreeLikelihoodData::initLikelihoods. Data and model must have the same alphabet type.",
60  sites.getAlphabet(),
61  model.getAlphabet());
62  alphabet_ = sites.getAlphabet();
63  nbStates_ = model.getNumberOfStates();
64  nbSites_ = sites.getNumberOfSites();
65 
66  SitePatterns pattern(&sites);
67  if (shrunkData_)
68  delete shrunkData_;
69  shrunkData_ = pattern.getSites();
70  rootWeights_ = pattern.getWeights();
71  rootPatternLinks_ = pattern.getIndices();
72  nbDistinctSites_ = shrunkData_->getNumberOfSites();
73 
74  // Init data:
75  // Clone data for more efficiency on sequences access:
76  const SiteContainer* sequences = new AlignedSequenceContainer(*shrunkData_);
77  initLikelihoods(tree_->getRootNode(), *sequences, model);
78  delete sequences;
79 
80  // Now initialize root likelihoods and derivatives:
81  rootLikelihoods_.resize(nbDistinctSites_);
82  rootLikelihoodsS_.resize(nbDistinctSites_);
83  rootLikelihoodsSR_.resize(nbDistinctSites_);
84  for (size_t i = 0; i < nbDistinctSites_; i++)
85  {
86  VVdouble* rootLikelihoods_i_ = &rootLikelihoods_[i];
87  Vdouble* rootLikelihoodsS_i_ = &rootLikelihoodsS_[i];
88  rootLikelihoods_i_->resize(nbClasses_);
89  rootLikelihoodsS_i_->resize(nbClasses_);
90  for (size_t c = 0; c < nbClasses_; c++)
91  {
92  Vdouble* rootLikelihoods_i_c_ = &(*rootLikelihoods_i_)[c];
93  rootLikelihoods_i_c_->resize(nbStates_);
94  for (size_t x = 0; x < nbStates_; x++)
95  {
96  (*rootLikelihoods_i_c_)[x] = 1.;
97  }
98  }
99  }
100 }
101 
102 /******************************************************************************/
103 
104 void DRASDRTreeLikelihoodData::initLikelihoods(const Node* node, const SiteContainer& sites, const SubstitutionModel& model) throw (Exception)
105 {
106  if (node->isLeaf())
107  {
108  // Init leaves likelihoods:
109  const Sequence* seq;
110  try
111  {
112  seq = &sites.getSequence(node->getName());
113  }
114  catch (SequenceNotFoundException& snfe)
115  {
116  throw SequenceNotFoundException("DRASDRTreeLikelihoodData::initlikelihoods. Leaf name in tree not found in site container: ", (node->getName()));
117  }
118  DRASDRTreeLikelihoodLeafData* leafData = &leafData_[node->getId()];
119  VVdouble* leavesLikelihoods_leaf = &leafData->getLikelihoodArray();
120  leafData->setNode(node);
121  leavesLikelihoods_leaf->resize(nbDistinctSites_);
122  for (size_t i = 0; i < nbDistinctSites_; i++)
123  {
124  Vdouble* leavesLikelihoods_leaf_i = &(*leavesLikelihoods_leaf)[i];
125  leavesLikelihoods_leaf_i->resize(nbStates_);
126  int state = seq->getValue(i);
127  double test = 0.;
128  for (size_t s = 0; s < nbStates_; s++)
129  {
130  // Leaves likelihood are set to 1 if the char correspond to the site in the sequence,
131  // otherwise value set to 0:
132  ( *leavesLikelihoods_leaf_i)[s] = model.getInitValue(s, state);
133  test += ( *leavesLikelihoods_leaf_i)[s];
134  }
135  if (test < 0.000001)
136  std::cerr << "WARNING!!! Likelihood will be 0 for this site." << std::endl;
137  }
138  }
139 
140  // We initialize each son node first:
141  size_t nbSonNodes = node->getNumberOfSons();
142  for (size_t l = 0; l < nbSonNodes; l++)
143  {
144  // For each son node,
145  initLikelihoods(node->getSon(l), sites, model);
146  }
147 
148  // Initialize likelihood vector:
149  DRASDRTreeLikelihoodNodeData* nodeData = &nodeData_[node->getId()];
150  std::map<int, VVVdouble>* likelihoods_node_ = &nodeData->getLikelihoodArrays();
151  nodeData->setNode(node);
152 
153  int nbSons = static_cast<int>(node->getNumberOfSons());
154 
155  for (int n = (node->hasFather() ? -1 : 0); n < nbSons; n++)
156  {
157  const Node* neighbor = (*node)[n];
158  VVVdouble* likelihoods_node_neighbor_ = &(*likelihoods_node_)[neighbor->getId()];
159 
160  likelihoods_node_neighbor_->resize(nbDistinctSites_);
161 
162  if (neighbor->isLeaf())
163  {
164  VVdouble* leavesLikelihoods_leaf_ = &leafData_[neighbor->getId()].getLikelihoodArray();
165  for (size_t i = 0; i < nbDistinctSites_; i++)
166  {
167  Vdouble* leavesLikelihoods_leaf_i_ = &(*leavesLikelihoods_leaf_)[i];
168  VVdouble* likelihoods_node_neighbor_i_ = &(*likelihoods_node_neighbor_)[i];
169  likelihoods_node_neighbor_i_->resize(nbClasses_);
170  for (size_t c = 0; c < nbClasses_; c++)
171  {
172  Vdouble* likelihoods_node_neighbor_i_c_ = &(*likelihoods_node_neighbor_i_)[c];
173  likelihoods_node_neighbor_i_c_->resize(nbStates_);
174  for (size_t s = 0; s < nbStates_; s++)
175  {
176  (*likelihoods_node_neighbor_i_c_)[s] = (*leavesLikelihoods_leaf_i_)[s];
177  }
178  }
179  }
180  }
181  else
182  {
183  for (size_t i = 0; i < nbDistinctSites_; i++)
184  {
185  VVdouble* likelihoods_node_neighbor_i_ = &(*likelihoods_node_neighbor_)[i];
186  likelihoods_node_neighbor_i_->resize(nbClasses_);
187  for (size_t c = 0; c < nbClasses_; c++)
188  {
189  Vdouble* likelihoods_node_neighbor_i_c_ = &(*likelihoods_node_neighbor_i_)[c];
190  likelihoods_node_neighbor_i_c_->resize(nbStates_);
191  for (size_t s = 0; s < nbStates_; s++)
192  {
193  (*likelihoods_node_neighbor_i_c_)[s] = 1.; // All likelihoods are initialized to 1.
194  }
195  }
196  }
197  }
198  }
199 
200  // Initialize d and d2 likelihoods:
201  Vdouble* dLikelihoods_node_ = &nodeData->getDLikelihoodArray();
202  Vdouble* d2Likelihoods_node_ = &nodeData->getD2LikelihoodArray();
203  dLikelihoods_node_->resize(nbDistinctSites_);
204  d2Likelihoods_node_->resize(nbDistinctSites_);
205 }
206 
207 /******************************************************************************/
208 
209 void DRASDRTreeLikelihoodData::reInit() throw (Exception)
210 {
211  reInit(tree_->getRootNode());
212 }
213 
214 void DRASDRTreeLikelihoodData::reInit(const Node* node) throw (Exception)
215 {
216  if (node->isLeaf())
217  {
218  DRASDRTreeLikelihoodLeafData* leafData = &leafData_[node->getId()];
219  leafData->setNode(node);
220  }
221 
222  DRASDRTreeLikelihoodNodeData* nodeData = &nodeData_[node->getId()];
223  nodeData->setNode(node);
224  nodeData->eraseNeighborArrays();
225 
226  int nbSons = static_cast<int>(node->getNumberOfSons());
227 
228  for (int n = (node->hasFather() ? -1 : 0); n < nbSons; n++)
229  {
230  const Node* neighbor = (*node)[n];
231  VVVdouble* array = &nodeData->getLikelihoodArrayForNeighbor(neighbor->getId());
232 
233  array->resize(nbDistinctSites_);
234  for (size_t i = 0; i < nbDistinctSites_; i++)
235  {
236  VVdouble* array_i = &(*array)[i];
237  array_i->resize(nbClasses_);
238  for (size_t c = 0; c < nbClasses_; c++)
239  {
240  Vdouble* array_i_c = &(*array_i)[c];
241  array_i_c->resize(nbStates_);
242  for (size_t s = 0; s < nbStates_; s++)
243  {
244  (*array_i_c)[s] = 1.; // All likelihoods are initialized to 1.
245  }
246  }
247  }
248  }
249 
250  // We re-initialize each son node:
251  size_t nbSonNodes = node->getNumberOfSons();
252  for (size_t l = 0; l < nbSonNodes; l++)
253  {
254  // For each son node,
255  reInit(node->getSon(l));
256  }
257 
258  nodeData->getDLikelihoodArray().resize(nbDistinctSites_);
259  nodeData->getD2LikelihoodArray().resize(nbDistinctSites_);
260 }
261 
262 /******************************************************************************/
263 
Interface for all substitution models.
const std::vector< size_t > & getIndices() const
Definition: SitePatterns.h:175
void setNode(const Node *node)
Set the node associated to this data.
Likelihood data structure for a node.
const std::vector< unsigned int > & getWeights() const
Definition: SitePatterns.h:171
VVVdouble & getLikelihoodArrayForNeighbor(int neighborId)
const TreeTemplate< Node > * tree_
virtual bool isLeaf() const
Definition: Node.h:692
void reInit()
Rebuild likelihood arrays at inner nodes.
virtual int getId() const
Get the node&#39;s id.
Definition: Node.h:203
void setNode(const Node *node)
Set the node associated to this data.
SiteContainer * getSites() const
The phylogenetic node class.
Definition: Node.h:90
const std::map< int, VVVdouble > & getLikelihoodArrays() const
Data structure for site patterns.
Definition: SitePatterns.h:69
void initLikelihoods(const SiteContainer &sites, const SubstitutionModel &model)
Resize and initialize all likelihood arrays according to the given data set and substitution model...
Likelihood data structure for a leaf.