bpp-phyl  2.2.0
DRTreeParsimonyScore.cpp
Go to the documentation of this file.
1 //
2 // File: DRTreeParsimonyScore.cpp
3 // Created by: Julien Dutheil
4 // Created on: Thu Jul 28 17:25 2005
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
40 #include "DRTreeParsimonyScore.h"
41 #include "../PatternTools.h"
42 #include "../TreeTemplateTools.h" // Needed for NNIs
43 
44 #include <Bpp/App/ApplicationTools.h>
45 #include <Bpp/Numeric/VectorTools.h>
46 
47 using namespace bpp;
48 using namespace std;
49 
50 /******************************************************************************/
51 
53  const Tree& tree,
54  const SiteContainer& data,
55  bool verbose,
56  bool includeGaps)
57 throw (Exception) :
58  AbstractTreeParsimonyScore(tree, data, verbose, includeGaps),
59  parsimonyData_(new DRTreeParsimonyData(getTreeP_())),
60  nbDistinctSites_()
61 {
62  init_(data, verbose);
63 }
64 
66  const Tree& tree,
67  const SiteContainer& data,
68  const StateMap* statesMap,
69  bool verbose)
70 throw (Exception) :
71  AbstractTreeParsimonyScore(tree, data, statesMap, verbose),
72  parsimonyData_(new DRTreeParsimonyData(getTreeP_())),
73  nbDistinctSites_()
74 {
75  init_(data, verbose);
76 }
77 
78 void DRTreeParsimonyScore::init_(const SiteContainer& data, bool verbose)
79 {
80  if (verbose)
81  ApplicationTools::displayTask("Initializing data structure");
82  parsimonyData_->init(data, getStateMap());
83  nbDistinctSites_ = parsimonyData_->getNumberOfDistinctSites();
84  computeScores();
85  if (verbose)
86  ApplicationTools::displayTaskDone();
87  if (verbose)
88  ApplicationTools::displayResult("Number of distinct sites",
89  TextTools::toString(nbDistinctSites_));
90 }
91 
92 /******************************************************************************/
93 
96  parsimonyData_(dynamic_cast<DRTreeParsimonyData*>(tp.parsimonyData_->clone())),
97  nbDistinctSites_(tp.nbDistinctSites_)
98 {
100 }
101 
102 /******************************************************************************/
103 
105 {
107  parsimonyData_ = dynamic_cast<DRTreeParsimonyData*>(tp.parsimonyData_->clone());
110  return *this;
111 }
112 
113 /******************************************************************************/
114 
116 {
117  delete parsimonyData_;
118 }
119 
120 /******************************************************************************/
122 {
123  computeScoresPostorder(getTreeP_()->getRootNode());
124  computeScoresPreorder(getTreeP_()->getRootNode());
126  parsimonyData_->getNodeData(getTree().getRootId()),
129 }
130 
132 {
133  if (node->isLeaf()) return;
135  for (unsigned int k = 0; k < node->getNumberOfSons(); k++)
136  {
137  const Node* son = node->getSon(k);
139  vector<Bitset>* bitsets = &pData->getBitsetsArrayForNeighbor(son->getId());
140  vector<unsigned int>* scores = &pData->getScoresArrayForNeighbor(son->getId());
141  if (son->isLeaf())
142  {
143  // son has no NodeData associated, must use LeafData instead
144  vector<Bitset>* sonBitsets = &parsimonyData_->getLeafData(son->getId()).getBitsetsArray();
145  for (unsigned int i = 0; i < sonBitsets->size(); i++)
146  {
147  (*bitsets)[i] = (*sonBitsets)[i];
148  (*scores)[i] = 0;
149  }
150  }
151  else
152  {
155  *bitsets,
156  *scores);
157  }
158  }
159 }
160 
161 void DRTreeParsimonyScore::computeScoresPostorderForNode(const DRTreeParsimonyNodeData& pData, vector<Bitset>& rBitsets, vector<unsigned int>& rScores)
162 {
163  // First initialize the vectors from input:
164  const Node* node = pData.getNode();
165  const Node* source = node->getFather();
166  vector<const Node*> neighbors = node->getNeighbors();
167  size_t nbNeighbors = node->degree();
168  vector< const vector<Bitset>*> iBitsets;
169  vector< const vector<unsigned int>*> iScores;
170  for (unsigned int k = 0; k < nbNeighbors; k++)
171  {
172  const Node* n = neighbors[k];
173  if (n != source)
174  {
175  iBitsets.push_back(&pData.getBitsetsArrayForNeighbor(n->getId()));
176  iScores.push_back(&pData.getScoresArrayForNeighbor(n->getId()));
177  }
178  }
179  // Then call the general method on these arrays:
180  computeScoresFromArrays(iBitsets, iScores, rBitsets, rScores);
181 }
182 
184 {
185  if (node->getNumberOfSons() == 0) return;
187  if (node->hasFather())
188  {
189  const Node* father = node->getFather();
190  vector<Bitset>* bitsets = &pData->getBitsetsArrayForNeighbor(father->getId());
191  vector<unsigned int>* scores = &pData->getScoresArrayForNeighbor(father->getId());
192  if (father->isLeaf())
193  { // Means that the tree is rooted by a leaf... dunno if we must allow that! Let it be for now.
194  // son has no NodeData associated, must use LeafData instead
195  vector<Bitset>* sonBitsets = &parsimonyData_->getLeafData(father->getId()).getBitsetsArray();
196  for (unsigned int i = 0; i < sonBitsets->size(); i++)
197  {
198  (*bitsets)[i] = (*sonBitsets)[i];
199  (*scores)[i] = 0;
200  }
201  }
202  else
203  {
205  parsimonyData_->getNodeData(father->getId()),
206  node,
207  *bitsets,
208  *scores);
209  }
210  }
211  // Recurse call:
212  for (unsigned int k = 0; k < node->getNumberOfSons(); k++)
213  {
214  computeScoresPreorder(node->getSon(k));
215  }
216 }
217 
218 void DRTreeParsimonyScore::computeScoresPreorderForNode(const DRTreeParsimonyNodeData& pData, const Node* source, std::vector<Bitset>& rBitsets, std::vector<unsigned int>& rScores)
219 {
220  // First initialize the vectors from input:
221  const Node* node = pData.getNode();
222  vector<const Node*> neighbors = node->getNeighbors();
223  size_t nbNeighbors = node->degree();
224  vector< const vector<Bitset>*> iBitsets;
225  vector< const vector<unsigned int>*> iScores;
226  for (unsigned int k = 0; k < nbNeighbors; k++)
227  {
228  const Node* n = neighbors[k];
229  if (n != source)
230  {
231  iBitsets.push_back(&pData.getBitsetsArrayForNeighbor(n->getId()));
232  iScores.push_back(&pData.getScoresArrayForNeighbor(n->getId()));
233  }
234  }
235  // Then call the general method on these arrays:
236  computeScoresFromArrays(iBitsets, iScores, rBitsets, rScores);
237 }
238 
239 void DRTreeParsimonyScore::computeScoresForNode(const DRTreeParsimonyNodeData& pData, std::vector<Bitset>& rBitsets, std::vector<unsigned int>& rScores)
240 {
241  const Node* node = pData.getNode();
242  size_t nbNeighbors = node->degree();
243  vector<const Node*> neighbors = node->getNeighbors();
244  // First initialize the vectors fro input:
245  vector< const vector<Bitset>*> iBitsets(nbNeighbors);
246  vector< const vector<unsigned int>*> iScores(nbNeighbors);
247  for (unsigned int k = 0; k < nbNeighbors; k++)
248  {
249  const Node* n = neighbors[k];
250  iBitsets[k] = &pData.getBitsetsArrayForNeighbor(n->getId());
251  iScores [k] = &pData.getScoresArrayForNeighbor(n->getId());
252  }
253  // Then call the general method on these arrays:
254  computeScoresFromArrays(iBitsets, iScores, rBitsets, rScores);
255 }
256 
257 /******************************************************************************/
258 unsigned int DRTreeParsimonyScore::getScore() const
259 {
260  unsigned int score = 0;
261  for (unsigned int i = 0; i < nbDistinctSites_; i++)
262  {
264  }
265  return score;
266 }
267 
268 /******************************************************************************/
269 unsigned int DRTreeParsimonyScore::getScoreForSite(size_t site) const
270 {
272 }
273 
274 /******************************************************************************/
276  const vector< const vector<Bitset>*>& iBitsets,
277  const vector< const vector<unsigned int>*>& iScores,
278  vector<Bitset>& oBitsets,
279  vector<unsigned int>& oScores)
280 {
281  size_t nbPos = oBitsets.size();
282  size_t nbNodes = iBitsets.size();
283  if (iScores.size() != nbNodes)
284  throw Exception("DRTreeParsimonyScore::computeScores(); Error, input arrays must have the same length.");
285  if (nbNodes < 1)
286  throw Exception("DRTreeParsimonyScore::computeScores(); Error, input arrays must have a size >= 1.");
287  const vector<Bitset>* bitsets0 = iBitsets[0];
288  const vector<unsigned int>* scores0 = iScores[0];
289  for (size_t i = 0; i < nbPos; i++)
290  {
291  oBitsets[i] = (*bitsets0)[i];
292  oScores[i] = (*scores0)[i];
293  }
294  for (size_t k = 1; k < nbNodes; k++)
295  {
296  const vector<Bitset>* bitsetsk = iBitsets[k];
297  const vector<unsigned int>* scoresk = iScores[k];
298  for (unsigned int i = 0; i < nbPos; i++)
299  {
300  Bitset bs = oBitsets[i] & (*bitsetsk)[i];
301  oScores[i] += (*scoresk)[i];
302  if (bs == 0)
303  {
304  bs = oBitsets[i] | (*bitsetsk)[i];
305  oScores[i] += 1;
306  }
307  oBitsets[i] = bs;
308  }
309  }
310 }
311 
312 /******************************************************************************/
313 double DRTreeParsimonyScore::testNNI(int nodeId) const throw (NodeException)
314 {
315  const Node* son = getTreeP_()->getNode(nodeId);
316  if (!son->hasFather()) throw NodePException("DRTreeParsimonyScore::testNNI(). Node 'son' must not be the root node.", son);
317  const Node* parent = son->getFather();
318  if (!parent->hasFather()) throw NodePException("DRTreeParsimonyScore::testNNI(). Node 'parent' must not be the root node.", parent);
319  const Node* grandFather = parent->getFather();
320  // From here: Bifurcation assumed.
321  // In case of multifurcation, an arbitrary uncle is chosen.
322  // If we are at root node with a trifurcation, this does not matter, since 2 NNI are possible (see doc of the NNISearchable interface).
323  size_t parentPosition = grandFather->getSonPosition(parent);
324  const Node* uncle = grandFather->getSon(parentPosition > 1 ? parentPosition - 1 : 1 - parentPosition);
325 
326  // Retrieving arrays of interest:
327  const DRTreeParsimonyNodeData* parentData = &parsimonyData_->getNodeData(parent->getId());
328  const vector<Bitset>* sonBitsets = &parentData->getBitsetsArrayForNeighbor(son->getId());
329  const vector<unsigned int>* sonScores = &parentData->getScoresArrayForNeighbor(son->getId());
330  vector<const Node*> parentNeighbors = TreeTemplateTools::getRemainingNeighbors(parent, grandFather, son);
331  size_t nbParentNeighbors = parentNeighbors.size();
332  vector< const vector<Bitset>*> parentBitsets(nbParentNeighbors);
333  vector< const vector<unsigned int>*> parentScores(nbParentNeighbors);
334  for (unsigned int k = 0; k < nbParentNeighbors; k++)
335  {
336  const Node* n = parentNeighbors[k]; // This neighbor
337  parentBitsets[k] = &parentData->getBitsetsArrayForNeighbor(n->getId());
338  parentScores[k] = &parentData->getScoresArrayForNeighbor(n->getId());
339  }
340 
341  const DRTreeParsimonyNodeData* grandFatherData = &parsimonyData_->getNodeData(grandFather->getId());
342  const vector<Bitset>* uncleBitsets = &grandFatherData->getBitsetsArrayForNeighbor(uncle->getId());
343  const vector<unsigned int>* uncleScores = &grandFatherData->getScoresArrayForNeighbor(uncle->getId());
344  vector<const Node*> grandFatherNeighbors = TreeTemplateTools::getRemainingNeighbors(grandFather, parent, uncle);
345  size_t nbGrandFatherNeighbors = grandFatherNeighbors.size();
346  vector< const vector<Bitset>*> grandFatherBitsets(nbGrandFatherNeighbors);
347  vector< const vector<unsigned int>*> grandFatherScores(nbGrandFatherNeighbors);
348  for (unsigned int k = 0; k < nbGrandFatherNeighbors; k++)
349  {
350  const Node* n = grandFatherNeighbors[k]; // This neighbor
351  grandFatherBitsets[k] = &grandFatherData->getBitsetsArrayForNeighbor(n->getId());
352  grandFatherScores[k] = &grandFatherData->getScoresArrayForNeighbor(n->getId());
353  }
354 
355  // Compute arrays and scores for grand-father node:
356  grandFatherBitsets.push_back(sonBitsets);
357  grandFatherScores.push_back(sonScores);
358  // Init arrays:
359  vector<Bitset> gfBitsets(sonBitsets->size()); // All arrays supposed to have the same size!
360  vector<unsigned int> gfScores(sonScores->size());
361  // Fill arrays:
362  computeScoresFromArrays(grandFatherBitsets, grandFatherScores, gfBitsets, gfScores);
363 
364  // Now computes arrays and scores for parent node:
365  parentBitsets.push_back(uncleBitsets);
366  parentScores.push_back(uncleScores);
367  parentBitsets.push_back(&gfBitsets);
368  parentScores.push_back(&gfScores);
369  // Init arrays:
370  vector<Bitset> pBitsets(sonBitsets->size()); // All arrays supposed to have the same size!
371  vector<unsigned int> pScores(sonScores->size());
372  // Fill arrays:
373  computeScoresFromArrays(parentBitsets, parentScores, pBitsets, pScores);
374 
375  // Final computation:
376  unsigned int score = 0;
377  for (unsigned int i = 0; i < nbDistinctSites_; i++)
378  {
379  score += pScores[i] * parsimonyData_->getWeight(i);
380  }
381  return (double)score - (double)getScore();
382 }
383 
384 /******************************************************************************/
386 {
387  Node* son = getTreeP_()->getNode(nodeId);
388  if (!son->hasFather()) throw NodePException("DRTreeParsimonyScore::doNNI(). Node 'son' must not be the root node.", son);
389  Node* parent = son->getFather();
390  if (!parent->hasFather()) throw NodePException("DRTreeParsimonyScore::doNNI(). Node 'parent' must not be the root node.", parent);
391  Node* grandFather = parent->getFather();
392  // From here: Bifurcation assumed.
393  // In case of multifurcation, an arbitrary uncle is chosen.
394  // If we are at root node with a trifurcation, this does not matter, since 2 NNI are possible (see doc of the NNISearchable interface).
395  size_t parentPosition = grandFather->getSonPosition(parent);
396  Node* uncle = grandFather->getSon(parentPosition > 1 ? parentPosition - 1 : 1 - parentPosition);
397  // Swap nodes:
398  parent->removeSon(son);
399  grandFather->removeSon(uncle);
400  parent->addSon(uncle);
401  grandFather->addSon(son);
402 }
403 
404 /******************************************************************************/
405 
Parsimony data structure for a node.
unsigned int getRootScore(size_t i) const
virtual void addSon(size_t pos, Node *node)
Definition: Node.h:407
std::vector< Bitset > & getBitsetsArrayForNeighbor(int neighborId)
void setTree(const TreeTemplate< Node > *tree)
Set the tree associated to the data.
static void computeScoresFromArrays(const std::vector< const std::vector< Bitset > *> &iBitsets, const std::vector< const std::vector< unsigned int > *> &iScores, std::vector< Bitset > &oBitsets, std::vector< unsigned int > &oScores)
Compute bitsets and scores from an array of arrays.
double testNNI(int nodeId) const
Send the score of a NNI movement, without performing it.
static std::vector< const Node * > getRemainingNeighbors(const Node *node1, const Node *node2, const Node *node3)
Get a subset of node neighbors.
virtual const Node * getSon(size_t pos) const
Definition: Node.h:395
Double recursive implementation of interface TreeParsimonyScore.
size_t getRootArrayPosition(size_t site) const
STL namespace.
unsigned int getWeight(size_t pos) const
DRTreeParsimonyScore & operator=(const DRTreeParsimonyScore &tp)
virtual const Node * getFather() const
Get the father of this node is there is one.
Definition: Node.h:339
Interface for phylogenetic tree objects.
Definition: Tree.h:148
AbstractTreeParsimonyScore & operator=(const AbstractTreeParsimonyScore &tp)
virtual void computeScoresPreorder(const Node *)
Compute scores (preorder algorithm).
DRTreeParsimonyNodeData & getNodeData(int nodeId)
virtual bool hasFather() const
Tell if this node has a father node.
Definition: Node.h:379
std::vector< unsigned int > & getRootScores()
virtual bool isLeaf() const
Definition: Node.h:692
void init_(const SiteContainer &data, bool verbose)
void doNNI(int nodeId)
Perform a NNI movement.
std::vector< unsigned int > & getScoresArrayForNeighbor(int neighborId)
static void computeScoresForNode(const DRTreeParsimonyNodeData &pData, std::vector< Bitset > &rBitsets, std::vector< unsigned int > &rScores)
Compute bitsets and scores for each site for a node, in all directions.
static void computeScoresPostorderForNode(const DRTreeParsimonyNodeData &pData, std::vector< Bitset > &rBitsets, std::vector< unsigned int > &rScores)
Compute bitsets and scores for each site for a node, in postorder.
virtual int getId() const
Get the node&#39;s id.
Definition: Node.h:203
const Node * getNode() const
Get the node associated to this data structure.
std::vector< const Node * > getNeighbors() const
Definition: Node.cpp:114
virtual Node * removeSon(size_t pos)
Definition: Node.h:444
std::vector< Bitset > & getRootBitsets()
virtual size_t getSonPosition(const Node *son) const
Definition: Node.cpp:130
virtual void computeScoresPostorder(const Node *)
Compute scores (postorder algorithm).
const TreeTemplate< Node > * getTreeP_() const
DRTreeParsimonyData * clone() const
The phylogenetic node class.
Definition: Node.h:90
std::bitset< 21 > Bitset
Partial implementation of the TreeParsimonyScore interface.
virtual void computeScores()
Compute all scores.
Parsimony data structure for double-recursive (DR) algorithm.
DRTreeParsimonyData * parsimonyData_
virtual size_t getNumberOfSons() const
Definition: Node.h:388
unsigned int getScore() const
Get the score for the current tree, i.e. the total minimum number of changes in the tree...
General exception thrown when something is wrong with a particular node.
Map the states of a given alphabet which have a model state.
Definition: StateMap.h:58
static void computeScoresPreorderForNode(const DRTreeParsimonyNodeData &pData, const Node *source, std::vector< Bitset > &rBitsets, std::vector< unsigned int > &rScores)
Compute bitsets and scores for each site for a node, in preorder.
virtual size_t degree() const
Definition: Node.h:488
unsigned int getScoreForSite(size_t site) const
Get the score for a given site for the current tree, i.e. the total minimum number of changes in the ...
General exception thrown when something is wrong with a particular node.
DRTreeParsimonyLeafData & getLeafData(int nodeId)
DRTreeParsimonyScore(const Tree &tree, const SiteContainer &data, bool verbose=true, bool includeGaps=false)
virtual const Tree & getTree() const
Get the tree for wich scores are computed.