bpp-phyl  2.2.0
BioNJ.cpp
Go to the documentation of this file.
1 //
2 // File: BioNJ.cpp
3 // Created by: Vincent Ranwez
4 // Created on: Tue Apr 11 14:23 2006
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004, 2005, 2006)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
40 #include "BioNJ.h"
41 #include "../Tree.h"
42 
43 #include <Bpp/App/ApplicationTools.h>
44 
45 using namespace bpp;
46 
47 // From the STL:
48 #include <cmath>
49 #include <iostream>
50 
51 using namespace std;
52 
53 double BioNJ::computeDistancesFromPair(const vector<size_t>& pair, const vector<double>& branchLengths, size_t pos)
54 {
55  return positiveLengths_ ?
56  std::max(lambda_ * (matrix_(pair[0], pos) - branchLengths[0]) + (1 - lambda_) * (matrix_(pair[1], pos) - branchLengths[1]), 0.)
57  : lambda_ * (matrix_(pair[0], pos) - branchLengths[0]) + (1 - lambda_) * (matrix_(pair[1], pos) - branchLengths[1]);
58 }
59 
60 void BioNJ::computeTree() throw (Exception)
61 {
62  // Initialization:
63  for (size_t i = 0; i < matrix_.size(); i++)
64  {
65  currentNodes_[i] = getLeafNode(static_cast<int>(i), matrix_.getName(i));
66  }
67  int idNextNode = static_cast<int>(matrix_.size());
68  vector<double> newDist(matrix_.size());
69  vector<double> newVar(matrix_.size());
70 
71  // Build tree:
72  while (currentNodes_.size() > (rootTree_ ? 2 : 3))
73  {
74  if (verbose_)
75  ApplicationTools::displayGauge(matrix_.size() - currentNodes_.size(), matrix_.size() - (rootTree_ ? 2 : 3) - 1);
76  vector<size_t> bestPair = getBestPair();
77  vector<double> distances = computeBranchLengthsForPair(bestPair);
78  Node* best1 = currentNodes_[bestPair[0]];
79  Node* best2 = currentNodes_[bestPair[1]];
80  // Distances may be used by getParentNodes (PGMA for instance).
81  best1->setDistanceToFather(distances[0]);
82  best2->setDistanceToFather(distances[1]);
83  Node* parent = getParentNode(idNextNode++, best1, best2);
84  // compute lambda
85  lambda_ = 0;
86  if (variance_(bestPair[0], bestPair[1]) == 0)
87  lambda_ = .5;
88  else
89  {
90  for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++)
91  {
92  size_t id = i->first;
93  if (id != bestPair[0] && id != bestPair[1])
94  lambda_ += (variance_(bestPair[1], id) - variance_(bestPair[0], id));
95  }
96  double div = 2 * static_cast<double>(currentNodes_.size() - 2) * variance_(bestPair[0], bestPair[1]);
97  lambda_ /= div;
98  lambda_ += .5;
99  }
100  if (lambda_ < 0.)
101  lambda_ = 0.;
102  if (lambda_ > 1.)
103  lambda_ = 1.;
104 
105  for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++)
106  {
107  size_t id = i->first;
108  if (id != bestPair[0] && id != bestPair[1])
109  {
110  newDist[id] = computeDistancesFromPair(bestPair, distances, id);
111  newVar[id] = lambda_ * variance_(bestPair[0], id) + (1 - lambda_) * variance_(bestPair[1], id) - lambda_ * (1 - lambda_) * variance_(bestPair[0], bestPair[1]);
112  }
113  else
114  {
115  newDist[id] = 0;
116  }
117  }
118  // Actualize currentNodes_:
119  currentNodes_[bestPair[0]] = parent;
120  currentNodes_.erase(bestPair[1]);
121  for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++)
122  {
123  size_t id = i->first;
124  matrix_( bestPair[0], id) = matrix_(id, bestPair[0]) = newDist[id];
125  variance_(bestPair[0], id) = variance_(id, bestPair[0]) = newVar[id];
126  }
127  }
128  finalStep(idNextNode);
129 }
130 
void computeTree()
Compute the tree corresponding to the distance matrix.
Definition: BioNJ.cpp:60
STL namespace.
The phylogenetic node class.
Definition: Node.h:90
virtual void setDistanceToFather(double distance)
Set or update the distance toward the father node.
Definition: Node.h:299
double computeDistancesFromPair(const std::vector< size_t > &pair, const std::vector< double > &branchLengths, size_t pos)
Actualizes the distance matrix according to a given pair and the corresponding branch lengths...
Definition: BioNJ.cpp:53