bpp-phyl  2.2.0
NeighborJoining.cpp
Go to the documentation of this file.
1 //
2 // File: NeighborJoining.cpp
3 // Created by: Julien Dutheil
4 // Vincent Ranwez
5 // Created on: Thu jun 23 10:39 2005
6 //
7 
8 /*
9  Copyright or © or Copr. CNRS, (November 16, 2004)
10 
11  This software is a computer program whose purpose is to provide classes
12  for phylogenetic data analysis.
13 
14  This software is governed by the CeCILL license under French law and
15  abiding by the rules of distribution of free software. You can use,
16  modify and/ or redistribute the software under the terms of the CeCILL
17  license as circulated by CEA, CNRS and INRIA at the following URL
18  "http://www.cecill.info".
19 
20  As a counterpart to the access to the source code and rights to copy,
21  modify and redistribute granted by the license, users are provided only
22  with a limited warranty and the software's author, the holder of the
23  economic rights, and the successive licensors have only limited
24  liability.
25 
26  In this respect, the user's attention is drawn to the risks associated
27  with loading, using, modifying and/or developing or reproducing the
28  software by the user in light of its specific status of free software,
29  that may mean that it is complicated to manipulate, and that also
30  therefore means that it is reserved for developers and experienced
31  professionals having in-depth computer knowledge. Users are therefore
32  encouraged to load and test the software's suitability as regards their
33  requirements in conditions enabling the security of their systems and/or
34  data to be ensured and, more generally, to use and operate it in the
35  same conditions as regards security.
36 
37  The fact that you are presently reading this means that you have had
38  knowledge of the CeCILL license and that you accept its terms.
39  */
40 
41 #include "NeighborJoining.h"
42 #include "../Tree.h"
43 
44 using namespace bpp;
45 
46 #include <cmath>
47 #include <iostream>
48 
49 using namespace std;
50 
51 std::vector<size_t> NeighborJoining::getBestPair() throw (Exception)
52 {
53  for (std::map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++)
54  {
55  size_t id = i->first;
56  sumDist_[id] = 0;
57  for (map<size_t, Node*>::iterator j = currentNodes_.begin(); j != currentNodes_.end(); j++)
58  {
59  size_t jd = j->first;
60  sumDist_[id] += matrix_(id, jd);
61  }
62  }
63 
64  vector<size_t> bestPair(2);
65  double critMax = std::log(0.);
66  for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++)
67  {
68  size_t id = i->first;
69  map<size_t, Node*>::iterator j = i;
70  j++;
71  for ( ; j != currentNodes_.end(); j++)
72  {
73  size_t jd = j->first;
74  double crit = sumDist_[id] + sumDist_[jd] - static_cast<double>(currentNodes_.size() - 2) * matrix_(id, jd);
75  // cout << "\t" << id << "\t" << jd << "\t" << crit << endl;
76  if (crit > critMax)
77  {
78  critMax = crit;
79  bestPair[0] = id;
80  bestPair[1] = jd;
81  }
82  }
83  }
84 
85  if (critMax == std::log(0.))
86  {
87  throw Exception("Unexpected error: no maximum criterium found.");
88  }
89  return bestPair;
90 }
91 
92 std::vector<double> NeighborJoining::computeBranchLengthsForPair(const std::vector<size_t>& pair)
93 {
94  double ratio = (sumDist_[pair[0]] - sumDist_[pair[1]]) / static_cast<double>(currentNodes_.size() - 2);
95  vector<double> d(2);
96  if (positiveLengths_)
97  {
98  d[0] = std::max(.5 * (matrix_(pair[0], pair[1]) + ratio), 0.);
99  d[1] = std::max(.5 * (matrix_(pair[0], pair[1]) - ratio), 0.);
100  }
101  else
102  {
103  d[0] = .5 * (matrix_(pair[0], pair[1]) + ratio);
104  d[1] = .5 * (matrix_(pair[0], pair[1]) - ratio);
105  }
106  return d;
107 }
108 
109 double NeighborJoining::computeDistancesFromPair(const std::vector<size_t>& pair, const std::vector<double>& branchLengths, size_t pos)
110 {
111  return
112  positiveLengths_ ?
113  std::max(.5 * (matrix_(pair[0], pos) - branchLengths[0] + matrix_(pair[1], pos) - branchLengths[1]), 0.)
114  : .5 * (matrix_(pair[0], pos) - branchLengths[0] + matrix_(pair[1], pos) - branchLengths[1]);
115 }
116 
118 {
119  Node* root = new Node(idRoot);
120  map<size_t, Node* >::iterator it = currentNodes_.begin();
121  size_t i1 = it->first;
122  Node* n1 = it->second;
123  it++;
124  size_t i2 = it->first;
125  Node* n2 = it->second;
126  if (currentNodes_.size() == 2)
127  {
128  // Rooted
129  double d = matrix_(i1, i2) / 2;
130  root->addSon(n1);
131  root->addSon(n2);
132  n1->setDistanceToFather(d);
133  n2->setDistanceToFather(d);
134  }
135  else
136  {
137  // Unrooted
138  it++;
139  size_t i3 = it->first;
140  Node* n3 = it->second;
141  double d1 = positiveLengths_ ?
142  std::max(matrix_(i1, i2) + matrix_(i1, i3) - matrix_(i2, i3), 0.)
143  : matrix_(i1, i2) + matrix_(i1, i3) - matrix_(i2, i3);
144  double d2 = positiveLengths_ ?
145  std::max(matrix_(i2, i1) + matrix_(i2, i3) - matrix_(i1, i3), 0.)
146  : matrix_(i2, i1) + matrix_(i2, i3) - matrix_(i1, i3);
147  double d3 = positiveLengths_ ?
148  std::max(matrix_(i3, i1) + matrix_(i3, i2) - matrix_(i1, i2), 0.)
149  : matrix_(i3, i1) + matrix_(i3, i2) - matrix_(i1, i2);
150  root->addSon(n1);
151  root->addSon(n2);
152  root->addSon(n3);
153  n1->setDistanceToFather(d1 / 2.);
154  n2->setDistanceToFather(d2 / 2.);
155  n3->setDistanceToFather(d3 / 2.);
156  }
157  tree_ = new TreeTemplate<Node>(root);
158 }
159 
virtual void addSon(size_t pos, Node *node)
Definition: Node.h:407
void finalStep(int idRoot)
Method called when there are only three remaining nodes to agglomerate, and creates the root node of t...
STL namespace.
The phylogenetic tree class.
std::vector< double > computeBranchLengthsForPair(const std::vector< size_t > &pair)
Compute the branch lengths for two nodes to agglomerate.
double computeDistancesFromPair(const std::vector< size_t > &pair, const std::vector< double > &branchLengths, size_t pos)
Actualizes the distance matrix according to a given pair and the corresponding branch lengths...
std::vector< size_t > getBestPair()
Get the best pair of nodes to agglomerate.
The phylogenetic node class.
Definition: Node.h:90
virtual void setDistanceToFather(double distance)
Set or update the distance toward the father node.
Definition: Node.h:299