bpp-phyl  2.2.0
PGMA.cpp
Go to the documentation of this file.
1 //
2 // File: PGMA.cpp
3 // Created by: Julien Dutheil
4 // Created on: Mon jul 11 11:41 2005
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
40 #include "PGMA.h"
41 #include "../NodeTemplate.h"
42 #include "../Tree.h"
43 #include "../TreeTemplate.h"
44 #include "../TreeTemplateTools.h"
45 
46 using namespace bpp;
47 
48 // From the STL:
49 #include <cmath>
50 #include <iostream>
51 
52 using namespace std;
53 
55 {
56  Node* root = TreeTemplateTools::cloneSubtree<Node>(*dynamic_cast<TreeTemplate<NodeTemplate<PGMAInfos> >*>(tree_)->getRootNode());
57  return new TreeTemplate<Node>(root);
58 }
59 
60 vector<size_t> PGMA::getBestPair() throw (Exception)
61 {
62  vector<size_t> bestPair(2);
63  double distMin = -std::log(0.);
64  for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++)
65  {
66  size_t id = i->first;
67  map<size_t, Node*>::iterator j = i;
68  j++;
69  for ( ; j != currentNodes_.end(); j++)
70  {
71  size_t jd = j->first;
72  double dist = matrix_(id, jd);
73  if (dist < distMin)
74  {
75  distMin = dist;
76  bestPair[0] = id;
77  bestPair[1] = jd;
78  }
79  }
80  }
81 
82  if (distMin == -std::log(0.))
83  {
84  throw Exception("Unexpected error: no minimum found in the distance matrix.");
85  }
86 
87  return bestPair;
88 }
89 
90 vector<double> PGMA::computeBranchLengthsForPair(const vector<size_t>& pair)
91 {
92  vector<double> d(2);
93  double dist = matrix_(pair[0], pair[1]) / 2.;
94  d[0] = dist - dynamic_cast<NodeTemplate<PGMAInfos>*>(currentNodes_[pair[0]])->getInfos().time;
95  d[1] = dist - dynamic_cast<NodeTemplate<PGMAInfos>*>(currentNodes_[pair[1]])->getInfos().time;
96  return d;
97 }
98 
99 double PGMA::computeDistancesFromPair(const vector<size_t>& pair, const vector<double>& branchLengths, size_t pos)
100 {
101  double w1, w2;
102  if (weighted_)
103  {
104  w1 = 1;
105  w2 = 1;
106  }
107  else
108  {
109  w1 = static_cast<double>(dynamic_cast<NodeTemplate<PGMAInfos>*>(currentNodes_[pair[0]])->getInfos().numberOfLeaves);
110  w2 = static_cast<double>(dynamic_cast<NodeTemplate<PGMAInfos>*>(currentNodes_[pair[1]])->getInfos().numberOfLeaves);
111  }
112  return (w1 * matrix_(pair[0], pos) + w2 * matrix_(pair[1], pos)) / (w1 + w2);
113 }
114 
115 void PGMA::finalStep(int idRoot)
116 {
118  map<size_t, Node*>::iterator it = currentNodes_.begin();
119  size_t i1 = it->first;
120  Node* n1 = it->second;
121  it++;
122  size_t i2 = it->first;
123  Node* n2 = it->second;
124  double d = matrix_(i1, i2) / 2;
125  root->addSon(n1);
126  root->addSon(n2);
127  n1->setDistanceToFather(d - dynamic_cast<NodeTemplate<PGMAInfos>*>(n1)->getInfos().time);
128  n2->setDistanceToFather(d - dynamic_cast<NodeTemplate<PGMAInfos>*>(n2)->getInfos().time);
129  tree_ = new TreeTemplate<NodeTemplate<PGMAInfos> >(root);
130 }
131 
132 Node* PGMA::getLeafNode(int id, const string& name)
133 {
134  PGMAInfos infos;
135  infos.numberOfLeaves = 1;
136  infos.time = 0.;
137  NodeTemplate<PGMAInfos>* leaf = new NodeTemplate<PGMAInfos>(id, name);
138  leaf->setInfos(infos);
139  return leaf;
140 }
141 
142 Node* PGMA::getParentNode(int id, Node* son1, Node* son2)
143 {
144  PGMAInfos infos;
145  infos.numberOfLeaves =
146  dynamic_cast<NodeTemplate<PGMAInfos>*>(son1)->getInfos().numberOfLeaves
147  + dynamic_cast<NodeTemplate<PGMAInfos>*>(son2)->getInfos().numberOfLeaves;
148  infos.time = dynamic_cast<NodeTemplate<PGMAInfos>*>(son1)->getInfos().time + son1->getDistanceToFather();
149  Node* parent = new NodeTemplate<PGMAInfos>(id);
150  dynamic_cast<NodeTemplate<PGMAInfos>*>(parent)->setInfos(infos);
151  parent->addSon(son1);
152  parent->addSon(son2);
153  return parent;
154 }
155 
virtual Node * getParentNode(int id, Node *son1, Node *son2)
Get an inner node.
Definition: PGMA.cpp:142
virtual void addSon(size_t pos, Node *node)
Definition: Node.h:407
virtual void setInfos(const NodeInfos &infos)
Set the information to be associated to this node.
Definition: NodeTemplate.h:191
TreeTemplate< Node > * getTree() const
Get the computed tree, if there is one.
Definition: PGMA.cpp:54
STL namespace.
The phylogenetic tree class.
size_t numberOfLeaves
Definition: PGMA.h:54
virtual double getDistanceToFather() const
Get the distance to the father node is there is one, otherwise throw a NodeException.
Definition: Node.h:283
std::vector< size_t > getBestPair()
Get the best pair of nodes to agglomerate.
Definition: PGMA.cpp:60
double computeDistancesFromPair(const std::vector< size_t > &pair, const std::vector< double > &branchLengths, size_t pos)
Actualizes the distance matrix according to a given pair and the corresponding branch lengths...
Definition: PGMA.cpp:99
Inner data structure for WPGMA and UPGMA distance methods.
Definition: PGMA.h:52
double time
Definition: PGMA.h:55
The phylogenetic node class.
Definition: Node.h:90
std::vector< double > computeBranchLengthsForPair(const std::vector< size_t > &pair)
Compute the branch lengths for two nodes to agglomerate.
Definition: PGMA.cpp:90
virtual void setDistanceToFather(double distance)
Set or update the distance toward the father node.
Definition: Node.h:299
virtual Node * getLeafNode(int id, const std::string &name)
Get a leaf node.
Definition: PGMA.cpp:132
void finalStep(int idRoot)
Method called when there ar eonly three remaining node to agglomerate, and creates the root node of t...
Definition: PGMA.cpp:115
The NodeTemplate class.
Definition: NodeTemplate.h:73