bpp-phyl  2.2.0
PairedSiteLikelihoods.cpp
Go to the documentation of this file.
1 //
2 // File: PairedSiteLikelihoods.cpp
3 // Created by: Nicolas Rochette
4 // Created on: January 6, 2011
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
40 // From the STL
41 #include <vector>
42 #include <string>
43 #include <numeric>
44 #include <cmath>
45 
46 #include "PairedSiteLikelihoods.h"
47 #include "TreeLikelihood.h"
48 
49 using namespace std;
50 using namespace bpp;
51 
52 /***
53  * Constructors:
54  ***/
55 
56 PairedSiteLikelihoods::PairedSiteLikelihoods() :
57  logLikelihoods_(),
58  modelNames_()
59 {}
60 
62  const vector<vector<double> >& siteLogLikelihoods,
63  const vector<string>& modelNames) throw (Exception) :
64  logLikelihoods_(siteLogLikelihoods),
65  modelNames_(modelNames)
66 {
67  if (modelNames_.size() != getNumberOfModels())
68  {
69  if (modelNames_.size() == 0)
70  modelNames_.assign(getNumberOfModels(), string());
71  else
72  throw Exception("PairedSiteLikelihoods: There should be as many model names as model site-loglikelihoods records.");
73  }
74 
75  if (this->getNumberOfModels() > 0)
76  {
77  for (vector<vector<double> >::const_iterator siteLLiks = siteLogLikelihoods.begin();
78  siteLLiks != siteLogLikelihoods.end();
79  ++siteLLiks)
80  {
81  if (siteLLiks->size() != getNumberOfSites())
82  throw Exception("PairedSiteLikelihoods: Models site-loglikelihoods records do not have the same number of elements.");
83  }
84  }
85 }
86 
88  const vector<double>& siteLogLikelihoods,
89  const string& modelName
90  ) throw (Exception)
91 {
92  if (getNumberOfModels() > 0 && siteLogLikelihoods.size() != getNumberOfSites())
93  throw Exception("PairedSiteLikelihoods::appendModel: Model site-loglikelihoods record does not have the correct number of elements");
94 
95  logLikelihoods_.push_back(siteLogLikelihoods);
96  modelNames_.push_back(modelName);
97 }
98 
99 void PairedSiteLikelihoods::appendModel(const bpp::TreeLikelihood& treeLikelihood) throw (Exception)
100 {
101  const vector<double>& siteLogLikelihoods = treeLikelihood.getLogLikelihoodForEachSite();
102  const string& modelName = treeLikelihood.getTree().getName();
103 
104  PairedSiteLikelihoods::appendModel(siteLogLikelihoods, modelName);
105 }
106 
108 {
109  if (getNumberOfModels() > 0 && psl.getNumberOfModels() > 0 && psl.getNumberOfSites() != getNumberOfSites())
110  throw Exception("PairedSiteLikelihoods::appendModels: The two PairedSiteLikelihood objects have different number of sites.");
111 
112  logLikelihoods_.insert(logLikelihoods_.end(),
113  psl.logLikelihoods_.begin(),
114  psl.logLikelihoods_.end()
115  );
116 
117  modelNames_.insert(modelNames_.end(),
118  psl.modelNames_.begin(),
119  psl.modelNames_.end()
120  );
121 }
122 
123 
124 pair<vector<string>, vector<double> > PairedSiteLikelihoods::computeExpectedLikelihoodWeights (int replicates) const
125 {
126  // Initialize the weights
127  vector<double> weights(getNumberOfModels(), 0);
128 
129  // Sum the model weights over replicates
130  for (int r = 0; r < replicates; ++r)
131  {
132  // Draw the pseudoreplicate
133  vector<int> siteCounts = bootstrap(getNumberOfSites());
134 
135  // Compute the loglikelihood of each model for this replicate
136  vector<double> models_logliks (getNumberOfModels(), 0);
137  for (size_t m = 0; m < getNumberOfModels(); ++m)
138  {
139  const vector<double>& modelSiteLLiks = logLikelihoods_.at(m);
140  double Y = 0;
141  for (size_t s = 0; s < getNumberOfSites(); ++s)
142  {
143  Y += modelSiteLLiks.at(s) * siteCounts.at(s);
144  }
145  models_logliks.at(m) = Y;
146  }
147 
148  // Get the best loglikelihood
149  double Ymax = *max_element(models_logliks.begin(), models_logliks.end());
150 
151  // Get the exponentials of the loglikelihood differences
152  // and the sum of these values
153  vector<double> exp_logliks_diffs (getNumberOfModels(), 0);
154  for (size_t m = 0; m < getNumberOfModels(); ++m)
155  {
156  exp_logliks_diffs.at(m) = exp(models_logliks.at(m) - Ymax);
157  }
158 
159  double sumELLD = accumulate(exp_logliks_diffs.begin(), exp_logliks_diffs.end(), 0.0);
160 
161  // Get the models weights for this replicate
162  for (size_t m = 0; m < getNumberOfModels(); ++m)
163  {
164  double w = exp_logliks_diffs.at(m) / sumELLD;
165  weights.at(m) += w;
166  }
167  } //for replicates
168 
169  // Divide all weights by the number of replicates.
170  for (vector<double>::iterator w = weights.begin(); w != weights.end(); ++w)
171  {
172  *w /= replicates;
173  }
174 
175  return make_pair(modelNames_, weights);
176 }
177 
178 std::vector<int> PairedSiteLikelihoods::bootstrap(std::size_t length, double scaling)
179 {
180  vector<int> v(length, 0);
181 
182  for (size_t i = 0; i < static_cast<size_t>(static_cast<double>(length) * scaling + 0.5); ++i)
183  {
184  ++v[RandomTools::giveIntRandomNumberBetweenZeroAndEntry<size_t>(length)];
185  }
186 
187  return v;
188 }
189 
A container for paired-site likelihoods (likelihoods over the same sites for different models...
static std::vector< int > bootstrap(std::size_t length, double scaling=1)
Draw a nonparametric pseudoreplicate.
void appendModels(const PairedSiteLikelihoods &psl)
Append models by concatenation.
The TreeLikelihood interface.
STL namespace.
void appendModel(const std::vector< double > &siteLogLikelihoods, const std::string &modelName="")
Append a model.
std::pair< std::vector< std::string >, std::vector< double > > computeExpectedLikelihoodWeights(int replicates=10000) const
Compute the Expected Likelihood Weights of the models.
std::vector< std::vector< double > > logLikelihoods_
std::size_t getNumberOfSites() const
size_t getNumberOfModels() const
Get the number of models in the container.
std::vector< std::string > modelNames_