bpp-phyl  2.2.0
UniformizationSubstitutionCount.cpp
Go to the documentation of this file.
1 //
2 // File: UniformizationSubstitutionCount.cpp
3 // Created by: Julien Dutheil
4 // Created on: Sat Mar 19 13:54 2011
5 //
6 
7 /*
8 Copyright or © or Copr. Bio++ Development Team, (November 16, 2004, 2005, 2006)
9 
10 This software is a computer program whose purpose is to provide classes
11 for phylogenetic data analysis.
12 
13 This software is governed by the CeCILL license under French law and
14 abiding by the rules of distribution of free software. You can use,
15 modify and/ or redistribute the software under the terms of the CeCILL
16 license as circulated by CEA, CNRS and INRIA at the following URL
17 "http://www.cecill.info".
18 
19 As a counterpart to the access to the source code and rights to copy,
20 modify and redistribute granted by the license, users are provided only
21 with a limited warranty and the software's author, the holder of the
22 economic rights, and the successive licensors have only limited
23 liability.
24 
25 In this respect, the user's attention is drawn to the risks associated
26 with loading, using, modifying and/or developing or reproducing the
27 software by the user in light of its specific status of free software,
28 that may mean that it is complicated to manipulate, and that also
29 therefore means that it is reserved for developers and experienced
30 professionals having in-depth computer knowledge. Users are therefore
31 encouraged to load and test the software's suitability as regards their
32 requirements in conditions enabling the security of their systems and/or
33 data to be ensured and, more generally, to use and operate it in the
34 same conditions as regards security.
35 
36 The fact that you are presently reading this means that you have had
37 knowledge of the CeCILL license and that you accept its terms.
38 */
39 
41 
42 #include "Bpp/Numeric/Matrix/MatrixTools.h"
43 #include "Bpp/Numeric/NumTools.h"
44 #include <vector>
45 
46 using namespace bpp;
47 using namespace std;
48 
49 /******************************************************************************/
50 
53  AbstractWeightedSubstitutionCount(weights, true),
54  model_(model),
55  nbStates_(model->getNumberOfStates()),
56  bMatrices_(reg->getNumberOfSubstitutionTypes()),
57  power_(),
58  s_(reg->getNumberOfSubstitutionTypes()),
59  miu_(0),
60  counts_(reg->getNumberOfSubstitutionTypes()),
61  currentLength_(1.)
62 {
63  //Check compatiblity between model and substitution register:
64  if (model->getAlphabet()->getAlphabetType() != reg->getAlphabet()->getAlphabetType())
65  throw Exception("UniformizationSubstitutionCount (constructor): alphabets do not match between register and model.");
66 
67  //Initialize all B matrices according to substitution register. This is done once for all,
68  //unless the number of states changes:
71 
72  for (unsigned int i = 0; i < nbStates_; ++i) {
73  double diagQ = abs(model_->Qij(i, i));
74  if (diagQ > miu_)
75  miu_ = diagQ;
76  }
77 
78  if (miu_>10000)
79  throw Exception("UniformizationSubstitutionCount::UniformizationSubstitutionCount The maximum diagonal values of generator is above 10000. Abort, chose another mapping method");
80 
81 }
82 
83 /******************************************************************************/
84 
86 {
87  size_t nbTypes = register_->getNumberOfSubstitutionTypes();
88  bMatrices_.resize(nbTypes);
89  counts_.resize(nbTypes);
90  s_.resize(nbTypes);
91 }
92 
93 
95 {
96  //Re-initialize all B matrices according to substitution register.
97  for (size_t i = 0; i < register_->getNumberOfSubstitutionTypes(); ++i) {
98  bMatrices_[i].resize(nbStates_, nbStates_);
99  counts_[i].resize(nbStates_, nbStates_);
100  }
101 }
102 
104 {
105  for (size_t j = 0; j < nbStates_; ++j) {
106  for (size_t k = 0; k < nbStates_; ++k) {
107  size_t i = register_->getType(j, k);
108  if (i > 0 && k != j) {
109  //jdutheil on 25/07/14: I think this is incorrect, weights should only come at the end.
110  //bMatrices_[i - 1](j, k) = model_->Qij(j, k) * (weights_ ? weights_->getIndex(fromState, toState) : 1);
111  bMatrices_[i - 1](j, k) = model_->Qij(j, k);
112  }
113  }
114  }
115 }
116 
117 
118 /******************************************************************************/
119 
121 {
122  double lam = miu_ * length;
123  RowMatrix<double> I;
124  MatrixTools::getId(nbStates_, I);
125  RowMatrix<double> R(model_->getGenerator());
126  MatrixTools::scale(R, 1. / miu_);
127  MatrixTools::add(R, I);
128 
129  //compute the stopping point
130  //use the tail of Poisson distribution
131  //can be approximated by 4 + 6 * sqrt(lam) + lam
132  size_t nMax = static_cast<size_t>(ceil(4 + 6 * sqrt(lam) + lam));
133 
134  //compute the powers of R
135  power_.resize(nMax + 1);
136  power_[0] = I;
137  for (size_t i = 1; i < nMax + 1; ++i)
138  MatrixTools::mult(power_[i - 1], R, power_[i]);
139 
140  for (size_t i = 0; i < register_->getNumberOfSubstitutionTypes(); ++i) {
141  s_[i].resize(nMax + 1);
142  MatrixTools::mult(bMatrices_[i], power_[0], s_[i][0]);
143  RowMatrix<double> tmp(nbStates_, nbStates_);
144  for (size_t l = 1; l < nMax + 1; ++l) {
145  MatrixTools::mult(R, s_[i][l - 1], s_[i][l]);
146  MatrixTools::mult(bMatrices_[i], power_[l], tmp);
147  MatrixTools::add(s_[i][l], tmp);
148  }
149  MatrixTools::fill(counts_[i], 0);
150  for (size_t l = 0; l < nMax + 1; ++l) {
151  tmp = s_[i][l];
152  //double f = (pow(lam, static_cast<double>(l + 1)) * exp(-lam) / static_cast<double>(NumTools::fact(l + 1))) / miu_;
153  double logF = static_cast<double>(l + 1) * log(lam) - lam - log(miu_) - NumTools::logFact(static_cast<double>(l + 1));
154  MatrixTools::scale(tmp, exp(logF));
155  MatrixTools::add(counts_[i], tmp);
156  }
157  }
158 
159  // Now we must divide by pijt and account for putative weights:
160  vector<int> supportedStates = model_->getAlphabetStates();
161  RowMatrix<double> P = model_->getPij_t(length);
162  for (size_t i = 0; i < register_->getNumberOfSubstitutionTypes(); i++) {
163  for (size_t j = 0; j < nbStates_; j++) {
164  for(size_t k = 0; k < nbStates_; k++) {
165  counts_[i](j, k) /= P(j, k);
166  if (isnan(counts_[i](j, k)) || counts_[i](j, k) < 0.)
167  counts_[i](j, k) = 0;
168  //Weights:
169  if (weights_)
170  counts_[i](j, k) *= weights_->getIndex(supportedStates[j], supportedStates[k]);
171  }
172  }
173  }
174 }
175 
176 /******************************************************************************/
177 
178 Matrix<double>* UniformizationSubstitutionCount::getAllNumbersOfSubstitutions(double length, size_t type) const
179 {
180  if (length < 0)
181  throw Exception("UniformizationSubstitutionCount::getAllNumbersOfSubstitutions. Negative branch length: " + TextTools::toString(length) + ".");
182  if (length != currentLength_)
183  {
184  computeCounts_(length);
185  currentLength_ = length;
186  }
187  return new RowMatrix<double>(counts_[type - 1]);
188 }
189 
190 /******************************************************************************/
191 
192 double UniformizationSubstitutionCount::getNumberOfSubstitutions(size_t initialState, size_t finalState, double length, size_t type) const
193 {
194  if (length < 0)
195  throw Exception("UniformizationSubstitutionCount::getNumbersOfSubstitutions. Negative branch length: " + TextTools::toString(length) + ".");
196  if (length != currentLength_)
197  {
198  computeCounts_(length);
199  currentLength_ = length;
200  }
201  return counts_[type - 1](initialState, finalState);
202 }
203 
204 /******************************************************************************/
205 
206 std::vector<double> UniformizationSubstitutionCount::getNumberOfSubstitutionsForEachType(size_t initialState, size_t finalState, double length) const
207 {
208  if (length < 0)
209  throw Exception("UniformizationSubstitutionCount::getNumbersOfSubstitutions. Negative branch length: " + TextTools::toString(length) + ".");
210  if (length != currentLength_)
211  {
212  computeCounts_(length);
213  currentLength_ = length;
214  }
215  std::vector<double> v(getNumberOfSubstitutionTypes());
216  for (unsigned int t = 0; t < getNumberOfSubstitutionTypes(); ++t) {
217  v[t] = counts_[t](initialState, finalState);
218  }
219  return v;
220 }
221 
222 /******************************************************************************/
223 
225 {
226  //Check compatiblity between model and substitution register:
227  if (model->getAlphabet()->getAlphabetType() != register_->getAlphabet()->getAlphabetType())
228  throw Exception("UniformizationSubstitutionCount::setSubstitutionModel: alphabets do not match between register and model.");
229 
230  model_ = model;
231  unsigned int n = model->getAlphabet()->getSize();
232  if (n != nbStates_) {
233  nbStates_ = n;
234  //Re-initialize all B matrices according to substitution register.
235  initBMatrices_();
236  }
237  fillBMatrices_();
238 
239  miu_ = 0;
240  for (unsigned int i = 0; i < nbStates_; ++i) {
241  double diagQ = abs(model_->Qij(i, i));
242  if (diagQ > miu_)
243  miu_ = diagQ;
244  }
245 
246  if (miu_>10000)
247  throw Exception("UniformizationSubstitutionCount::setSubstitutionModel The maximum diagonal values of generator is above 10000. Abort, chose another mapping method.");
248 
249  //Recompute counts:
251 }
252 
253 /******************************************************************************/
254 
256 {
257  //Check compatiblity between model and substitution register:
258  if (model_->getAlphabet()->getAlphabetType() != register_->getAlphabet()->getAlphabetType())
259  throw Exception("UniformizationSubstitutionCount::substitutionRegisterHasChanged: alphabets do not match between register and model.");
260 
261  resetBMatrices_();
262  initBMatrices_();
263  fillBMatrices_();
264 
265  //Recompute counts:
266  if (currentLength_ > 0)
268 }
269 
270 /******************************************************************************/
271 
273 {
274  if (weights_->getAlphabet()->getAlphabetType() != register_->getAlphabet()->getAlphabetType())
275  throw Exception("UniformizationSubstitutionCount::weightsHaveChanged. Incorrect alphabet type.");
276 
277  //jdutheil on 25/07/14: not necessary if weights are only accounted for in the end.
278  //fillBMatrices_();
279 
280  //Recompute counts:
281  if (currentLength_ > 0)
283 }
284 
285 /******************************************************************************/
286 
std::vector< std::vector< RowMatrix< double > > > s_
Interface for all substitution models.
std::auto_ptr< SubstitutionRegister > register_
virtual const std::vector< int > & getAlphabetStates() const =0
Basic implementation of the the SubstitutionCount interface.
std::vector< RowMatrix< double > > counts_
The SubstitutionRegister interface.
virtual const Alphabet * getAlphabet() const =0
STL namespace.
Matrix< double > * getAllNumbersOfSubstitutions(double length, size_t type=1) const
Get the numbers of susbstitutions on a branch, for each initial and final states, and given the branc...
UniformizationSubstitutionCount(const SubstitutionModel *model, SubstitutionRegister *reg, const AlphabetIndex2 *weights=0)
std::vector< RowMatrix< double > > bMatrices_
virtual const Matrix< double > & getPij_t(double t) const =0
virtual size_t getNumberOfSubstitutionTypes() const
Short cut function, equivalent to getSubstitutionRegister().getNumberOfSubstitutionTypes().
virtual double Qij(size_t i, size_t j) const =0
virtual const Matrix< double > & getGenerator() const =0
std::vector< double > getNumberOfSubstitutionsForEachType(size_t initialState, size_t finalState, double length) const
Get the numbers of susbstitutions on a branch for all types, for an initial and final states...
double getNumberOfSubstitutions(size_t initialState, size_t finalState, double length, size_t type=1) const
Get the number of susbstitutions on a branch, given the initial and final states, and the branch leng...
void setSubstitutionModel(const SubstitutionModel *model)
Set the substitution model associated with this count, if relevent.
Partial implementation of the WeightedSubstitutionCount interface.
virtual const Alphabet * getAlphabet() const =0