bpp-phyl  2.2.0
AbstractSubstitutionModel.cpp
Go to the documentation of this file.
1 //
2 // File: AbstractSubstitutionModel.cpp
3 // Created by: Julien Dutheil
4 // Created on: Tue May 27 10:31:49 2003
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
41 
42 #include <Bpp/Text/TextTools.h>
43 #include <Bpp/Numeric/VectorTools.h>
44 #include <Bpp/Numeric/Matrix/MatrixTools.h>
45 #include <Bpp/Numeric/Matrix/EigenValue.h>
46 #include <Bpp/Numeric/NumConstants.h>
47 
48 // From SeqLib:
49 #include <Bpp/Seq/Container/SequenceContainerTools.h>
50 
51 using namespace bpp;
52 using namespace std;
53 
54 /******************************************************************************/
55 
56 AbstractSubstitutionModel::AbstractSubstitutionModel(const Alphabet* alpha, StateMap* stateMap, const std::string& prefix) :
57  AbstractParameterAliasable(prefix),
58  alphabet_(alpha),
59  stateMap_(stateMap),
60  size_(alpha->getSize()),
61  rate_(1),
62  generator_(size_, size_),
63  freq_(size_),
64  exchangeability_(size_, size_),
65  pijt_(size_, size_),
66  dpijt_(size_, size_),
67  d2pijt_(size_, size_),
68  eigenDecompose_(true),
69  eigenValues_(size_),
70  iEigenValues_(size_),
71  isDiagonalizable_(false),
72  rightEigenVectors_(size_, size_),
73  isNonSingular_(false),
74  leftEigenVectors_(size_, size_),
75  vPowGen_(),
76  tmpMat_(size_, size_)
77 {
78  for (size_t i = 0; i < size_; i++)
79  {
80  freq_[i] = 1.0 / static_cast<double>(size_);
81  }
82 }
83 
84 /******************************************************************************/
85 
87 {
88  // if the object is not an AbstractReversibleSubstitutionModel,
89  // computes the exchangeability_ Matrix (otherwise the generator_
90  // has been computed from the exchangeability_)
91 
92  if (!dynamic_cast<AbstractReversibleSubstitutionModel*>(this)) {
93  for (size_t i = 0; i < size_; i++)
94  {
95  for (size_t j = 0; j < size_; j++)
96  {
97  exchangeability_(i, j) = generator_(i, j) / freq_[j];
98  }
99  }
100  }
101 
102  // Compute eigen values and vectors:
104  {
105  EigenValue<double> ev(generator_);
106  rightEigenVectors_ = ev.getV();
107  eigenValues_ = ev.getRealEigenValues();
108  iEigenValues_ = ev.getImagEigenValues();
109  try
110  {
111  MatrixTools::inv(rightEigenVectors_, leftEigenVectors_);
112  isNonSingular_ = true;
113  isDiagonalizable_ = true;
114  for (size_t i = 0; i < size_ && isDiagonalizable_; i++)
115  {
116  if (abs(iEigenValues_[i]) > NumConstants::TINY())
117  isDiagonalizable_ = false;
118  }
119  }
120  catch (ZeroDivisionException& e)
121  {
122  ApplicationTools::displayMessage("Singularity during diagonalization. Taylor series used instead.");
123 
124  isNonSingular_ = false;
125  isDiagonalizable_ = false;
126  MatrixTools::Taylor(generator_, 30, vPowGen_);
127  }
128  }
129 }
130 
131 
132 /******************************************************************************/
133 
134 const Matrix<double>& AbstractSubstitutionModel::getPij_t(double t) const
135 {
136  if (t == 0)
137  {
138  MatrixTools::getId(size_, pijt_);
139  }
140  else if (isNonSingular_)
141  {
142  if (isDiagonalizable_)
143  {
144  MatrixTools::mult<double>(rightEigenVectors_, VectorTools::exp(eigenValues_ * (rate_ * t)), leftEigenVectors_, pijt_);
145  }
146  else
147  {
148  std::vector<double> vdia(size_);
149  std::vector<double> vup(size_ - 1);
150  std::vector<double> vlo(size_ - 1);
151  double c = 0, s = 0;
152  double l = rate_ * t;
153  for (size_t i = 0; i < size_; i++)
154  {
155  vdia[i] = std::exp(eigenValues_[i] * l);
156  if (iEigenValues_[i] != 0)
157  {
158  s = std::sin(iEigenValues_[i] * l);
159  c = std::cos(iEigenValues_[i] * l);
160  vup[i] = vdia[i] * s;
161  vlo[i] = -vup[i];
162  vdia[i] *= c;
163  vdia[i + 1] = vdia[i]; // trick to avoid computation
164  i++;
165  }
166  else
167  {
168  if (i < size_ - 1)
169  {
170  vup[i] = 0;
171  vlo[i] = 0;
172  }
173  }
174  }
175  MatrixTools::mult<double>(rightEigenVectors_, vdia, vup, vlo, leftEigenVectors_, pijt_);
176  }
177  }
178  else
179  {
180  MatrixTools::getId(size_, pijt_);
181  double s = 1.0;
182  double v = rate_ * t;
183  size_t m = 0;
184  while (v > 0.5) // exp(r*t*A)=(exp(r*t/(2^m) A))^(2^m)
185  {
186  m += 1;
187  v /= 2;
188  }
189  for (size_t i = 1; i < vPowGen_.size(); i++)
190  {
191  s *= v / static_cast<double>(i);
192  MatrixTools::add(pijt_, s, vPowGen_[i]);
193  }
194  while (m > 0) // recover the 2^m
195  {
196  MatrixTools::mult(pijt_, pijt_, tmpMat_);
197  MatrixTools::copy(tmpMat_, pijt_);
198  m--;
199  }
200  }
201 // MatrixTools::print(pijt_);
202  return pijt_;
203 }
204 
205 const Matrix<double>& AbstractSubstitutionModel::getdPij_dt(double t) const
206 {
207  if (isNonSingular_)
208  {
209  if (isDiagonalizable_)
210  {
211  MatrixTools::mult(rightEigenVectors_, rate_ * eigenValues_ * VectorTools::exp(eigenValues_ * (rate_ * t)), leftEigenVectors_, dpijt_);
212  }
213  else
214  {
215  std::vector<double> vdia(size_);
216  std::vector<double> vup(size_ - 1);
217  std::vector<double> vlo(size_ - 1);
218  double c, s, e;
219  double l = rate_ * t;
220  for (size_t i = 0; i < size_; i++)
221  {
222  e = std::exp(eigenValues_[i] * l);
223  if (iEigenValues_[i] != 0)
224  {
225  s = std::sin(iEigenValues_[i] * l);
226  c = std::cos(iEigenValues_[i] * l);
227  vdia[i] = rate_ * (eigenValues_[i] * c - iEigenValues_[i] * s) * e;
228  vup[i] = rate_ * (eigenValues_[i] * s + iEigenValues_[i] * c) * e;
229  vlo[i] = -vup[i];
230  vdia[i + 1] = vdia[i]; // trick to avoid computation
231  i++;
232  }
233  else
234  {
235  if (i < size_ - 1)
236  {
237  vup[i] = 0;
238  vlo[i] = 0;
239  }
240  }
241  }
242  MatrixTools::mult<double>(rightEigenVectors_, vdia, vup, vlo, leftEigenVectors_, dpijt_);
243  }
244  }
245  else
246  {
247  MatrixTools::getId(size_, dpijt_);
248  double s = 1.0;
249  double v = rate_ * t;
250  size_t m = 0;
251  while (v > 0.5) // r*A*exp(t*r*A)=r*A*(exp(r*t/(2^m) A))^(2^m)
252  {
253  m += 1;
254  v /= 2;
255  }
256  for (size_t i = 1; i < vPowGen_.size(); i++)
257  {
258  s *= v / static_cast<double>(i);
259  MatrixTools::add(dpijt_, s, vPowGen_[i]);
260  }
261  while (m > 0) // recover the 2^m
262  {
263  MatrixTools::mult(dpijt_, dpijt_, tmpMat_);
264  MatrixTools::copy(tmpMat_, dpijt_);
265  m--;
266  }
267  MatrixTools::scale(dpijt_, rate_);
268  MatrixTools::mult(vPowGen_[1], dpijt_, tmpMat_);
269  MatrixTools::copy(tmpMat_, dpijt_);
270  }
271  return dpijt_;
272 }
273 
274 const Matrix<double>& AbstractSubstitutionModel::getd2Pij_dt2(double t) const
275 {
276  if (isNonSingular_)
277  {
278  if (isDiagonalizable_)
279  {
280  MatrixTools::mult(rightEigenVectors_, VectorTools::sqr(rate_ * eigenValues_) * VectorTools::exp(eigenValues_ * (rate_ * t)), leftEigenVectors_, d2pijt_);
281  }
282  else
283  {
284  std::vector<double> vdia(size_);
285  std::vector<double> vup(size_ - 1);
286  std::vector<double> vlo(size_ - 1);
287  double c, s, e;
288  double l = rate_ * t;
289  for (size_t i = 0; i < size_; i++)
290  {
291  e = std::exp(eigenValues_[i] * l);
292  if (iEigenValues_[i] != 0)
293  {
294  s = std::sin(iEigenValues_[i] * l);
295  c = std::cos(iEigenValues_[i] * l);
296  vdia[i] = NumTools::sqr(rate_)
297  * ((NumTools::sqr(eigenValues_[i]) - NumTools::sqr(iEigenValues_[i])) * c
298  - 2 * eigenValues_[i] * iEigenValues_[i] * s) * e;
299  vup[i] = NumTools::sqr(rate_)
300  * ((NumTools::sqr(eigenValues_[i]) - NumTools::sqr(iEigenValues_[i])) * s
301  - 2 * eigenValues_[i] * iEigenValues_[i] * c) * e;
302  vlo[i] = -vup[i];
303  vdia[i + 1] = vdia[i]; // trick to avoid computation
304  i++;
305  }
306  else
307  {
308  if (i < size_ - 1)
309  {
310  vup[i] = 0;
311  vlo[i] = 0;
312  }
313  }
314  }
315  MatrixTools::mult<double>(rightEigenVectors_, vdia, vup, vlo, leftEigenVectors_, d2pijt_);
316  }
317  }
318  else
319  {
320  MatrixTools::getId(size_, d2pijt_);
321  double s = 1.0;
322  double v = rate_ * t;
323  size_t m = 0;
324  while (v > 0.5) // r^2*A^2*exp(t*r*A)=r^2*A^2*(exp(r*t/(2^m) A))^(2^m)
325  {
326  m += 1;
327  v /= 2;
328  }
329  for (size_t i = 1; i < vPowGen_.size(); i++)
330  {
331  s *= v / static_cast<double>(i);
332  MatrixTools::add(d2pijt_, s, vPowGen_[i]);
333  }
334  while (m > 0) // recover the 2^m
335  {
336  MatrixTools::mult(d2pijt_, d2pijt_, tmpMat_);
337  MatrixTools::copy(tmpMat_, d2pijt_);
338  m--;
339  }
340  MatrixTools::scale(d2pijt_, rate_ * rate_);
341  MatrixTools::mult(vPowGen_[2], d2pijt_, tmpMat_);
342  MatrixTools::copy(tmpMat_, d2pijt_);
343  }
344  return d2pijt_;
345 }
346 
347 /******************************************************************************/
348 
349 double AbstractSubstitutionModel::getInitValue(size_t i, int state) const throw (IndexOutOfBoundsException, BadIntException)
350 {
351  if (i >= size_)
352  throw IndexOutOfBoundsException("AbstractSubstitutionModel::getInitValue", i, 0, size_ - 1);
353  if (state < 0 || !alphabet_->isIntInAlphabet(state))
354  throw BadIntException(state, "AbstractSubstitutionModel::getInitValue. Character " + alphabet_->intToChar(state) + " is not allowed in model.");
355  vector<int> states = alphabet_->getAlias(state);
356  for (size_t j = 0; j < states.size(); j++)
357  {
358  if (getAlphabetStateAsInt(i) == states[j])
359  return 1.;
360  }
361  return 0.;
362 }
363 
364 /******************************************************************************/
365 
366 void AbstractSubstitutionModel::setFreqFromData(const SequenceContainer& data, double pseudoCount)
367 {
368  map<int, int> counts;
369  SequenceContainerTools::getCounts(data, counts);
370  double t = 0;
371  map<int, double> freqs;
372 
373  for (int i = 0; i < static_cast<int>(size_); i++)
374  {
375  t += (counts[i] + pseudoCount);
376  }
377  for (int i = 0; i < static_cast<int>(size_); i++)
378  {
379  freqs[i] = (static_cast<double>(counts[i]) + pseudoCount) / t;
380  }
381 
382  // Re-compute generator and eigen values:
383  setFreq(freqs);
384 }
385 
386 /******************************************************************************/
387 
388 void AbstractSubstitutionModel::setFreq(map<int, double>& freqs)
389 {
390  for (size_t i = 0; i < size_; ++i)
391  {
392  freq_[i] = freqs[static_cast<int>(i)];
393  }
394  // Re-compute generator and eigen values:
395  updateMatrices();
396 }
397 
398 /******************************************************************************/
399 
401 {
402  vector<double> v;
403  MatrixTools::diag(generator_, v);
404  return -VectorTools::scalar<double, double>(v, freq_);
405 }
406 
407 /******************************************************************************/
408 
410  MatrixTools::scale(generator_, scale);
411 }
412 
413 /******************************************************************************/
414 
416 {
417  return rate_;
418 }
419 
420 /******************************************************************************/
421 
423 {
424  if (rate <= 0)
425  throw Exception("Bad value for rate: " + TextTools::toString(rate));
426 
427  if (hasParameter("rate"))
428  setParameterValue("rate", rate_);
429 
430  rate_ = rate;
431 }
432 
434 {
435  addParameter_(new Parameter(getNamespace() + "rate", rate_, &Parameter::R_PLUS_STAR));
436 }
437 
438 /******************************************************************************/
439 
441 {
442  RowMatrix<double> Pi;
443  MatrixTools::diag(freq_, Pi);
444  MatrixTools::mult(exchangeability_, Pi, generator_); // Diagonal elements of the exchangability matrix will be ignored.
445  // Compute diagonal elements of the generator:
446  for (size_t i = 0; i < size_; i++)
447  {
448  double lambda = 0;
449  for (size_t j = 0; j < size_; j++)
450  {
451  if (j != i)
452  lambda += generator_(i, j);
453  }
454  generator_(i, i) = -lambda;
455  }
456  // Normalization:
457  double scale = getScale();
458  MatrixTools::scale(generator_, 1. / scale);
459 
460  // Normalize exchangeability matrix too:
461  MatrixTools::scale(exchangeability_, 1. / scale);
462  // Compute diagonal elements of the exchangeability matrix:
463  for (size_t i = 0; i < size_; i++)
464  {
465  exchangeability_(i, i) = generator_(i, i) / freq_[i];
466  }
468 }
469 
470 /******************************************************************************/
471 
virtual void updateMatrices()
Diagonalize the matrix, and fill the eigenValues_, iEigenValues_, leftEigenVectors_ and rightEigenVe...
bool enableEigenDecomposition()
Tell if eigenValues and Vectors must be computed.
RowMatrix< double > exchangeability_
The exchangeability matrix of the model, defined as . When the model is reversible, this matrix is symetric.
virtual double getRate() const
Get the rate.
bool isNonSingular_
boolean value for non-singularity of rightEigenVectors_
virtual const Matrix< double > & getPij_t(double t) const
RowMatrix< double > pijt_
These ones are for bookkeeping:
RowMatrix< double > rightEigenVectors_
The matrix made of right eigen vectors (by column).
Vdouble eigenValues_
The vector of eigen values.
STL namespace.
Vdouble freq_
The vector of equilibrium frequencies.
double getInitValue(size_t i, int state) const
virtual void updateMatrices()
Compute and diagonalize the matrix, and fill the eigenValues_, leftEigenVectors_ and rightEigenVecto...
double getScale() const
Get the scalar product of diagonal elements of the generator and the frequencies vector. If the generator is normalized, then scale=1. Otherwise each element must be multiplied by 1/scale.
void setFreqFromData(const SequenceContainer &data, double pseudoCount=0)
Set equilibrium frequencies equal to the frequencies estimated from the data.
void setScale(double scale)
Multiplies the current generator by the given scale.
virtual const Matrix< double > & getdPij_dt(double t) const
virtual const Matrix< double > & getd2Pij_dt2(double t) const
RowMatrix< double > leftEigenVectors_
The matrix made of left eigen vectors (by row) if rightEigenVectors_ is non-singular.
RowMatrix< double > generator_
The generator matrix of the model.
double rate_
The rate of the model (default: 1). The generator (and all its vectorial components) is independent o...
std::vector< RowMatrix< double > > vPowGen_
vector of the powers of generator_ for Taylor development (if rightEigenVectors_ is singular)...
virtual void setRate(double rate)
Set the rate of the model (must be positive).
size_t size_
The size of the generator, i.e. the number of states.
Map the states of a given alphabet which have a model state.
Definition: StateMap.h:58
RowMatrix< double > tmpMat_
For computational issues.
bool isDiagonalizable_
boolean value for diagonalizability in R of the generator_
Vdouble iEigenValues_
The vector of the imaginary part of the eigen values.
AbstractSubstitutionModel(const Alphabet *alpha, StateMap *stateMap, const std::string &prefix)
virtual void setFreq(std::map< int, double > &)
Set equilibrium frequencies.