bpp-phyl  2.2.0
AbstractWordSubstitutionModel.cpp
Go to the documentation of this file.
1 //
2 // File: AbstractWordSubstitutionModel.cpp
3 // Created by: Laurent Gueguen
4 // Created on: Jan 2009
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
9  This software is a computer program whose purpose is to provide classes
10  for phylogenetic data analysis.
11 
12  This software is governed by the CeCILL license under French law and
13  abiding by the rules of distribution of free software. You can use,
14  modify and/ or redistribute the software under the terms of the CeCILL
15  license as circulated by CEA, CNRS and INRIA at the following URL
16  "http://www.cecill.info".
17 
18  As a counterpart to the access to the source code and rights to copy,
19  modify and redistribute granted by the license, users are provided only
20  with a limited warranty and the software's author, the holder of the
21  economic rights, and the successive licensors have only limited
22  liability.
23 
24  In this respect, the user's attention is drawn to the risks associated
25  with loading, using, modifying and/or developing or reproducing the
26  software by the user in light of its specific status of free software,
27  that may mean that it is complicated to manipulate, and that also
28  therefore means that it is reserved for developers and experienced
29  professionals having in-depth computer knowledge. Users are therefore
30  encouraged to load and test the software's suitability as regards their
31  requirements in conditions enabling the security of their systems and/or
32  data to be ensured and, more generally, to use and operate it in the
33  same conditions as regards security.
34 
35  The fact that you are presently reading this means that you have had
36  knowledge of the CeCILL license and that you accept its terms.
37  */
38 
40 
41 #include <Bpp/Numeric/Matrix/MatrixTools.h>
42 #include <Bpp/Numeric/Matrix/EigenValue.h>
43 #include <Bpp/Numeric/VectorTools.h>
44 
45 // From SeqLib:
46 #include <Bpp/Seq/Alphabet/WordAlphabet.h>
47 #include <Bpp/Seq/Alphabet/AlphabetTools.h>
48 #include <Bpp/Seq/Container/SequenceContainerTools.h>
49 
50 #include <Bpp/App/ApplicationTools.h>
51 
52 using namespace bpp;
53 
54 // From the STL:
55 #include <cmath>
56 #include <complex>
57 
58 using namespace std;
59 
60 /******************************************************************************/
61 
62 //TODO: jdutheil on 24/09/14: we should define a class "vector of models", insuring they share the same alphabet and StateMap for instance.
63 //This is the only way to avoid a segfault in case the user provides a vector of models of length 0.
65  const std::vector<SubstitutionModel*>& modelVector,
66  const std::string& prefix) :
67  AbstractParameterAliasable(prefix),
68  AbstractSubstitutionModel(AbstractWordSubstitutionModel::extractAlph(modelVector), modelVector[0]->getStateMap().clone(), prefix),
69  new_alphabet_ (true),
70  VSubMod_ (),
71  VnestedPrefix_(),
72  Vrate_ (modelVector.size())
73 {
75  size_t i, j;
76  size_t n = modelVector.size();
77 
78  // test whether two models are identical
79 
80  bool flag = 0;
81  i = 0;
82  j = 1;
83  while (!flag && i < (n - 1))
84  {
85  if (modelVector[i] == modelVector[j])
86  flag = 1;
87  else
88  {
89  j++;
90  if (j == n)
91  {
92  i++;
93  j = i + 1;
94  }
95  }
96  }
97 
98  if (!flag)
99  {
100  for (i = 0; i < n; i++)
101  {
102  VSubMod_.push_back(modelVector[i]);
103  VnestedPrefix_.push_back(modelVector[i]->getNamespace());
104  VSubMod_[i]->setNamespace(prefix + TextTools::toString(i + 1) + "_" + VnestedPrefix_[i]);
105  addParameters_(VSubMod_[i]->getParameters());
106  }
107  }
108  else
109  {
110  string t = "";
111  for (i = 0; i < n; i++)
112  {
113  VSubMod_.push_back(modelVector[0]);
114  VnestedPrefix_.push_back(modelVector[0]->getNamespace());
115  t += TextTools::toString(i + 1);
116  }
117  VSubMod_[0]->setNamespace(prefix + t + "_" + VnestedPrefix_[0]);
118  addParameters_(VSubMod_[0]->getParameters());
119  }
120 
121  for (i = 0; i < n; i++)
122  {
123  Vrate_[i] = 1.0 / static_cast<double>(n);
124  }
125 }
126 
128  const Alphabet* alph,
129  StateMap* stateMap,
130  const std::string& prefix) :
131  AbstractParameterAliasable(prefix),
132  AbstractSubstitutionModel(alph, stateMap, prefix),
133  new_alphabet_ (false),
134  VSubMod_ (),
135  VnestedPrefix_(),
136  Vrate_ (0)
137 {
139 }
140 
142  SubstitutionModel* pmodel,
143  unsigned int num,
144  const std::string& prefix) :
145  AbstractParameterAliasable(prefix),
146  AbstractSubstitutionModel(new WordAlphabet(pmodel->getAlphabet(), num), pmodel->getStateMap().clone(), prefix),
147  new_alphabet_ (true),
148  VSubMod_ (),
149  VnestedPrefix_(),
150  Vrate_ (num)
151 {
153  size_t i;
154 
155  string t = "";
156  for (i = 0; i < num; i++)
157  {
158  VSubMod_.push_back(pmodel);
159  VnestedPrefix_.push_back(pmodel->getNamespace());
160  Vrate_[i] = 1.0 / num;
161  t += TextTools::toString(i + 1);
162  }
163 
164  pmodel->setNamespace(prefix + t + "_" + VnestedPrefix_[0]);
165  addParameters_(pmodel->getParameters());
166 }
167 
169  const AbstractWordSubstitutionModel& wrsm) :
170  AbstractParameterAliasable(wrsm),
172  new_alphabet_ (wrsm.new_alphabet_),
173  VSubMod_ (),
174  VnestedPrefix_(wrsm.VnestedPrefix_),
175  Vrate_ (wrsm.Vrate_)
176 {
177  size_t i;
178  size_t num = wrsm.VSubMod_.size();
179 
180  if (wrsm.new_alphabet_)
181  alphabet_ = new WordAlphabet(*(dynamic_cast<const WordAlphabet*>(wrsm.getAlphabet())));
182 
183  SubstitutionModel* pSM = 0;
184  if ((num > 1) & (wrsm.VSubMod_[0] == wrsm.VSubMod_[1]))
185  pSM = wrsm.VSubMod_[0]->clone();
186 
187  for (i = 0; i < num; i++)
188  {
189  VSubMod_.push_back(pSM ? pSM : wrsm.VSubMod_[i]->clone());
190  }
191 }
192 
194  const AbstractWordSubstitutionModel& model)
195 {
196  AbstractParameterAliasable::operator=(model);
200  Vrate_ = model.Vrate_;
201 
202  size_t i;
203  size_t num = model.VSubMod_.size();
204 
205  if (model.new_alphabet_)
206  alphabet_ = new WordAlphabet(*(dynamic_cast<const WordAlphabet*>(model.getAlphabet())));
207 
208  SubstitutionModel* pSM = 0;
209  if ((num > 1) & (model.VSubMod_[0] == model.VSubMod_[1]))
210  pSM = model.VSubMod_[0]->clone();
211 
212  for (i = 0; i < num; i++)
213  {
214  VSubMod_[i] = (pSM ? pSM : model.VSubMod_[i]->clone());
215  }
216 
217  return *this;
218 }
219 
221 {
222  if ((VSubMod_.size() > 1) && (VSubMod_[0] == VSubMod_[1]))
223  {
224  if (VSubMod_[0])
225  delete VSubMod_[0];
226  }
227  else
228  for (size_t i = 0; i < VSubMod_.size(); i++)
229  {
230  if (VSubMod_[i])
231  delete VSubMod_[i];
232  }
233  if (new_alphabet_)
234  delete alphabet_;
235 }
236 
238 {
239  return getAlphabet()->getSize();
240 }
241 
242 Alphabet* AbstractWordSubstitutionModel::extractAlph(const vector<SubstitutionModel*>& modelVector)
243 {
244  size_t i;
245 
246  vector<const Alphabet*> vAlph;
247 
248  for (i = 0; i < modelVector.size(); i++)
249  {
250  vAlph.push_back(modelVector[i]->getAlphabet());
251  }
252 
253  return new WordAlphabet(vAlph);
254 }
255 
256 void AbstractWordSubstitutionModel::setNamespace(const std::string& prefix)
257 {
258  AbstractSubstitutionModel::setNamespace(prefix);
259 
260  if (VSubMod_.size() < 2 || VSubMod_[0] == VSubMod_[1])
261  {
262  string t = "";
263  for (size_t i = 0; i < VSubMod_.size(); i++)
264  {
265  t += TextTools::toString(i + 1);
266  }
267  VSubMod_[0]->setNamespace(prefix + t + "_" + VnestedPrefix_[0]);
268  }
269  else
270  {
271  for (size_t i = 0; i < VSubMod_.size(); i++)
272  {
273  VSubMod_[i]->setNamespace(prefix + TextTools::toString(i + 1) + "_" + VnestedPrefix_[i]);
274  }
275  }
276 }
277 
278 /******************************************************************************/
279 
281 {
282  // First we update position specific models. This need to be done
283  // here and not in fireParameterChanged, as some parameter aliases
284  // might have been defined and need to be resolved first.
285  if (VSubMod_.size() < 2 || VSubMod_[0] == VSubMod_[1])
286  VSubMod_[0]->matchParametersValues(getParameters());
287  else
288  for (size_t i = 0; i < VSubMod_.size(); i++)
289  {
290  VSubMod_[i]->matchParametersValues(getParameters());
291  }
292 
293  size_t nbmod = VSubMod_.size();
294  size_t salph = getNumberOfStates();
295  size_t nbStop = 0;
296  vector<bool> vnull; // vector of the indices of lines with only zeros
297 
298  // Generator
299 
301  {
302  size_t i, j, n, l, k, m;
303 
304  vector<size_t> vsize;
305 
306  for (k = 0; k < nbmod; k++)
307  {
308  vsize.push_back(VSubMod_[k]->getNumberOfStates());
309  }
310 
311  RowMatrix<double> gk, exch;
312 
313  m = 1;
314 
315  for (k = nbmod; k > 0; k--)
316  {
317  gk = VSubMod_[k - 1]->getGenerator();
318  for (i = 0; i < vsize[k - 1]; i++)
319  {
320  for (j = 0; j < vsize[k - 1]; j++)
321  {
322  if (i != j)
323  {
324  n = 0;
325  while (n < salph)
326  { // loop on prefix
327  for (l = 0; l < m; l++)
328  { // loop on suffix
329  generator_(n + i * m + l, n + j * m + l) = gk(i, j) * Vrate_[k - 1];
330  }
331  n += m * vsize[k - 1];
332  }
333  }
334  }
335  }
336  m *= vsize[k - 1];
337  }
338  }
339 
340  // modification of generator_ and freq_
341 
343 
344  size_t i, j;
345  double x;
346 
347  for (i = 0; i < salph; i++)
348  {
349  x = 0;
350  for (j = 0; j < salph; j++)
351  {
352  if (j != i)
353  x += generator_(i, j);
354  }
355  generator_(i, i) = -x;
356  }
357 
358  // at that point generator_ and freq_ are done for models without
359  // enableEigenDecomposition
360 
361  // Eigen values:
362 
364  {
365  for (i = 0; i < salph; i++)
366  {
367  bool flag = true;
368  for (j = 0; j < salph; j++)
369  {
370  if ((i != j) && abs(generator_(i, j)) > NumConstants::TINY())
371  {
372  flag = false;
373  break;
374  }
375  }
376  if (flag)
377  nbStop++;
378  vnull.push_back(flag);
379  }
380 
381  if (nbStop != 0)
382  {
383  size_t gi = 0, gj = 0;
384 
385  RowMatrix<double> gk;
386 
387  gk.resize(salph - nbStop, salph - nbStop);
388  for (i = 0; i < salph; i++)
389  {
390  if (!vnull[i])
391  {
392  gj = 0;
393  for (j = 0; j < salph; j++)
394  {
395  if (!vnull[j])
396  {
397  gk(i - gi, j - gj) = generator_(i, j);
398  }
399  else
400  gj++;
401  }
402  }
403  else
404  gi++;
405  }
406 
407  EigenValue<double> ev(gk);
408  eigenValues_ = ev.getRealEigenValues();
409  iEigenValues_ = ev.getImagEigenValues();
410 
411  for (i = 0; i < nbStop; i++)
412  {
413  eigenValues_.push_back(0);
414  iEigenValues_.push_back(0);
415  }
416 
417  RowMatrix<double> rev = ev.getV();
418  rightEigenVectors_.resize(salph, salph);
419  gi = 0;
420  for (i = 0; i < salph; i++)
421  {
422  if (vnull[i])
423  {
424  gi++;
425  for (j = 0; j < salph; j++)
426  {
427  rightEigenVectors_(i, j) = 0;
428  }
429 
430  rightEigenVectors_(i, salph - nbStop + gi - 1) = 1;
431  }
432  else
433  {
434  for (j = 0; j < salph - nbStop; j++)
435  {
436  rightEigenVectors_(i, j) = rev(i - gi, j);
437  }
438 
439  for (j = salph - nbStop; j < salph; j++)
440  {
441  rightEigenVectors_(i, j) = 0;
442  }
443  }
444  }
445  }
446  else
447  {
448  EigenValue<double> ev(generator_);
449  eigenValues_ = ev.getRealEigenValues();
450  iEigenValues_ = ev.getImagEigenValues();
451  rightEigenVectors_ = ev.getV();
452  nbStop = 0;
453  }
454 
455  try
456  {
457  MatrixTools::inv(rightEigenVectors_, leftEigenVectors_);
458 
459  // is it diagonalizable ?
460 
461  isDiagonalizable_ = true;
462  for (i = 0; i < size_ && isDiagonalizable_; i++)
463  {
464  if (abs(iEigenValues_[i]) > NumConstants::SMALL())
465  isDiagonalizable_ = false;
466  }
467 
468  // is it singular?
469 
470  // looking for the 0 eigenvector for which the non-stop right
471  // eigen vector elements are equal.
472  //
473 
474  if (isDiagonalizable_)
475  {
476  size_t nulleigen = 0;
477  double val;
478 
479  isNonSingular_ = false;
480  while (nulleigen < salph - nbStop)
481  {
482  if ((abs(eigenValues_[nulleigen]) < NumConstants::SMALL()) && (abs(iEigenValues_[nulleigen]) < NumConstants::SMALL()))
483  {
484  i = 0;
485  while (vnull[i])
486  i++;
487 
488  val = rightEigenVectors_(i, nulleigen);
489  i++;
490  while (i < salph)
491  {
492  if (!vnull[i])
493  {
494  if (abs(rightEigenVectors_(i, nulleigen) - val) > NumConstants::SMALL())
495  break;
496  }
497  i++;
498  }
499 
500  if (i < salph)
501  nulleigen++;
502  else
503  {
504  isNonSingular_ = true;
505  break;
506  }
507  }
508  else
509  nulleigen++;
510  }
511 
512  if (isNonSingular_)
513  {
514  eigenValues_[nulleigen] = 0; // to avoid approximation errors on long long branches
515  iEigenValues_[nulleigen] = 0; // to avoid approximation errors on long long branches
516 
517  for (i = 0; i < salph; i++)
518  freq_[i] = leftEigenVectors_(nulleigen, i);
519 
520  x = 0;
521  for (i = 0; i < salph; i++)
522  x += freq_[i];
523 
524  for (i = 0; i < salph; i++)
525  freq_[i] /= x;
526  }
527 
528  else
529  {
530  ApplicationTools::displayMessage("Unable to find eigenvector for eigenvalue 1. Taylor series used instead.");
531  isDiagonalizable_ = false;
532  }
533  }
534  }
535 
536  // if rightEigenVectors_ is singular
537  catch (ZeroDivisionException& e)
538  {
539  ApplicationTools::displayMessage("Singularity during diagonalization. Taylor series used instead.");
540  isNonSingular_ = false;
541  isDiagonalizable_ = false;
542  }
543 
544  if (!isNonSingular_)
545  {
546  double min = generator_(0, 0);
547  for (i = 1; i < salph; i++)
548  {
549  if (min > generator_(i, i))
550  min = generator_(i, i);
551  }
552 
553  MatrixTools::scale(generator_, -1 / min);
554 
555  if (vPowGen_.size() == 0)
556  vPowGen_.resize(30);
557 
558  MatrixTools::getId(salph, tmpMat_); // to compute the equilibrium frequency (Q+Id)^256
559  MatrixTools::add(tmpMat_, generator_);
560  MatrixTools::pow(tmpMat_, 256, vPowGen_[0]);
561 
562  for (i = 0; i < salph; i++)
563  {
564  freq_[i] = vPowGen_[0](0, i);
565  }
566 
567  MatrixTools::getId(salph, vPowGen_[0]);
568  }
569 
570  // normalization
571 
572  x = 0;
573  for (i = 0; i < salph; i++)
574  x += freq_[i] * generator_(i, i);
575 
576  MatrixTools::scale(generator_, -1. / x);
577  for (i = 0; i < salph; i++)
578  {
579  eigenValues_[i] /= -x;
580  iEigenValues_[i] /= -x;
581  }
582 
583  if (!isNonSingular_)
584  MatrixTools::Taylor(generator_, 30, vPowGen_);
585  }
586 
587  // compute the exchangeability_
588 
589  for (i = 0; i < size_; i++)
590  for (j = 0; j < size_; j++)
591  exchangeability_(i, j) = generator_(i, j) / freq_[j];
592 
593 }
594 
595 void AbstractWordSubstitutionModel::setFreq(std::map<int, double>& freqs)
596 {
597  map<int, double> tmpFreq;
598  size_t nbmod = VSubMod_.size();
599 
600  size_t i, j, s, k, d, size;
601  d = size = getNumberOfStates();
602 
603  if (VSubMod_.size() < 2 || VSubMod_[0] == VSubMod_[1])
604  {
605  s = VSubMod_[0]->getAlphabet()->getSize();
606  for (j = 0; j < s; j++)
607  {
608  tmpFreq[static_cast<int>(j)] = 0;
609  }
610 
611  for (i = 0; i < nbmod; i++)
612  {
613  d /= s;
614  for (k = 0; k < size; k++)
615  {
616  tmpFreq[static_cast<int>((k / d) % s)] += freqs[static_cast<int>(k)];
617  }
618  }
619 
620  for (k = 0; k < s; k++)
621  {
622  tmpFreq[static_cast<int>(k)] /= static_cast<double>(nbmod);
623  }
624 
625  VSubMod_[0]->setFreq(tmpFreq);
626  matchParametersValues(VSubMod_[0]->getParameters());
627  }
628  else
629  for (i = 0; i < nbmod; i++)
630  {
631  tmpFreq.clear();
632  s = VSubMod_[i]->getAlphabet()->getSize();
633  d /= s;
634  for (j = 0; j < s; j++)
635  {
636  tmpFreq[static_cast<int>(j)] = 0;
637  }
638  for (k = 0; k < size; k++)
639  {
640  tmpFreq[static_cast<int>((k / d) % s)] += freqs[static_cast<int>(k)];
641  }
642  VSubMod_[i]->setFreq(tmpFreq);
643  matchParametersValues(VSubMod_[i]->getParameters());
644  }
645 }
Interface for all substitution models.
bool enableEigenDecomposition()
Tell if eigenValues and Vectors must be computed.
RowMatrix< double > exchangeability_
The exchangeability matrix of the model, defined as . When the model is reversible, this matrix is symetric.
bool isNonSingular_
boolean value for non-singularity of rightEigenVectors_
RowMatrix< double > rightEigenVectors_
The matrix made of right eigen vectors (by column).
Vdouble eigenValues_
The vector of eigen values.
static Alphabet * extractAlph(const std::vector< SubstitutionModel *> &modelVector)
STL namespace.
Vdouble freq_
The vector of equilibrium frequencies.
virtual size_t getNumberOfStates() const
Get the number of states.
AbstractSubstitutionModel & operator=(const AbstractSubstitutionModel &model)
Partial implementation of the SubstitutionModel interface.
AbstractWordSubstitutionModel(const std::vector< SubstitutionModel *> &modelVector, const std::string &prefix)
Build a new AbstractWordSubstitutionModel object from a vector of pointers to SubstitutionModels.
const Alphabet * alphabet_
The alphabet relevant to this model.
Abstract Basal class for words of substitution models.
RowMatrix< double > leftEigenVectors_
The matrix made of left eigen vectors (by row) if rightEigenVectors_ is non-singular.
AbstractWordSubstitutionModel & operator=(const AbstractWordSubstitutionModel &)
RowMatrix< double > generator_
The generator matrix of the model.
std::vector< RowMatrix< double > > vPowGen_
vector of the powers of generator_ for Taylor development (if rightEigenVectors_ is singular)...
virtual void completeMatrices()=0
Called by updateMatrices to handle specific modifications for inheriting classes. ...
size_t size_
The size of the generator, i.e. the number of states.
virtual void setFreq(std::map< int, double > &freqs)
Estimation of the parameters of the models so that the equilibrium frequencies match the given ones...
void updateMatrices()
Diagonalize the matrix, and fill the eigenValues_, iEigenValues_, leftEigenVectors_ and rightEigenVe...
bool new_alphabet_
boolean flag to check if a specific WordAlphabet has been built
Map the states of a given alphabet which have a model state.
Definition: StateMap.h:58
RowMatrix< double > tmpMat_
For computational issues.
bool isDiagonalizable_
boolean value for diagonalizability in R of the generator_
Vdouble iEigenValues_
The vector of the imaginary part of the eigen values.
std::vector< SubstitutionModel * > VSubMod_