bpp-phyl  2.2.0
1 //
2 // File: Coala.cpp
3 // Created by: Mathieu Groussin
4 // Created on: Sun Mar 13 12:00:00 2011
5 //
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
41 #include "Coala.h"
43 #include <Bpp/Io/FileTools.h>
44 #include <Bpp/Text/TextTools.h>
45 #include <Bpp/Text/StringTokenizer.h>
46 #include <Bpp/App/ApplicationTools.h>
47 #include <Bpp/Numeric/VectorTools.h>
48 #include <Bpp/Numeric/Matrix/MatrixTools.h>
49 #include <Bpp/Numeric/Stat/Mva/CorrespondenceAnalysis.h>
51 #include <Bpp/Seq/SequenceTools.h>
53 using namespace bpp;
55 // From the STL:
56 #include <iostream>
57 #include <fstream>
58 #include <string>
60 using namespace std;
63 /******************************************************************************/
66  const ProteicAlphabet* alpha,
67  const ProteinSubstitutionModel& model,
68  unsigned int nbAxes,
69  bool param) :
70  AbstractParameterAliasable("Coala."),
72  AbstractReversibleSubstitutionModel(alpha, new CanonicalStateMap(alpha, false), "Coala."),
73  CoalaCore(nbAxes, model.getName()),
74  init_(true),
75  nbrOfAxes_(nbAxes),
76  exch_(model.getName()),
77  file_(),
78  param_(param)
79 {
80  setNamespace(getName() + ".");
82  // Setting the exchangeability matrix
85 }
87 /******************************************************************************/
89 void Coala::readFromFile(string& file)
90 {
91  ifstream in(file.c_str(), ios::in);
92  // Read exchangeability matrix:
93  for (unsigned int i = 1; i < 20; i++)
94  {
95  string line = FileTools::getNextLine(in);
96  StringTokenizer st(line);
97  for (unsigned int j = 0; j < i; j++)
98  {
99  double s = TextTools::toDouble(st.nextToken());
100  exchangeability_(i, j) = exchangeability_(j, i) = s;
101  }
102  }
104  // Now build diagonal of the exchangeability matrix:
105  for (unsigned int i = 0; i < 20; i++)
106  {
107  double sum = 0;
108  for (unsigned int j = 0; j < 20; j++)
109  {
110  if (j != i)
111  sum += exchangeability_(i, j);
112  }
113  exchangeability_(i, i) = -sum;
114  }
116  // Closing stream:
117  in.close();
118 }
121 /******************************************************************************/
123 {
124  // Computes the equilibrium frequencies from a set of coordinates along the principal axes of the COA.
125  if (init_)
126  init_ = false;
127  else
128  {
129  // We get the coordinates:
130  vector<double> coord;
131  for (unsigned int i = 0; i < nbrOfAxes_; i++)
132  {
133  coord.push_back(getParameter("AxPos" + TextTools::toString(i)).getValue());
134  }
136  // Now, frequencies are computed from the vector of coordinates and the transpose of the principal axes matrix (P_):
137  vector<double> tmpFreqs;
138  tmpFreqs = prodMatrixVector(P_, coord);
139  for (unsigned int i = 0; i < tmpFreqs.size(); i++)
140  {
141  tmpFreqs[i] = (tmpFreqs[i] + 1) * colWeights_[i];
142  }
143  freq_ = tmpFreqs;
145  // Frequencies are not allowed to be lower than 10^-3 or higher than 0.5:
146  bool norm = false;
147  for (unsigned int i = 0; i < 20; i++)
148  {
149  if (freq_[i] < 0.001)
150  {
151  norm = true;
152  freq_[i] = 0.001;
153  }
154  if (freq_[i] > 0.2)
155  {
156  norm = true;
157  freq_[i] = 0.2;
158  }
159  }
160  if (norm == true)
161  {
162  double s = VectorTools::sum(freq_);
163  for (size_t i = 0; i < 20; i++)
164  {
165  freq_[i] = freq_[i] / s;
166  }
167  }
168  }
169 }
171 /******************************************************************************/
174 {
177 }
179 /******************************************************************************/
181 void Coala::setFreqFromData(const SequenceContainer& data, double pseudoCount)
182 {
183  // Compute the COA from the observed frequencies, add the axis position parameters and update the Markov matrix
184  ParameterList pList = computeCOA(data, param_);
185  addParameters_(pList);
186  updateMatrices();
187 }
189 /******************************************************************************/
RowMatrix< double > P_
Definition: CoalaCore.h:72
RowMatrix< double > exchangeability_
The exchangeability matrix $S$ of the model, defined as $S_{ij} = Q_{ij} / \pi_j$. When the model is reversible, this matrix is symmetric.
virtual const Matrix< double > & getExchangeabilityMatrix() const =0
This class implements a state map where all resolved states are modeled.
Definition: StateMap.h:161
void updateMatrices()
Compute and diagonalize the matrix, and fill the eigenValues_, leftEigenVectors_ and rightEigenVecto...
Definition: Coala.cpp:173
Specialized interface for protein substitution model.
STL namespace.
Vdouble freq_
The vector of equilibrium frequencies.
Coala(const ProteicAlphabet *alpha, const ProteinSubstitutionModel &model, unsigned int nbAxes=0, bool param=true)
Definition: Coala.cpp:65
unsigned int nbrOfAxes_
Definition: Coala.h:79
virtual void updateMatrices()
Compute and diagonalize the matrix, and fill the eigenValues_, leftEigenVectors_ and rightEigenVecto...
std::vector< double > colWeights_
Definition: CoalaCore.h:74
Partial implementation of the ReversibleSubstitutionModel interface.
bool init_
Definition: Coala.h:78
void computeEquilibriumFrequencies()
Definition: Coala.cpp:122
std::vector< double > prodMatrixVector(RowMatrix< double > &P, std::vector< double > &V)
Definition: CoalaCore.cpp:176
void setFreqFromData(const SequenceContainer &data, double pseudoCount=0)
Set equilibrium frequencies equal to the frequencies estimated from the data.
Definition: Coala.cpp:181
void readFromFile(std::string &file)
Definition: Coala.cpp:89
std::string getName() const
Get the name of the model.
Definition: Coala.h:100
ParameterList computeCOA(const SequenceContainer &data, bool param=true)
Definition: CoalaCore.cpp:77
bool param_
Definition: Coala.h:82
This class is the core class inherited by the Coala class. COaLA is a branch-heterogeneous amino-acid...
Definition: CoalaCore.h:66