bpp-phyl  2.2.0
MixtureOfASubstitutionModel.cpp
Go to the documentation of this file.
1 //
2 // File: MixtureOfASubstitutionModel.cpp
3 // Created by: David Fournier, Laurent Gueguen
4 //
5 
6 /*
7  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
8 
9  This software is a computer program whose purpose is to provide classes
10  for phylogenetic data analysis.
11 
12  This software is governed by the CeCILL license under French law and
13  abiding by the rules of distribution of free software. You can use,
14  modify and/ or redistribute the software under the terms of the CeCILL
15  license as circulated by CEA, CNRS and INRIA at the following URL
16  "http://www.cecill.info".
17 
18  As a counterpart to the access to the source code and rights to copy,
19  modify and redistribute granted by the license, users are provided only
20  with a limited warranty and the software's author, the holder of the
21  economic rights, and the successive licensors have only limited
22  liability.
23 
24  In this respect, the user's attention is drawn to the risks associated
25  with loading, using, modifying and/or developing or reproducing the
26  software by the user in light of its specific status of free software,
27  that may mean that it is complicated to manipulate, and that also
28  therefore means that it is reserved for developers and experienced
29  professionals having in-depth computer knowledge. Users are therefore
30  encouraged to load and test the software's suitability as regards their
31  requirements in conditions enabling the security of their systems and/or
32  data to be ensured and, more generally, to use and operate it in the
33  same conditions as regards security.
34 
35  The fact that you are presently reading this means that you have had
36  knowledge of the CeCILL license and that you accept its terms.
37  */
38 
40 
41 #include <Bpp/Numeric/NumConstants.h>
42 #include <Bpp/Numeric/Prob/ConstantDistribution.h>
43 #include <Bpp/Exceptions.h>
44 
45 #include <string>
46 
47 using namespace bpp;
48 using namespace std;
49 
50 
52  const Alphabet* alpha,
53  SubstitutionModel* model,
54  std::map<std::string, DiscreteDistribution*> parametersDistributionsList,
55  int ffrom,
56  int tto) throw (Exception) :
57  AbstractParameterAliasable(model->getNamespace()),
58  AbstractMixedSubstitutionModel(alpha, model->getStateMap().clone(), model->getNamespace()),
59  distributionMap_(),
60  from_(ffrom),
61  to_(tto)
62 {
63  if (to_ >= int(alpha->getSize()))
64  throw BadIntegerException("Bad state in alphabet", to_);
65  if (from_ >= int(alpha->getSize()))
66  throw BadIntegerException("Bad state in alphabet", from_);
67 
68  size_t c, i;
69  string s1, s2, t;
70  map<string, DiscreteDistribution*>::iterator it;
71 
72  // Initialization of distributionMap_.
73 
74  vector<string> parnames = model->getParameters().getParameterNames();
75 
76  for (i = 0; i < model->getNumberOfParameters(); i++)
77  {
78  s1 = parnames[i];
79  s2 = model->getParameterNameWithoutNamespace(s1);
80 
81  if (parametersDistributionsList.find(s2) != parametersDistributionsList.end())
82  distributionMap_[s1] = parametersDistributionsList.find(s2)->second->clone();
83  else
84  distributionMap_[ s1] = new ConstantDistribution(model->getParameterValue(s2));
85 
86 
87  if (dynamic_cast<ConstantDistribution*>(distributionMap_[s1]) == 0)
88  distributionMap_[s1]->setNamespace(s1 + "_" + distributionMap_[s1]->getNamespace());
89  else
90  distributionMap_[s1]->setNamespace(s1 + "_");
91  }
92 
93  // Initialization of modelsContainer_.
94 
95  c = 1;
96 
97  for (it = distributionMap_.begin(); it != distributionMap_.end(); it++)
98  {
99  c *= it->second->getNumberOfCategories();
100  }
101 
102  for (i = 0; i < c; i++)
103  {
104  modelsContainer_.push_back(model->clone());
105  modelsContainer_[i]->addRateParameter();
106  modelsContainer_[i]->setNamespace(model->getNamespace());
107 
108  vProbas_.push_back(1.0 / static_cast<double>(c));
109  vRates_.push_back(1.0);
110  }
111 
112  // Initialization of parameters_.
113 
114  Parameter pm;
115  DiscreteDistribution* pd;
116 
117  for (it = distributionMap_.begin(); it != distributionMap_.end(); it++)
118  {
119  pm = model->getParameter(model->getParameterNameWithoutNamespace(getParameterNameWithoutNamespace(it->first)));
120  pd = it->second;
121 
122  if (pm.hasConstraint())
123  pd->restrictToConstraint(*pm.getConstraint());
124 
125  if (!dynamic_cast<ConstantDistribution*>(it->second))
126  {
127  const ParameterList pl = pd->getParameters();
128  for (i = 0; i != it->second->getNumberOfParameters(); i++)
129  {
130  addParameter_(pl[i].clone());
131  }
132  }
133  else
134  addParameter_(new Parameter(it->first, pd->getCategory(0), (pd->getParameter("value").getConstraint()) ? pd->getParameter("value").getConstraint()->clone() : 0, true));
135  }
136  updateMatrices();
137 }
138 
140  AbstractParameterAliasable(msm),
142  distributionMap_(),
143  from_(msm.from_),
144  to_(msm.to_)
145 {
146  map<string, DiscreteDistribution*>::const_iterator it;
147 
148  for (it = msm.distributionMap_.begin(); it != msm.distributionMap_.end(); it++)
149  {
150  distributionMap_[it->first] = it->second->clone();
151  }
152 }
153 
155 {
156  AbstractParameterAliasable::operator=(msm);
158  from_ = msm.from_;
159  to_ = msm.to_;
160 
161  // Clear existing containers:
162  distributionMap_.clear();
163 
164  // Now copy new containers:
165  map<string, DiscreteDistribution*>::const_iterator it;
166  for (it = msm.distributionMap_.begin(); it != msm.distributionMap_.end(); it++)
167  {
168  distributionMap_[it->first] = it->second->clone();
169  }
170  return *this;
171 }
172 
173 
175 {
176  map<string, DiscreteDistribution*>::iterator it;
177 
178  for (it = distributionMap_.begin(); it != distributionMap_.end(); it++)
179  {
180  delete it->second;
181  }
182 }
183 
184 const DiscreteDistribution* MixtureOfASubstitutionModel::getDistribution(std::string& parName) const
185 {
186  if (distributionMap_.find(parName) != distributionMap_.end())
187  return distributionMap_.find(parName)->second;
188  else
189  return NULL;
190 }
191 
193 {
194  string s, t;
195  size_t i, j, l;
196  double d;
197  ParameterList pl;
198  map<string, DiscreteDistribution*>::iterator it;
199 
200  // Update of distribution parameters from the parameters_ member
201  // data. (reverse operation compared to what has been done in the
202  // constructor).
203  // vector<string> v=getParameters().getParameterNames();
204 
205  for (it = distributionMap_.begin(); it != distributionMap_.end(); it++)
206  {
207  if (dynamic_cast<ConstantDistribution*>(it->second) == NULL)
208  {
209  vector<string> vDistnames = it->second->getParameters().getParameterNames();
210  for (i = 0; i < it->second->getNumberOfParameters(); i++)
211  {
212  d = getParameterValue(getParameterNameWithoutNamespace(vDistnames[i]));
213  pl.addParameter(Parameter(vDistnames[i], d));
214  }
215  it->second->matchParametersValues(pl);
216  pl.reset();
217  }
218  else
219  {
220  t = it->second->getNamespace();
221  d = getParameter(getParameterNameWithoutNamespace(t.substr(0, t.length() - 1))).getValue();
222  it->second->setParameterValue("value", d);
223  }
224  }
225 
226  for (i = 0; i < modelsContainer_.size(); i++)
227  {
228  vProbas_[i] = 1;
229  j = i;
230  for (it = distributionMap_.begin(); it != distributionMap_.end(); it++)
231  {
232  s = it->first;
233  l = j % it->second->getNumberOfCategories();
234 
235  d = it->second->getCategory(l);
236  vProbas_[i] *= it->second->getProbability(l);
237  if (pl.hasParameter(s))
238  pl.setParameterValue(s, d);
239  else
240  pl.addParameter(Parameter(s, d));
241 
242  j = j / it->second->getNumberOfCategories();
243  }
244 
245  modelsContainer_[i]->matchParametersValues(pl);
246  }
247 
248  // setting the equilibrium freqs
249  for (i = 0; i < getNumberOfStates(); i++)
250  {
251  freq_[i] = 0;
252  for (j = 0; j < modelsContainer_.size(); j++)
253  {
254  freq_[i] += vProbas_[j] * modelsContainer_[j]->freq(i);
255  }
256  }
257 
258  // setting the rates, if to_ & from_ are different from -1
259 
260  if (to_ >= 0 && from_ >= 0)
261  {
262  Vdouble vd;
263 
264  for (j = 0; j < modelsContainer_.size(); j++)
265  {
266  vd.push_back(1 / getNModel(j)->Qij(static_cast<size_t>(from_), static_cast<size_t>(to_)));
267  }
268 
269  setVRates(vd);
270  }
271 }
272 
273 void MixtureOfASubstitutionModel::setFreq(std::map<int, double>& m)
274 {
275  modelsContainer_[0]->setFreq(m);
276  matchParametersValues(modelsContainer_[0]->getParameters());
277 }
278 
280 {
281  vector<string> parnames = modelsContainer_[0]->getParameters().getParameterNames();
282  std::map<std::string, size_t> msubn;
283  map<string, DiscreteDistribution*>::const_iterator it;
284 
285  StringTokenizer st(desc, ",");
286  while (st.hasMoreToken())
287  {
288  string param = st.nextToken();
289  string::size_type index = param.rfind("_");
290  if (index == string::npos)
291  throw Exception("MixtureOfASubstitutionModel::getSubmodelNumbers parameter description should contain a number" + param);
292  msubn[param.substr(0, index)] = TextTools::to<size_t>(param.substr(index + 1, 4)) - 1;
293  }
294 
295  Vint submodnb;
296  size_t i, j, l;
297  string s;
298 
299  bool nameok = false;
300 
301  for (i = 0; i < modelsContainer_.size(); i++)
302  {
303  j = i;
304  for (it = distributionMap_.begin(); it != distributionMap_.end(); it++)
305  {
306  s = it->first;
307  l = j % it->second->getNumberOfCategories();
308 
309  if (msubn.find(s) != msubn.end())
310  {
311  nameok = true;
312  if (msubn[s] != l)
313  break;
314  }
315 
316  j = j / it->second->getNumberOfCategories();
317  }
318  if (nameok && it == distributionMap_.end())
319  submodnb.push_back(static_cast<int>(i));
320  }
321 
322  return submodnb;
323 }
Vint getSubmodelNumbers(std::string &desc) const
Interface for all substitution models.
MixtureOfASubstitutionModel & operator=(const MixtureOfASubstitutionModel &)
std::map< std::string, DiscreteDistribution * > distributionMap_
STL namespace.
virtual void setVRates(const Vdouble &vd)
Sets the rates of the submodels to be proportional to a given vector, with the constraint that the me...
Vdouble freq_
The vector of equilibrium frequencies.
Partial implementation for Mixed Substitution models, defined as a mixture of "simple" substitution m...
void updateMatrices()
Diagonalize the matrix, and fill the eigenValues_, iEigenValues_, leftEigenVectors_ and rightEigenVe...
AbstractMixedSubstitutionModel & operator=(const AbstractMixedSubstitutionModel &)
Substitution models defined as a mixture of nested substitution models.
const DiscreteDistribution * getDistribution(std::string &parName) const
returns the DiscreteDistribution associated with a given parameter name.
double Qij(size_t i, size_t j) const
This function can not be applied here, so it is defined to prevent wrong usage.
virtual const SubstitutionModel * getNModel(size_t i) const
Returns a specific model from the mixture.
MixtureOfASubstitutionModel(const Alphabet *alpha, SubstitutionModel *model, std::map< std::string, DiscreteDistribution *> parametersDistributionsList, int ffrom=-1, int tto=-1)
void setFreq(std::map< int, double > &)
sets the eq frequencies of the first nested model, and adapts the parameters at best to it (surely th...
virtual size_t getNumberOfStates() const
From SubstitutionModel interface.
std::vector< SubstitutionModel * > modelsContainer_
vector of pointers to SubstitutionModels.
std::vector< double > vProbas_
vector of the probabilities of the models