bpp-phyl  2.2.0
BppORateDistributionFormat.cpp
Go to the documentation of this file.
1 //
2 // File: BppORateDistributionFormat.cpp
3 // Created by: Laurent Guéguen and Julien Dutheil
4 // Created on: Fri 16 november 2012, at 13:44
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
41 #include "../Model/RateDistribution/ConstantRateDistribution.h"
42 #include "../Model/RateDistribution/GammaDiscreteRateDistribution.h"
43 #include "../Model/RateDistribution/GaussianDiscreteRateDistribution.h"
44 #include "../Model/RateDistribution/ExponentialDiscreteRateDistribution.h"
45 
46 //From bpp-core:
47 #include <Bpp/Text/KeyvalTools.h>
48 #include <Bpp/Numeric/Prob/InvariantMixedDiscreteDistribution.h>
49 #include <Bpp/Numeric/Prob/SimpleDiscreteDistribution.h>
50 #include <Bpp/Numeric/Prob/MixtureOfDiscreteDistributions.h>
51 #include <Bpp/Io/BppOParametrizableFormat.h>
52 
53 using namespace bpp;
54 
55 // From the STL:
56 #include <iomanip>
57 
58 using namespace std;
59 
60 
61 DiscreteDistribution* BppORateDistributionFormat::read(
62  const std::string& distDescription,
63  bool parseArguments)
64 {
65  unparsedArguments_.clear();
66  string distName;
67  map<string, string> args;
68  KeyvalTools::parseProcedure(distDescription, distName, args);
69  auto_ptr<DiscreteDistribution> rDist;
70 
71  if (distName == "Uniform")
72  throw Exception("BppO Warning: Uniform distribution is deprecated, use Constant instead.");
73 
74  if ((distName == "InvariantMixed") || (distName == "Invariant"))
75  {
76  // We have to parse the nested distribution first:
77  string nestedDistDescription = args["dist"];
78  if (TextTools::isEmpty(nestedDistDescription))
79  throw Exception("BppORateDistributionFormat::read. Missing argument 'dist' for distribution 'Invariant'.");
80  if (verbose_)
81  ApplicationTools::displayResult("Invariant Mixed distribution", distName );
82  BppORateDistributionFormat nestedReader(false);
83  DiscreteDistribution* nestedDistribution = nestedReader.read(nestedDistDescription, false);
84  map<string, string> unparsedArgumentsNested(nestedReader.getUnparsedArguments());
85 
86  // Now we create the Invariant rate distribution:
87  rDist.reset(new InvariantMixedDiscreteDistribution(nestedDistribution, 0.1, 0.000001));
88 
89  // Then we update the parameter set:
90  for (map<string, string>::iterator it = unparsedArgumentsNested.begin();
91  it != unparsedArgumentsNested.end(); it++)
92  {
93  unparsedArguments_["Invariant." + it->first] = it->second;
94  }
95 
96  if (args.find("p") != args.end())
97  unparsedArguments_["Invariant.p"] = args["p"];
98  }
99  else if (distName == "Constant")
100  {
101  if (!allowConstant_)
102  throw Exception("BppORateDistributionFormat::read(). Constant distribution not allowed.");
103 
104  if (args.find("value") != args.end())
105  throw Exception("Found argument 'value' in Constant distribution. Constant distribution is defined to have an average of 1.");
106  rDist.reset(new ConstantRateDistribution());
107  }
108  else if (distName == "Simple")
109  {
110  if (args.find("values") == args.end())
111  throw Exception("Missing argument 'values' in Simple distribution");
112  if (args.find("probas") == args.end())
113  throw Exception("Missing argument 'probas' in Simple distribution");
114  vector<double> probas, values;
115 
116  string rf = args["values"];
117  StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
118  while (strtok.hasMoreToken())
119  values.push_back(TextTools::toDouble(strtok.nextToken()));
120 
121  rf = args["probas"];
122  StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
123  while (strtok2.hasMoreToken())
124  probas.push_back(TextTools::toDouble(strtok2.nextToken()));
125 
126  std::map<size_t, std::vector<double> > ranges;
127 
128  if (args.find("ranges") != args.end())
129  {
130  string rr = args["ranges"];
131  StringTokenizer strtok3(rr.substr(1, rr.length() - 2), ",");
132  string desc;
133  double deb, fin;
134  size_t num;
135  size_t po, pf, ppv;
136  while (strtok3.hasMoreToken())
137  {
138  desc = strtok3.nextToken();
139  po = desc.find("[");
140  ppv = desc.find(";");
141  pf = desc.find("]");
142  num = (size_t)(TextTools::toInt(desc.substr(1, po - 1)));
143  deb = TextTools::toDouble(desc.substr(po + 1, ppv - po - 1));
144  fin = TextTools::toDouble(desc.substr(ppv + 1, pf - ppv - 1));
145  vector<double> vd;
146  vd.push_back(deb);
147  vd.push_back(fin);
148  ranges[num] = vd;
149  }
150  }
151  if (ranges.size() == 0)
152  rDist.reset(new SimpleDiscreteDistribution(values, probas));
153  else
154  rDist.reset(new SimpleDiscreteDistribution(values, ranges, probas));
155 
156  vector<string> v = rDist->getParameters().getParameterNames();
157 
158  for (size_t i = 0; i < v.size(); i++)
159  {
160  unparsedArguments_[v[i]] = TextTools::toString(rDist->getParameterValue(rDist->getParameterNameWithoutNamespace(v[i])));
161  }
162  }
163  else if (distName == "Mixture")
164  {
165  if (args.find("probas") == args.end())
166  throw Exception("Missing argument 'probas' in Mixture distribution");
167  vector<double> probas;
168  vector<DiscreteDistribution*> v_pdd;
169  string rf = args["probas"];
170  StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
171  while (strtok2.hasMoreToken())
172  probas.push_back(TextTools::toDouble(strtok2.nextToken()));
173 
174  vector<string> v_nestedDistrDescr;
175 
176  size_t nbd = 0;
177  while (args.find("dist" + TextTools::toString(++nbd)) != args.end())
178  v_nestedDistrDescr.push_back(args["dist" + TextTools::toString(nbd)]);
179 
180  if (v_nestedDistrDescr.size() != probas.size())
181  throw Exception("Number of distributions (keyword 'dist" + TextTools::toString(probas.size()) + "') do not fit the number of probabilities");
182 
183  for (unsigned i = 0; i < v_nestedDistrDescr.size(); i++)
184  {
185  BppORateDistributionFormat nestedReader(false);
186  auto_ptr<DiscreteDistribution> pdd(nestedReader.read(v_nestedDistrDescr[i], false));
187  map<string, string> unparsedArgumentsNested(nestedReader.getUnparsedArguments());
188 
189  for (map<string, string>::iterator it = unparsedArgumentsNested.begin(); it != unparsedArgumentsNested.end(); it++)
190  {
191  unparsedArguments_[distName + "." + TextTools::toString(i + 1) + "_" + it->first] = it->second;
192  }
193  v_pdd.push_back(pdd.release());
194  }
195  rDist.reset(new MixtureOfDiscreteDistributions(v_pdd, probas));
196  }
197  else
198  {
199  if (args.find("n") == args.end())
200  throw Exception("Missing argument 'n' (number of classes) in " + distName
201  + " distribution");
202  size_t nbClasses = TextTools::to<size_t>(args["n"]);
203 
204  if (distName == "Gamma")
205  {
206  rDist.reset(new GammaDiscreteRateDistribution(nbClasses, 1.));
207 
208  if (args.find("alpha") != args.end())
209  unparsedArguments_["Gamma.alpha"] = args["alpha"];
210  if (args.find("beta") != args.end())
211  throw Exception("Found argument 'beta' in Gamma distribution. Gamma distribution is defined to have an average of 1, with beta=alpha.");
212  }
213  else if (distName == "Gaussian")
214  {
215  rDist.reset(new GaussianDiscreteRateDistribution(nbClasses, 1));
216  if (args.find("mu") != args.end())
217  throw Exception("Found argument 'mu' in Gaussian distribution. Gaussian distribution is defined to have an average of 1, with mu=1.");
218  if (args.find("sigma") != args.end())
219  unparsedArguments_["Gaussian.sigma"] = args["sigma"];
220  }
221  else if (distName == "Exponential")
222  {
223  rDist.reset(new ExponentialDiscreteRateDistribution(nbClasses));
224  if (args.find("lambda") != args.end())
225  throw Exception("Found argument 'lambda' in Exponential distribution. Exponential distribution is defined to have an average of 1, with lambda=1.");
226  }
227  else
228  {
229  throw Exception("Unsupported rate distribution: " + distName + ".");
230  }
231  }
232  if (verbose_)
233  {
234  ApplicationTools::displayResult("Distribution", distName);
235  ApplicationTools::displayResult("Number of classes", TextTools::toString(rDist->getNumberOfCategories()));
236  }
237 
238  if (parseArguments)
239  initialize_(*rDist);
240 
241  return rDist.release();
242 }
243 
244 
246  const DiscreteDistribution& dist,
247  OutputStream& out,
248  std::map<std::string, std::string>& globalAliases,
249  std::vector<std::string>& writtenNames) const
250 {
251  bool comma = false;
252 
253  const DiscreteDistribution* pd;
254 
255  out << dist.getName() + "(";
256 
257  const InvariantMixedDiscreteDistribution* invar = dynamic_cast<const InvariantMixedDiscreteDistribution*>(&dist);
258  if (invar)
259  {
260  pd = invar->getVariableSubDistribution();
261  out << "dist=";
262  write(*pd, out, globalAliases, writtenNames);
263  comma = true;
264  }
265  else
266  {
267  const MixtureOfDiscreteDistributions* mix = dynamic_cast<const MixtureOfDiscreteDistributions*>(&dist);
268  if (mix)
269  {
270  size_t nd = mix->getNumberOfDistributions();
271  for (size_t i = 0; i < nd; i++)
272  {
273  if (comma)
274  out << ",";
275  out << "dist" + TextTools::toString(i + 1) + "=";
276  write(*mix->getNDistribution(i), out, globalAliases, writtenNames);
277  comma = true;
278  }
279  out << ",probas=(";
280  for (size_t i = 0; i < nd; i++)
281  {
282  out << mix->getNProbability(i);
283  if (i != nd - 1)
284  out << ",";
285  }
286  out << ")";
287  for (size_t i = 1; i < nd; i++)
288  {
289  writtenNames.push_back(mix->getNamespace() + "theta" + TextTools::toString(i));
290  }
291  }
292  }
293  if (dynamic_cast<const ExponentialDiscreteRateDistribution*>(&dist) ||
294  dynamic_cast<const GammaDiscreteRateDistribution*>(&dist) ||
295  dynamic_cast<const GaussianDiscreteRateDistribution*>(&dist))
296  {
297  if (comma)
298  out << ",";
299  out << "n=" << dist.getNumberOfCategories();
300  comma = true;
301  }
302 
303  const SimpleDiscreteDistribution* ps = dynamic_cast<const SimpleDiscreteDistribution*>(&dist);
304  if (ps)
305  {
306  size_t nd = ps->getNumberOfCategories();
307  if (comma)
308  out << ",";
309  out << "values=(";
310  for (size_t i = 0; i < nd; i++)
311  {
312  out << ps->getCategory(i);
313  if (i != nd - 1)
314  out << ",";
315  }
316  out << "),probas=(";
317  for (size_t i = 0; i < nd; i++)
318  {
319  out << ps->getProbability(i);
320  if (i != nd - 1)
321  out << ",";
322  }
323  out << ")";
324 
325  const std::map<size_t, std::vector<double> > range = ps->getRanges();
326  if (range.size() != 0)
327  {
328  out << ", ranges=(";
329  std::map<size_t, std::vector<double> >::const_iterator it(range.begin());
330  while (it != range.end())
331  {
332  out << "V" << TextTools::toString(it->first);
333  out << "[" << TextTools::toString(it->second[0]) << ";" << TextTools::toString(it->second[1]) << "]";
334  it++;
335  if (it != range.end())
336  out << ",";
337  }
338  }
339 
340  out << ")";
341 
342  for (size_t i = 1; i < nd; i++)
343  {
344  writtenNames.push_back(ps->getNamespace() + "theta" + TextTools::toString(i));
345  }
346  for (size_t i = 1; i < nd + 1; i++)
347  {
348  writtenNames.push_back(ps->getNamespace() + "V" + TextTools::toString(i));
349  }
350 
351  comma = true;
352  }
353 
354 
355  // Writing the parameters
356  BppOParametrizableFormat bOP;
357  bOP.write(dynamic_cast<const ParameterAliasable*>(&dist), out, globalAliases, dist.getIndependentParameters().getParameterNames(), writtenNames, true, comma);
358  out << ")";
359 }
360 
361 
STL namespace.
DiscreteDistribution * read(const std::string &distDescription, bool parseArguments)
Rate Distribution I/O in BppO format.
void write(const DiscreteDistribution &dist, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const