bpp-core  2.2.0
BppODiscreteDistributionFormat.cpp
Go to the documentation of this file.
1 //
2 // File: BppODiscreteDistributionFormatFormat.cpp
3 // Created by: Laurent Guéguen
4 // Created on: lundi 3 septembre 2012, à 14h 48
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
41 
42 #include "../Io/FileTools.h"
43 #include "../Text/TextTools.h"
44 #include "../Text/StringTokenizer.h"
45 #include "../Text/KeyvalTools.h"
46 
47 #include "../Numeric/Prob/DiscreteDistribution.h"
48 #include "../Numeric/Prob/InvariantMixedDiscreteDistribution.h"
49 #include "../Numeric/Prob/ConstantDistribution.h"
50 #include "../Numeric/Prob/SimpleDiscreteDistribution.h"
51 #include "../Numeric/Prob/MixtureOfDiscreteDistributions.h"
52 #include "../Numeric/Prob/GammaDiscreteDistribution.h"
53 #include "../Numeric/Prob/GaussianDiscreteDistribution.h"
54 #include "../Numeric/Prob/BetaDiscreteDistribution.h"
55 #include "../Numeric/Prob/ExponentialDiscreteDistribution.h"
56 #include "../Numeric/Prob/TruncatedExponentialDiscreteDistribution.h"
57 #include "../Numeric/Prob/UniformDiscreteDistribution.h"
58 #include "../Numeric/AutoParameter.h"
60 
61 using namespace bpp;
62 
63 // From the STL:
64 #include <iomanip>
65 
66 using namespace std;
67 
68 
70  const std::string& distDescription,
71  bool parseArguments)
72 {
73  unparsedArguments_.clear();
74  string distName;
75  auto_ptr<DiscreteDistribution> rDist;
76  map<string, string> args;
77  KeyvalTools::parseProcedure(distDescription, distName, args);
78 
79  if ((distName == "InvariantMixed") || (distName == "Invariant"))
80  {
81  // We have to parse the nested distribution first:
82  string nestedDistDescription = args["dist"];
83  if (TextTools::isEmpty(nestedDistDescription))
84  throw Exception("BppODiscreteDistributionFormat::read. Missing argument 'dist' for distribution 'Invariant'.");
85  if (verbose_)
86  ApplicationTools::displayResult("Invariant Mixed distribution", distName );
87  BppODiscreteDistributionFormat nestedReader(verbose_);
88  DiscreteDistribution* nestedDistribution = nestedReader.read(nestedDistDescription, true);
89  map<string, string> unparsedArgumentsNested(nestedReader.getUnparsedArguments());
90 
91  // Now we create the Invariant rate distribution:
92  rDist.reset(new InvariantMixedDiscreteDistribution(nestedDistribution, 0.1, 0.000001));
93 
94  // Then we update the parameter set:
95  for (map<string, string>::iterator it = unparsedArgumentsNested.begin();
96  it != unparsedArgumentsNested.end(); it++)
97  {
98  unparsedArguments_["Invariant." + it->first] = it->second;
99  }
100 
101  if (args.find("p") != args.end())
102  unparsedArguments_["Invariant.p"] = args["p"];
103  }
104  else if (distName == "Constant")
105  {
106  if (args.find("value") == args.end())
107  throw Exception("Missing argument 'value' in Constant distribution");
108  rDist.reset(new ConstantDistribution(TextTools::to<double>(args["value"])));
109  unparsedArguments_["Constant.value"] = args["value"];
110  }
111  else if (distName == "Simple")
112  {
113  if (args.find("values") == args.end())
114  throw Exception("Missing argument 'values' in Simple distribution");
115  if (args.find("probas") == args.end())
116  throw Exception("Missing argument 'probas' in Simple distribution");
117  vector<double> probas, values;
118 
119  string rf = args["values"];
120  StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
121  while (strtok.hasMoreToken())
122  values.push_back(TextTools::toDouble(strtok.nextToken()));
123 
124  rf = args["probas"];
125  StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
126  while (strtok2.hasMoreToken())
127  probas.push_back(TextTools::toDouble(strtok2.nextToken()));
128 
129  std::map<size_t, std::vector<double> > ranges;
130 
131  if (args.find("ranges") != args.end())
132  {
133  string rr = args["ranges"];
134  StringTokenizer strtok3(rr.substr(1, rr.length() - 2), ",");
135  string desc;
136  double deb, fin;
137  unsigned int num;
138  size_t po, pf, ppv;
139  while (strtok3.hasMoreToken())
140  {
141  desc = strtok3.nextToken();
142  po = desc.find("[");
143  ppv = desc.find(";");
144  pf = desc.find("]");
145  num = (unsigned int)(TextTools::toInt(desc.substr(1, po - 1)));
146  deb = TextTools::toDouble(desc.substr(po + 1, ppv - po - 1));
147  fin = TextTools::toDouble(desc.substr(ppv + 1, pf - ppv - 1));
148  vector<double> vd;
149  vd.push_back(deb);
150  vd.push_back(fin);
151  ranges[num] = vd;
152  }
153  }
154  if (ranges.size() == 0)
155  rDist.reset(new SimpleDiscreteDistribution(values, probas));
156  else
157  rDist.reset(new SimpleDiscreteDistribution(values, ranges, probas));
158 
159  vector<string> v = rDist->getParameters().getParameterNames();
160 
161  for (unsigned int i = 0; i < v.size(); i++)
162  {
163  unparsedArguments_[v[i]] = TextTools::toString(rDist->getParameterValue(rDist->getParameterNameWithoutNamespace(v[i])));
164  }
165  }
166  else if (distName == "Mixture")
167  {
168  if (args.find("probas") == args.end())
169  throw Exception("Missing argument 'probas' in Mixture distribution");
170  vector<double> probas;
171  vector<DiscreteDistribution*> v_pdd;
173  string rf = args["probas"];
174  StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
175  while (strtok2.hasMoreToken())
176  probas.push_back(TextTools::toDouble(strtok2.nextToken()));
177 
178  vector<string> v_nestedDistrDescr;
179 
180  unsigned int nbd = 0;
181  while (args.find("dist" + TextTools::toString(++nbd)) != args.end())
182  v_nestedDistrDescr.push_back(args["dist" + TextTools::toString(nbd)]);
183 
184  if (v_nestedDistrDescr.size() != probas.size())
185  throw Exception("Number of distributions (keyword 'dist" + TextTools::toString(probas.size()) + "') do not fit the number of probabilities");
186 
187  BppODiscreteDistributionFormat nestedReader;
188 
189  for (unsigned i = 0; i < v_nestedDistrDescr.size(); i++)
190  {
191  pdd = nestedReader.read(v_nestedDistrDescr[i], true);
192  map<string, string> unparsedArgumentsNested(nestedReader.getUnparsedArguments());
193 
194  for (map<string, string>::iterator it = unparsedArgumentsNested.begin(); it != unparsedArgumentsNested.end(); it++)
195  {
196  unparsedArguments_[distName + "." + TextTools::toString(i + 1) + "_" + it->first] = it->second;
197  }
198  v_pdd.push_back(pdd);
199  }
200  rDist.reset(new MixtureOfDiscreteDistributions(v_pdd, probas));
201  }
202  else
203  {
204  if (args.find("n") == args.end())
205  throw Exception("Missing argument 'n' (number of classes) in " + distName
206  + " distribution");
207  unsigned int nbClasses = TextTools::to<unsigned int>(args["n"]);
208 
209  if (distName == "Gamma")
210  {
211  rDist.reset(new GammaDiscreteDistribution(nbClasses, 1, 1));
212  if (args.find("alpha") != args.end())
213  unparsedArguments_["Gamma.alpha"] = args["alpha"];
214  if (args.find("beta") != args.end())
215  unparsedArguments_["Gamma.beta"] = args["beta"];
216  }
217  else if (distName == "Gaussian")
218  {
219  rDist.reset(new GaussianDiscreteDistribution(nbClasses, 0, 1));
220  if (args.find("mu") != args.end())
221  unparsedArguments_["Gaussian.mu"] = args["mu"];
222  if (args.find("sigma") != args.end())
223  unparsedArguments_["Gaussian.sigma"] = args["sigma"];
224  }
225  else if (distName == "Beta")
226  {
227  rDist.reset(new BetaDiscreteDistribution(nbClasses, 1, 1));
228  if (args.find("alpha") != args.end())
229  unparsedArguments_["Beta.alpha"] = args["alpha"];
230  if (args.find("beta") != args.end())
231  unparsedArguments_["Beta.beta"] = args["beta"];
232  }
233  else if (distName == "Exponential")
234  {
235  rDist.reset(new ExponentialDiscreteDistribution(nbClasses, 1));
236  if (args.find("lambda") != args.end())
237  unparsedArguments_["Exponential.lambda"] = args["lambda"];
238  if (args.find("median") != args.end())
239  rDist->setMedian(true);
240  }
241  else if (distName == "TruncExponential")
242  {
243  rDist.reset(new TruncatedExponentialDiscreteDistribution(nbClasses, 1, 0));
244 
245  if (args.find("median") != args.end())
246  rDist->setMedian(true);
247  if (args.find("lambda") != args.end())
248  unparsedArguments_["TruncExponential.lambda"] = args["lambda"];
249  if (args.find("tp") != args.end())
250  unparsedArguments_["TruncExponential.tp"] = args["tp"];
251  }
252  else if (distName == "Uniform")
253  {
254  if (args.find("begin") == args.end())
255  throw Exception("Missing argument 'begin' in Uniform distribution");
256  if (args.find("end") == args.end())
257  throw Exception("Missing argument 'end' in Uniform distribution");
258  rDist.reset(new UniformDiscreteDistribution(
259  nbClasses,
260  TextTools::to<double>(args["begin"]),
261  TextTools::to<double>(args["end"])));
262  }
263  else
264  {
265  throw Exception("Unknown distribution: " + distName + ".");
266  }
267  }
268  if (verbose_)
269  {
270  ApplicationTools::displayResult("Distribution", distName);
272  }
273 
274  if (parseArguments)
275  initialize_(*rDist);
276 
277  return rDist.release();
278 }
279 
280 
282  OutputStream& out,
283  std::map<std::string, std::string>& globalAliases,
284  std::vector<std::string>& writtenNames) const
285 {
286  bool comma = false;
287 
288  const DiscreteDistribution* pd;
289 
290  out << dist.getName() + "(";
291 
292  const InvariantMixedDiscreteDistribution* invar = dynamic_cast<const InvariantMixedDiscreteDistribution*>(&dist);
293  if (invar)
294  {
295  pd = invar->getVariableSubDistribution();
296  out << "dist=";
297  write(*pd, out, globalAliases, writtenNames);
298  comma = true;
299  }
300  else
301  {
302  const MixtureOfDiscreteDistributions* mix = dynamic_cast<const MixtureOfDiscreteDistributions*>(&dist);
303  if (mix)
304  {
305  size_t nd = mix->getNumberOfDistributions();
306  for (unsigned int i = 0; i < nd; i++)
307  {
308  if (comma)
309  out << ",";
310  out << "dist" + TextTools::toString(i + 1) + "=";
311  write(*mix->getNDistribution(i), out, globalAliases, writtenNames);
312  comma = true;
313  }
314  out << ",probas=(";
315  for (unsigned int i = 0; i < nd; i++)
316  {
317  out << mix->getNProbability(i);
318  if (i != nd - 1)
319  out << ",";
320  }
321  out << ")";
322  for (unsigned int i = 1; i < nd; i++)
323  {
324  writtenNames.push_back(mix->getNamespace() + "theta" + TextTools::toString(i));
325  }
326  }
327  }
328 
329  if (dynamic_cast<const BetaDiscreteDistribution*>(&dist) ||
330  dynamic_cast<const ExponentialDiscreteDistribution*>(&dist) ||
331  dynamic_cast<const GammaDiscreteDistribution*>(&dist) ||
332  dynamic_cast<const GaussianDiscreteDistribution*>(&dist) ||
333  dynamic_cast<const TruncatedExponentialDiscreteDistribution*>(&dist) ||
334  dynamic_cast<const UniformDiscreteDistribution*>(&dist))
335  {
336  if (comma)
337  out << ",";
338  out << "n=" << dist.getNumberOfCategories();
339  comma = true;
340  }
341 
342  const ConstantDistribution* pc = dynamic_cast<const ConstantDistribution*>(&dist);
343  if (pc && dist.getNumberOfParameters() == 0)
344  {
345  if (comma)
346  out << ",";
347  out << "value=" << pc->getLowerBound();
348  comma = true;
349  }
350 
351  const SimpleDiscreteDistribution* ps = dynamic_cast<const SimpleDiscreteDistribution*>(&dist);
352  if (ps)
353  {
354  size_t nd = ps->getNumberOfCategories();
355  if (comma)
356  out << ",";
357  out << "values=(";
358  for (unsigned int i = 0; i < nd; i++)
359  {
360  out << ps->getCategory(i);
361  if (i != nd - 1)
362  out << ",";
363  }
364  out << "),probas=(";
365  for (size_t i = 0; i < nd; i++)
366  {
367  out << ps->getProbability(i);
368  if (i != nd - 1)
369  out << ",";
370  }
371  out << ")";
372 
373  const std::map<size_t, std::vector<double> > range = ps->getRanges();
374  if (range.size() != 0)
375  {
376  out << ",ranges=(";
377  std::map<size_t, std::vector<double> >::const_iterator it(range.begin());
378  while (it != range.end())
379  {
380  out << "V" << TextTools::toString(it->first);
381  out << "[" << TextTools::toString(it->second[0]) << ";" << TextTools::toString(it->second[1]) << "]";
382  it++;
383  if (it != range.end())
384  out << ",";
385  }
386  }
387  out << ")";
388 
389  for (size_t i = 1; i < nd; i++)
390  {
391  writtenNames.push_back(ps->getNamespace() + "theta" + TextTools::toString(i));
392  }
393  for (size_t i = 1; i < nd + 1; i++)
394  {
395  writtenNames.push_back(ps->getNamespace() + "V" + TextTools::toString(i));
396  }
397  comma = true;
398  }
399 
400  // Writing the parameters
402  bOP.write(dynamic_cast<const ParameterAliasable*>(&dist), out, globalAliases, dist.getIndependentParameters().getParameterNames(), writtenNames, true, comma);
403  out << ")";
404 }
405 
407  DiscreteDistribution& rDist) throw (Exception)
408 {
409  ParameterList pl = rDist.getIndependentParameters();
410 
411  for (size_t i = 0; i < pl.size(); ++i)
412  {
413  AutoParameter ap(pl[i]);
415  pl.setParameter(i, ap);
416  }
417 
418  for (size_t i = 0; i < pl.size(); ++i)
419  {
420  const string pName = pl[i].getName();
421  double value = ApplicationTools::getDoubleParameter(pName, unparsedArguments_, pl[i].getValue());
422  pl[i].setValue(value);
423  if (verbose_)
424  ApplicationTools::displayResult("Parameter found", pName + "=" + TextTools::toString(pl[i].getValue()));
425  }
426 
427  rDist.matchParametersValues(pl);
428  if (verbose_)
429  {
430  for (size_t c = 0; c < rDist.getNumberOfCategories(); ++c)
431  {
433  + " (Pr = " + TextTools::toString(rDist.getProbability(c)) + ") rate",
434  TextTools::toString(rDist.getCategory(c)));
435  }
436  }
437 }
438 
439 
440 
double getNProbability(size_t n) const
Returns the probability of the n-th discrete distribution in the mixture.
double getCategory(size_t categoryIndex) const
void write(const Parametrizable *parametrizable, OutputStream &out, std::vector< std::string > &writtenNames, bool printComma=false) const
Write a Parametrizable to a stream.
virtual size_t getNumberOfCategories() const =0
A tokenizer for strings.
const std::map< size_t, std::vector< double > > getRanges() const
This class allows to perform a correspondence analysis.
Interface for discrete distribution objects.
virtual double getParameterValue(const std::string &name) const =0
Get the value for parameter of name &#39;name&#39;.
bool hasMoreToken() const
Tell if some tokens are still available.
static void displayResult(const std::string &text, const T &result)
Print a result message.
Parametrizable output in BppO format.
virtual const ParameterList & getParameters() const =0
Get all parameters available.
size_t size() const
Definition: ParameterList.h:90
STL namespace.
const std::string & nextToken()
Get the next available token. If no token is availbale, throw an Exception.
Discretized Gaussian distribution.
Discretized Beta distribution with parameters alpha and beta, on a given interval. On default, the interval is , but it can be restricted.
double getProbability(size_t categoryIndex) const
static double getDoubleParameter(const std::string &parameterName, std::map< std::string, std::string > &params, double defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
Get a double parameter.
Discretized Exponential distribution.
static std::string toString(T t)
General template method to convert to a string.
Definition: TextTools.h:189
static int toInt(const std::string &s, char scientificNotation='e')
Convert from string to int.
Definition: TextTools.cpp:302
A Discrete distribution object defined by a vector of Discrete Distributions and a set of probabiliti...
The parameter list object.
Definition: ParameterList.h:61
A Discrete distribution object, where some specific probabilities are assigned to a finite set of val...
const DiscreteDistribution * getVariableSubDistribution() const
size_t getNumberOfDistributions() const
Returns the number of discrete distributions in the mixture.
static bool isEmpty(const std::string &s)
Tell if a string is empty.
Definition: TextTools.cpp:52
virtual void setMessageHandler(OutputStream *mh)
Set the message handler for this AutoParameter.
virtual std::vector< std::string > getParameterNames() const
Get all parameter names in the list.
virtual std::string getName() const =0
Get the name of the distribution.
The AutoParameter class.
Definition: AutoParameter.h:58
virtual const ParameterList & getIndependentParameters() const =0
Get the minimal list of parameters to set the model.
const std::map< std::string, std::string > & getUnparsedArguments() const
OutputStream interface.
Definition: OutputStream.h:64
virtual void setParameter(size_t index, const Parameter &param)
Change given parameter.
virtual std::string getParameterNameWithoutNamespace(const std::string &name) const =0
Resolves a parameter name according to the current namespace.
Discretized Uniform distribution. All categories are equidistributed all along a given interval...
Exception base class.
Definition: Exceptions.h:57
Discretized Gamma distribution.
virtual void setMedian(bool median)=0
Sets the median value to true to say that the value in a class is proportional to the median value of...
Constant discrete distribution.
static void parseProcedure(const std::string &desc, std::string &name, std::map< std::string, std::string > &args)
Parse (not recursively) a procedure string.
Definition: KeyvalTools.cpp:98
Discrete mixed distribution, with a one-category fixed value (called "invariant") and a user-specifie...
void write(const DiscreteDistribution &dist, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const
Write a discrete distribution to a stream.
DiscreteDistribution * read(const std::string &distDescription, bool parseArguments=true)
Read a discrete distribution from a string.
static OutputStream * warning
The output stream where warnings have to be displayed.
Discrete Distribution I/O in BppO format.
const DiscreteDistribution * getNDistribution(size_t n) const
Returns a pointer to the n-th discrete distribution in the mixture.
static double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
Convert from string to double.
Definition: TextTools.cpp:313
virtual size_t getNumberOfParameters() const =0
Get the number of parameters.
double getLowerBound() const
methods about the range of the definition
Discretized Truncated (on the right) Exponential distribution, where the probabilities are given the ...
void initialize_(DiscreteDistribution &rDist)
Set parameter initial values of a given distribution according to options.