bpp-core  2.2.0
AbstractDiscreteDistribution.cpp
Go to the documentation of this file.
1 //
2 // File: AbstractDiscreteDistribution.cpp
3 // Created by: Julien Dutheil
4 // Created on: ?
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 19, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for numerical calculus.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
41 
42 #include "../Random/RandomTools.h"
43 #include "../VectorTools.h"
44 
45 using namespace bpp;
46 using namespace std;
47 
48 
49 AbstractDiscreteDistribution::AbstractDiscreteDistribution(size_t nbClasses, const std::string& prefix) :
51  numberOfCategories_(nbClasses),
52  distribution_(),
53  bounds_(nbClasses-1),
54  intMinMax_(-NumConstants::VERY_BIG(), NumConstants::VERY_BIG(), true, true),
55  median_(false)
56 {}
57 
58 AbstractDiscreteDistribution::AbstractDiscreteDistribution(size_t nbClasses, double delta, const std::string& prefix) :
60  numberOfCategories_(nbClasses),
61  distribution_(Order(delta)),
62  bounds_(nbClasses-1),
63  intMinMax_(-NumConstants::VERY_BIG(), NumConstants::VERY_BIG(),true, true),
64  median_(false)
65 {}
66 
69  numberOfCategories_(adde.numberOfCategories_),
70  distribution_(adde.distribution_),
71  bounds_(adde.bounds_),
72  intMinMax_(adde.intMinMax_),
73  median_(adde.median_)
74 {
75 }
76 
78 {
82  bounds_=adde.bounds_;
84  median_=adde.median_;
85 
86  return *this;
87 }
88 
89 /******************************************************************************/
90 
92 {
93  return numberOfCategories_;
94 }
95 
97 {
98  if (nbClasses <= 0)
99  cerr << "DEBUG: ERROR!!! Number of categories is <= 0 in AbstractDiscreteDistribution::setNumberOfCategories()." << endl;
100 
101  if (numberOfCategories_ != nbClasses)
102  {
103  numberOfCategories_ = nbClasses;
104  discretize();
105  }
106 }
107 
108 
109 /******************************************************************************/
110 
111 double AbstractDiscreteDistribution::getCategory(size_t categoryIndex) const
112 {
113  map<double, double>::const_iterator it = distribution_.begin();
114  for (unsigned int i = 0; i < categoryIndex; i++)
115  {
116  it++;
117  }
118  return it->first;
119 }
120 
121 /******************************************************************************/
122 
123 double AbstractDiscreteDistribution::getProbability(size_t categoryIndex) const
124 {
125  map<double, double>::const_iterator it = distribution_.begin();
126  for (unsigned int i = 0; i < categoryIndex; i++)
127  {
128  it++;
129  }
130  return it->second;
131 }
132 
133 /******************************************************************************/
134 
135 double AbstractDiscreteDistribution::getProbability(double category) const
136 {
137  return distribution_.find(category)->second;
138 }
139 
140 /******************************************************************************/
141 
143 {
144  Vdouble result(distribution_.size());
145  unsigned int i = 0;
146  for (map<double, double>::const_iterator it = distribution_.begin();
147  it != distribution_.end();
148  it++)
149  {
150  result[i] = it->first;
151  i++;
152  }
153  return result;
154 }
155 
156 /******************************************************************************/
157 
159 {
160  Vdouble result(distribution_.size());
161  size_t i = 0;
162  for (map<double, double>::const_iterator it = distribution_.begin();
163  it != distribution_.end();
164  it++)
165  {
166  result[i] = it->second;
167  i++;
168  }
169  return result;
170 }
171 
172 /******************************************************************************/
173 
174 void AbstractDiscreteDistribution::set(double category, double probability)
175 {
176  distribution_[category] = probability;
177 }
178 
179 /******************************************************************************/
180 
181 void AbstractDiscreteDistribution::add(double category, double probability)
182 {
183  if (distribution_.find(category) == distribution_.end())
184  {
185  // new category
186  distribution_[category] = probability;
187  }
188  else
189  {
190  // existing category
191  distribution_[category] += probability;
192  }
193 }
194 
195 /******************************************************************************/
196 
198 {
200  double cumprob = 0;
201  for (map<double, double>::const_iterator i = distribution_.begin();
202  i != distribution_.end();
203  i++)
204  {
205  cumprob += i->second;
206  if (r <= cumprob)
207  return i->first;
208  }
209  // This line can't be reached:
210  return -1.;
211 }
212 
213 /******************************************************************************/
214 
216 {
217  double prob = 0;
218  map<double, double>::const_iterator it = distribution_.find(category);
219  for (map<double, double>::const_iterator i = distribution_.begin();
220  i != it;
221  i++)
222  {
223  prob += i->second;
224  }
225  return prob;
226 }
227 
228 /******************************************************************************/
229 
231 {
232  double prob = 0;
233  map<double, double>::const_iterator it = distribution_.find(category);
234  if (it == distribution_.end())
235  return 0;
236  for (map<double, double>::const_iterator i = ++it;
237  i != distribution_.end();
238  i++)
239  {
240  prob += i->second;
241  }
242  return 1. - prob;
243 }
244 
245 /******************************************************************************/
246 
248 {
249  double prob = 0;
250  map<double, double>::const_iterator it = distribution_.find(category);
251  if (it == distribution_.end())
252  return 0;
253  for (map<double, double>::const_iterator i = ++it;
254  i != distribution_.end();
255  i++)
256  {
257  prob += i->second;
258  }
259  return prob;
260 }
261 
262 /******************************************************************************/
263 
265 {
266  double prob = 0;
267  map<double, double>::const_iterator it = distribution_.find(category);
268  for (map<double, double>::const_iterator i = distribution_.begin();
269  i != it;
270  i++)
271  {
272  prob += i->second;
273  }
274  return 1. - prob;
275 }
276 
277 /******************************************************************************/
278 
280 {
281  for (map<double, double>::const_iterator i = distribution_.begin(); i != distribution_.end(); i++)
282  {
283  (out << "Pr(" << (i->first) << ") = " << (i->second)).endLine();
284  }
285 }
286 
287 /******************************************************************************/
288 
290 {
291  if (!(intMinMax_.isCorrect(value)))
292  throw Exception("AbstractDiscreteDistribution::getValueCategory out of bounds:" + TextTools::toString(value));
293 
294  map<double, double>::const_iterator it = distribution_.begin();
295  for (unsigned int i=1;i<bounds_.size();i++)
296  if (value<bounds_[i])
297  break;
298  else
299  it++;
300 
301  return it->first;
302 }
303 
304 /***********************************************************************/
305 
306 
308 {
309  /* discretization of distribution with equal proportions in each
310  category
311  */
312 
313  distribution_.clear();
314  bounds_.resize(numberOfCategories_ - 1);
315 
316  double minX = pProb(intMinMax_.getLowerBound());
317  double maxX = pProb(intMinMax_.getUpperBound());
318 
319  double ec;
320  size_t i;
321  vector<double> values(numberOfCategories_);
322 
323  if (maxX != minX)
324  {
325  // divide the domain into equiprobable intervals
326  ec = (maxX - minX) / static_cast<double>(numberOfCategories_);
327 
328  for (i = 1; i < numberOfCategories_; i++)
329  {
330  bounds_[i-1] = qProb(minX + static_cast<double>(i) * ec);
331  }
332 
333  // for each category, sets the value v as the median, adjusted
334  // such that the sum of the values = 1
335  if (median_)
336  {
337  double t=0;
338  for (i = 0; i < numberOfCategories_; i++)
339  values[i] = qProb(minX + (static_cast<double>(i) + 0.5) * ec);
340 
341  for (i = 0, t = 0; i < numberOfCategories_; i++)
342  t += values[i];
343 
345 
346  for (i = 0; i < numberOfCategories_; i++)
347  values[i] *= mean / t / ec;
348  }
349  else
350  // for each category, sets the value v such that
351  // v * length_of_the_interval = the surface of the category
352  {
353  double a = Expectation(intMinMax_.getLowerBound()), b;
354  for (i = 0; i < numberOfCategories_-1; i++)
355  {
356  b = Expectation(bounds_[i]);
357  values[i] = (b - a) / ec;
358  a = b;
359  }
361  }
362  }
363  else
364  // if maxX==minX, uniform discretization of the range
365  {
366  ec = (intMinMax_.getUpperBound() - intMinMax_.getLowerBound()) / static_cast<double>(numberOfCategories_);
367  for (i = 1; i < numberOfCategories_; i++)
368  bounds_[i-1] = intMinMax_.getLowerBound() + static_cast<double>(i) * ec;
369 
370  values[0] = (intMinMax_.getLowerBound() + bounds_[0]) / 2;
371 
372  for (i = 1; i < numberOfCategories_-1; i++)
373  values[i] = (bounds_[i-1] + bounds_[i]) / 2;
374 
376  }
377 
378  // adjustments near the boundaries of the domain, according to the precision chosen
380  {
381  for (i = 0; i < numberOfCategories_; i++)
382  {
383  if (values[i] < intMinMax_.getLowerBound() + precision())
384  values[i] = intMinMax_.getLowerBound() + precision();
385  else
386  break;
387  }
388  }
389  else
390  {
391  for (i = 0; i < numberOfCategories_; i++)
392  {
393  if (values[i] < intMinMax_.getLowerBound())
394  values[i] = intMinMax_.getLowerBound() + precision();
395  else
396  break;
397  }
398  }
399 
401  {
402  for (i = numberOfCategories_; i > 0; i--)
403  {
404  if (values[i-1] > intMinMax_.getUpperBound() - precision())
405  values[i-1] = intMinMax_.getUpperBound() - precision();
406  else
407  break;
408  }
409  }
410  else
411  {
412  for (i = numberOfCategories_; i > 0; i--)
413  {
414  if (values[i-1] > intMinMax_.getUpperBound())
415  values[i-1] = intMinMax_.getUpperBound() - precision();
416  else
417  break;
418  }
419  }
420 
421  // now the distribution_ map, taking care that all values are different
422 
423  double p = 1. / static_cast<double>(numberOfCategories_);
424  for (i = 0; i < numberOfCategories_; i++)
425  {
426  if (distribution_.find(values[i]) != distribution_.end())
427  {
428  int j = 1;
429  int f = ((values[i] + NumConstants::TINY()) >= intMinMax_.getUpperBound()) ? -1 : 1;
430  while (distribution_.find(values[i] + f * j * precision()) != distribution_.end())
431  {
432  j++;
433  f = ((values[i] + f * j * precision()) >= intMinMax_.getUpperBound()) ? -1 : 1;
434  }
435  distribution_[values[i] + f * j * precision()] = p;
436  }
437  else
438  distribution_[values[i]] = p;
439  }
440 
441  return;
442 }
443 
445 {
447  vb[0] = getLowerBound();
448  for (unsigned int i = 0; i < numberOfCategories_ - 1; i++)
449  vb[i + 1] = getBound(i);
451  return vb;
452 }
453 
455 {
456  const IntervalConstraint* pi = dynamic_cast<const IntervalConstraint*>(&c);
457 
458  if (!pi)
459  throw Exception("AbstractDiscreteDistribution::restrictToConstraint: the constraint is not an interval");
460 
461  if (!(intMinMax_ <= (*pi)))
462  {
463  intMinMax_ &= c;
464  discretize();
465  }
466 }
void set(double category, double probability)
Set the probability associated to a class.
double getInfCumulativeProbability(double category) const
double getCategory(size_t categoryIndex) const
Comparator class for AbstractDiscreteDistribution.
virtual double pProb(double x) const =0
Return the cumulative quantile of the continuous version of the distribution, ie .
virtual double Expectation(double a) const =0
Return a primitive function used for the expectation of the continuous version of the distribution...
double getLowerBound() const
Definition: Constraints.h:213
Partial implementation of the DiscreteDistribution interface.
double getIInfCumulativeProbability(double category) const
bool strictLowerBound() const
Definition: Constraints.h:216
this static class contains several useful constant values.
Definition: NumConstants.h:54
An interval, either bounded or not, which can also have infinite bounds.
Definition: Constraints.h:135
This class allows to perform a correspondence analysis.
virtual bool isCorrect(double value) const
Tell if a given value is correct.
Definition: Constraints.h:228
double getSSupCumulativeProbability(double category) const
double getSupCumulativeProbability(double category) const
void add(double category, double probability)
Modify the probability associated to a class.
STL namespace.
double getProbability(size_t categoryIndex) const
static std::string toString(T t)
General template method to convert to a string.
Definition: TextTools.h:189
AbstractDiscreteDistribution & operator=(const AbstractDiscreteDistribution &adde)
A partial implementation of the Parametrizable interface.
void print(OutputStream &out) const
Print the distribution (categories and corresponding probabilities) to a stream.
bool strictUpperBound() const
Definition: Constraints.h:217
double rand() const
Draw a random number from this distribution.
static double TINY()
Definition: NumConstants.h:81
double getLowerBound() const
methods about the range of the definition
The constraint interface.
Definition: Constraints.h:62
std::vector< double > Vdouble
Definition: VectorTools.h:67
std::map< double, double, Order > distribution_
virtual void discretize()
Discretizes the distribution in equiprobable classes.
virtual double qProb(double x) const =0
Return the quantile of the continuous version of the distribution, ie y such that ...
AbstractDiscreteDistribution(size_t nbClasses, const std::string &prefix="")
OutputStream interface.
Definition: OutputStream.h:64
AbstractParameterAliasable & operator=(const AbstractParameterAliasable &ap)
Exception base class.
Definition: Exceptions.h:57
void setNumberOfCategories(size_t nbClasses)
sets the number of categories and discretizes if there is a change in this number.
static double giveRandomNumberBetweenZeroAndEntry(double entry, const RandomFactory &generator= *DEFAULT_GENERATOR)
Get a double random value (between 0 and specified range).
Definition: RandomTools.cpp:62
double getUpperBound() const
Definition: Constraints.h:214
IntervalConstraint intMinMax_
the interval where the distribution is defined/restricted.
virtual void restrictToConstraint(const Constraint &c)
Restricts the distribution to the domain where the constraint is respected, in addition of other pred...