bpp-phyl  2.2.0
DistanceEstimation.h
Go to the documentation of this file.
1 //
2 // File: DistanceEstimation.h
3 // Created by: Julien Dutheil
4 // Vincent Ranwez
5 // Created on: Wed jun 08 10:39 2005
6 //
7 
8 /*
9 Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
10 
11 This software is a computer program whose purpose is to provide classes
12 for phylogenetic data analysis.
13 
14 This software is governed by the CeCILL license under French law and
15 abiding by the rules of distribution of free software. You can use,
16 modify and/ or redistribute the software under the terms of the CeCILL
17 license as circulated by CEA, CNRS and INRIA at the following URL
18 "http://www.cecill.info".
19 
20 As a counterpart to the access to the source code and rights to copy,
21 modify and redistribute granted by the license, users are provided only
22 with a limited warranty and the software's author, the holder of the
23 economic rights, and the successive licensors have only limited
24 liability.
25 
26 In this respect, the user's attention is drawn to the risks associated
27 with loading, using, modifying and/or developing or reproducing the
28 software by the user in light of its specific status of free software,
29 that may mean that it is complicated to manipulate, and that also
30 therefore means that it is reserved for developers and experienced
31 professionals having in-depth computer knowledge. Users are therefore
32 encouraged to load and test the software's suitability as regards their
33 requirements in conditions enabling the security of their systems and/or
34 data to be ensured and, more generally, to use and operate it in the
35 same conditions as regards security.
36 
37 The fact that you are presently reading this means that you have had
38 knowledge of the CeCILL license and that you accept its terms.
39 */
40 
41 #ifndef _DISTANCEESTIMATION_H_
42 #define _DISTANCEESTIMATION_H_
43 
44 #include "../Model/SubstitutionModel.h"
45 #include "../Likelihood/AbstractTreeLikelihood.h"
46 #include "../Likelihood/DRHomogeneousTreeLikelihood.h"
47 #include "../Likelihood/PseudoNewtonOptimizer.h"
48 
49 #include <Bpp/Clonable.h>
50 #include <Bpp/Numeric/ParameterList.h>
51 #include <Bpp/Numeric/Prob/DiscreteDistribution.h>
52 #include <Bpp/Numeric/Function/Optimizer.h>
53 #include <Bpp/Numeric/Function/SimpleMultiDimensions.h>
54 #include <Bpp/Numeric/Function/MetaOptimizer.h>
55 
56 // From SeqLib:
57 #include <Bpp/Seq/Container/SiteContainer.h>
58 
59 namespace bpp
60 {
61 
67 {
68  private:
69  SiteContainer* shrunkData_;
70  std::vector<std::string> seqnames_;
72  ParameterList brLenParameters_;
73 
74  mutable VVVdouble pxy_;
75 
76  mutable VVVdouble dpxy_;
77 
78  mutable VVVdouble d2pxy_;
79 
91  std::vector<size_t> rootPatternLinks_;
92 
96  std::vector<unsigned int> rootWeights_;
97 
98  //some values we'll need:
99  size_t nbSites_, //the number of sites in the container
100  nbClasses_, //the number of rate classes
101  nbStates_, //the number of states in the alphabet
102  nbDistinctSites_; //the number of distinct sites in the container
103 
104  mutable VVVdouble rootLikelihoods_;
105  mutable VVdouble rootLikelihoodsS_;
106  mutable Vdouble rootLikelihoodsSR_;
107  mutable Vdouble dLikelihoods_;
108  mutable Vdouble d2Likelihoods_;
110 
112  Constraint* brLenConstraint_;
113  double brLen_;
114 
115  public:
117  const std::string& seq1, const std::string& seq2,
118  const SiteContainer& data,
119  SubstitutionModel* model,
120  DiscreteDistribution* rDist,
121  bool verbose) throw (Exception);
122 
124 
126 
127  TwoTreeLikelihood* clone() const { return new TwoTreeLikelihood(*this); }
128 
129  virtual ~TwoTreeLikelihood();
130 
131  public:
132 
140  size_t getNumberOfStates() const { return model_->getNumberOfStates(); }
141 
142  const std::vector<int>& getAlphabetStates() const { return model_->getAlphabetStates(); }
143 
144  int getAlphabetStateAsInt(size_t i) const { return model_->getAlphabetStateAsInt(i); }
145 
146  std::string getAlphabetStateAsChar(size_t i) const { return model_->getAlphabetStateAsChar(i); }
147 
148  TreeLikelihoodData* getLikelihoodData() throw (NotImplementedException)
149  {
150  throw NotImplementedException("TwoTreeLikelihood::getLikelihoodData.");
151  }
152  const TreeLikelihoodData* getLikelihoodData() const throw (NotImplementedException)
153  {
154  throw NotImplementedException("TwoTreeLikelihood::getLikelihoodData.");
155  }
156  double getLikelihood() const;
157  double getLogLikelihood() const;
158  double getLikelihoodForASite (size_t site) const;
159  double getLogLikelihoodForASite(size_t site) const;
160  ParameterList getBranchLengthsParameters() const;
161  ParameterList getSubstitutionModelParameters() const;
162  SubstitutionModel* getSubstitutionModel(int nodeId, size_t siteIndex) throw (NodeNotFoundException) { return model_; }
163  const SubstitutionModel* getSubstitutionModel(int nodeId, size_t siteIndex) const throw (NodeNotFoundException) { return model_; }
164  const std::vector<double>& getRootFrequencies(size_t siteIndex) const { return model_->getFrequencies(); }
165  size_t getSiteIndex(size_t site) const throw (IndexOutOfBoundsException) { return rootPatternLinks_[site]; }
169  VVVdouble getTransitionProbabilitiesPerRateClass(int nodeId, size_t siteIndex) const { return pxy_; }
170  void setData(const SiteContainer& sites) throw (Exception) {}
171  void initialize() throw(Exception);
179  double getLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const;
180  double getLogLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const;
181  double getLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const;
182  double getLogLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const;
190  const SubstitutionModel* getSubstitutionModel() const { return model_; }
191 
198 
199  ConstBranchModelIterator* getNewBranchModelIterator(int nodeId) const throw (NotImplementedException)
200  {
201  throw NotImplementedException("TwoTreeLikelihood::getNewBranchSiteModelIterator. This class does not (yet) provide support for partition models.");
202  }
203 
204  ConstSiteModelIterator* getNewSiteModelIterator(size_t siteIndex) const throw (NotImplementedException)
205  {
206  throw NotImplementedException("TwoTreeLikelihood::getNewSiteModelIterator. This class is for inner use only and does not provide site model iterators.");
207  }
208 
209 
210 
224  void setParameters(const ParameterList& parameters) throw (ParameterNotFoundException, ConstraintException);
225  double getValue() const throw(Exception);
226 
232  double getFirstOrderDerivative(const std::string& variable) const throw (Exception);
240  double getSecondOrderDerivative(const std::string& variable) const throw (Exception);
241  double getSecondOrderDerivative(const std::string& variable1, const std::string& variable2) const throw (Exception) { return 0; } // Not implemented for now.
244  virtual void initBranchLengthsParameters();
245 
246  virtual void setMinimumBranchLength(double minimum)
247  {
248  minimumBrLen_ = minimum;
250  brLenConstraint_ = new IntervalConstraint(1, minimumBrLen_, true);
252  }
253 
254  virtual double getMinimumBranchLength() const { return minimumBrLen_; }
255 
256  protected:
257 
272  virtual void initTreeLikelihoods(const SequenceContainer & sequences) throw (Exception);
273 
274  void fireParameterChanged(const ParameterList & params);
275  virtual void computeTreeLikelihood();
276  virtual void computeTreeDLikelihood();
277  virtual void computeTreeD2Likelihood();
282  virtual void initParameters();
283 
290  virtual void applyParameters() throw (Exception);
291 
292 };
293 
307  public virtual Clonable
308 {
309  private:
310  auto_ptr<SubstitutionModel> model_;
311  auto_ptr<DiscreteDistribution> rateDist_;
312  const SiteContainer* sites_;
313  DistanceMatrix* dist_;
314  Optimizer* optimizer_;
315  MetaOptimizer* defaultOptimizer_;
316  size_t verbose_;
317  ParameterList parameters_;
318 
319  public:
320 
336  SubstitutionModel* model,
337  DiscreteDistribution* rateDist,
338  size_t verbose = 1) :
339  model_(model),
340  rateDist_(rateDist),
341  sites_(0),
342  dist_(0),
343  optimizer_(0),
344  defaultOptimizer_(0),
345  verbose_(verbose),
346  parameters_()
347  {
348  init_();
349  }
350 
369  SubstitutionModel* model,
370  DiscreteDistribution* rateDist,
371  const SiteContainer* sites,
372  size_t verbose = 1,
373  bool computeMat = true) :
374  model_(model),
375  rateDist_(rateDist),
376  sites_(sites),
377  dist_(0),
378  optimizer_(0),
379  defaultOptimizer_(0),
380  verbose_(verbose),
381  parameters_()
382  {
383  init_();
384  if(computeMat) computeMatrix();
385  }
386 
394  DistanceEstimation(const DistanceEstimation& distanceEstimation):
395  model_(distanceEstimation.model_->clone()),
396  rateDist_(distanceEstimation.rateDist_->clone()),
397  sites_(distanceEstimation.sites_),
398  dist_(0),
399  optimizer_(dynamic_cast<Optimizer *>(distanceEstimation.optimizer_->clone())),
400  defaultOptimizer_(dynamic_cast<MetaOptimizer *>(distanceEstimation.defaultOptimizer_->clone())),
401  verbose_(distanceEstimation.verbose_),
402  parameters_(distanceEstimation.parameters_)
403  {
404  if(distanceEstimation.dist_ != 0)
405  dist_ = new DistanceMatrix(*distanceEstimation.dist_);
406  else
407  dist_ = 0;
408  }
409 
418  DistanceEstimation& operator=(const DistanceEstimation& distanceEstimation)
419  {
420  model_.reset(distanceEstimation.model_->clone());
421  rateDist_.reset(distanceEstimation.rateDist_->clone());
422  sites_ = distanceEstimation.sites_;
423  if (distanceEstimation.dist_ != 0)
424  dist_ = new DistanceMatrix(*distanceEstimation.dist_);
425  else
426  dist_ = 0;
427  optimizer_ = dynamic_cast<Optimizer *>(distanceEstimation.optimizer_->clone());
428  // _defaultOptimizer has already been initialized since the default constructor has been called.
429  verbose_ = distanceEstimation.verbose_;
430  parameters_ = distanceEstimation.parameters_;
431  return *this;
432  }
433 
435  {
436  if (dist_) delete dist_;
437  delete defaultOptimizer_;
438  delete optimizer_;
439  }
440 
441 #ifndef NO_VIRTUAL_COV
443 #else
444  Clonable*
445 #endif
446  clone() const { return new DistanceEstimation(*this); }
447 
448  private:
449  void init_()
450  {
451  MetaOptimizerInfos* desc = new MetaOptimizerInfos();
452  std::vector<std::string> name;
453  name.push_back("BrLen");
454  desc->addOptimizer("Branch length", new PseudoNewtonOptimizer(0), name, 2, MetaOptimizerInfos::IT_TYPE_FULL);
455  ParameterList tmp = model_->getParameters();
456  tmp.addParameters(rateDist_->getParameters());
457  desc->addOptimizer("substitution model and rate distribution", new SimpleMultiDimensions(0), tmp.getParameterNames(), 0, MetaOptimizerInfos::IT_TYPE_STEP);
458  defaultOptimizer_ = new MetaOptimizer(0, desc);
459  defaultOptimizer_->setMessageHandler(0);
460  defaultOptimizer_->setProfiler(0);
461  defaultOptimizer_->getStopCondition()->setTolerance(0.0001);
462  optimizer_ = dynamic_cast<Optimizer*>(defaultOptimizer_->clone());
463  }
464 
465  public:
466 
475  void computeMatrix() throw (NullPointerException);
476 
482  DistanceMatrix* getMatrix() const { return dist_ == 0 ? 0 : new DistanceMatrix(*dist_); }
483 
484  bool hasSubstitutionModel() const { return model_.get(); }
485 
486  const SubstitutionModel& getSubstitutionModel() const throw (Exception) {
487  if (hasSubstitutionModel())
488  return *model_;
489  else
490  throw Exception("DistanceEstimation::getSubstitutionModel(). No model assciated to this instance.");
491  }
492 
493  void resetSubstitutionModel(SubstitutionModel* model = 0) { model_.reset(model); }
494 
495  bool hasRateDistribution() const { return rateDist_.get(); }
496 
497  const DiscreteDistribution& getRateDistribution() const throw (Exception) {
498  if (hasRateDistribution())
499  return *rateDist_;
500  else
501  throw Exception("DistanceEstimation::getRateDistribution(). No rate distribution assciated to this instance.");
502  }
503 
504  void resetRateDistribution(DiscreteDistribution* rateDist = 0) { rateDist_.reset(rateDist); }
505 
506  void setData(const SiteContainer* sites) { sites_ = sites; }
507  const SiteContainer* getData() const { return sites_; }
508  void resetData() { sites_ = 0; }
509 
510  void setOptimizer(const Optimizer * optimizer)
511  {
512  if (optimizer_) delete optimizer_;
513  optimizer_ = dynamic_cast<Optimizer *>(optimizer->clone());
514  }
515  const Optimizer* getOptimizer() const { return optimizer_; }
516  Optimizer* getOptimizer() { return optimizer_; }
517  void resetOptimizer() { optimizer_ = dynamic_cast<Optimizer*>(defaultOptimizer_->clone()); }
518 
526  void setAdditionalParameters(const ParameterList& parameters)
527  {
528  parameters_ = parameters;
529  }
530 
535  {
536  parameters_.reset();
537  }
538 
542  void setVerbose(size_t verbose) { verbose_ = verbose; }
546  size_t getVerbose() const { return verbose_; }
547 };
548 
549 } //end of namespace bpp.
550 
551 #endif //_DISTANCEESTIMATION_H_
552 
Interface for all substitution models.
DistanceEstimation(const DistanceEstimation &distanceEstimation)
Copy constructor.
virtual int getAlphabetStateAsInt(size_t index) const =0
VVVdouble getTransitionProbabilitiesPerRateClass(int nodeId, size_t siteIndex) const
This method is not applicable for this object.
auto_ptr< DiscreteDistribution > rateDist_
const DiscreteDistribution & getRateDistribution() const
const TreeLikelihoodData * getLikelihoodData() const
void resetSubstitutionModel(SubstitutionModel *model=0)
virtual const std::vector< int > & getAlphabetStates() const =0
Partial implementation of the DiscreteRatesAcrossSitesTreeLikelihood interface.
TwoTreeLikelihood(const std::string &seq1, const std::string &seq2, const SiteContainer &data, SubstitutionModel *model, DiscreteDistribution *rDist, bool verbose)
ParameterList getBranchLengthsParameters() const
Get the branch lengths parameters.
This class is a simplified version of DRHomogeneousTreeLikelihood for 2-Trees.
DistanceEstimation & operator=(const DistanceEstimation &distanceEstimation)
Assigment operator.
void setParameters(const ParameterList &parameters)
Implements the Function interface.
double getLikelihoodForASite(size_t site) const
Get the likelihood for a site.
Iterates through all models used for all sites on a given branch.
virtual void initParameters()
This builds the parameters list from all parametrizable objects, i.e. substitution model...
TwoTreeLikelihood & operator=(const TwoTreeLikelihood &lik)
const SubstitutionModel & getSubstitutionModel() const
SubstitutionModel * model_
const SiteContainer * sites_
SubstitutionModel * getSubstitutionModel()
Get the substitution model used for the computation.
ConstBranchModelIterator * getNewBranchModelIterator(int nodeId) const
STL namespace.
TreeLikelihood data structure.
virtual void initBranchLengthsParameters()
virtual double getMinimumBranchLength() const
double getLogLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const
Get the logarithm of the likelihood for a site knowing its rate class.
size_t getNumberOfStates() const
std::string getAlphabetStateAsChar(size_t i) const
void fireParameterChanged(const ParameterList &params)
void setVerbose(size_t verbose)
void setData(const SiteContainer &sites)
Set the dataset for which the likelihood must be evaluated.
void setOptimizer(const Optimizer *optimizer)
virtual const Vdouble & getFrequencies() const =0
const std::vector< int > & getAlphabetStates() const
std::vector< unsigned int > rootWeights_
The frequency of each site.
const std::vector< double > & getRootFrequencies(size_t siteIndex) const
Get the values of the frequencies for each state in the alphabet at the root node.
void setData(const SiteContainer *sites)
Iterates through all models used for all branches on a given site.
double getLogLikelihood() const
Get the logarithm of the likelihood for the whole dataset.
SubstitutionModel * getSubstitutionModel(int nodeId, size_t siteIndex)
Get the substitution model associated to a given node and alignment column.
const SubstitutionModel * getSubstitutionModel(int nodeId, size_t siteIndex) const
Get the substitution model associated to a given node and alignment column.
void initialize()
Init the likelihood object.
virtual void initTreeLikelihoods(const SequenceContainer &sequences)
This method initializes the leaves according to a sequence container.
const SubstitutionModel * getSubstitutionModel() const
Get the substitution model used for the computation.
virtual void computeTreeDLikelihood()
This Optimizer implements Newton&#39;s algorithm for finding a minimum of a function. This is in fact a m...
Exception thrown when something is wrong with a particular node.
std::vector< std::string > seqnames_
std::vector< size_t > rootPatternLinks_
As previous, but for the global container.
virtual void computeTreeLikelihood()
const SiteContainer * getData() const
size_t getSiteIndex(size_t site) const
Get the index (used for inner computations) of a given site (original alignment column).
virtual void computeTreeD2Likelihood()
int getAlphabetStateAsInt(size_t i) const
double getLogLikelihoodForASite(size_t site) const
Get the logarithm of the likelihood for a site.
virtual std::string getAlphabetStateAsChar(size_t index) const =0
double getSecondOrderDerivative(const std::string &variable) const
double getFirstOrderDerivative(const std::string &variable) const
TreeLikelihoodData * getLikelihoodData()
double getLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const
Get the likelihood for a site knowing its rate class.
double getLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
Get the likelihood for a site knowing its rate class and its ancestral state.
const Optimizer * getOptimizer() const
SiteContainer * shrunkData_
DistanceEstimation * clone() const
ConstSiteModelIterator * getNewSiteModelIterator(size_t siteIndex) const
void setAdditionalParameters(const ParameterList &parameters)
Specify a list of parameters to be estimated.
double getLogLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
Get the logarithm of the likelihood for a site knowing its rate class and its ancestral state...
double getLikelihood() const
Get the likelihood for the whole dataset.
TwoTreeLikelihood * clone() const
DistanceEstimation(SubstitutionModel *model, DiscreteDistribution *rateDist, size_t verbose=1)
Create a new DistanceEstimation object according to a given substitution model and a rate distributio...
Estimate a distance matrix from sequence data, according to a given model.
void resetAdditionalParameters()
Reset all additional parameters.
virtual size_t getNumberOfStates() const =0
Get the number of states.
DistanceEstimation(SubstitutionModel *model, DiscreteDistribution *rateDist, const SiteContainer *sites, size_t verbose=1, bool computeMat=true)
Create a new DistanceEstimation object and compute distances according to a given substitution model ...
ParameterList brLenParameters_
void resetRateDistribution(DiscreteDistribution *rateDist=0)
virtual void applyParameters()
All parameters are stores in a parameter list.
ParameterList getSubstitutionModelParameters() const
Get the parameters associated to substitution model(s).
auto_ptr< SubstitutionModel > model_
MetaOptimizer * defaultOptimizer_
virtual void setMinimumBranchLength(double minimum)