bpp-phyl-omics  2.2.0
MaximumLikelihoodModelFitMafStatistics.h
Go to the documentation of this file.
1 //
2 // File: MaximumLikelihoodModelFitMafStatistics.h
3 // Created by: Julien Dutheil
4 // Created on: Mar 25 2014
5 //
6 
7 /*
8 Copyright or © or Copr. Bio++ Development Team
9 
10 This software is a computer program whose purpose is to test the
11 homogeneity of the substitution process of a given alignment.
12 
13 This software is governed by the CeCILL license under French law and
14 abiding by the rules of distribution of free software. You can use,
15 modify and/ or redistribute the software under the terms of the CeCILL
16 license as circulated by CEA, CNRS and INRIA at the following URL
17 "http://www.cecill.info".
18 
19 As a counterpart to the access to the source code and rights to copy,
20 modify and redistribute granted by the license, users are provided only
21 with a limited warranty and the software's author, the holder of the
22 economic rights, and the successive licensors have only limited
23 liability.
24 
25 In this respect, the user's attention is drawn to the risks associated
26 with loading, using, modifying and/or developing or reproducing the
27 software by the user in light of its specific status of free software,
28 that may mean that it is complicated to manipulate, and that also
29 therefore means that it is reserved for developers and experienced
30 professionals having in-depth computer knowledge. Users are therefore
31 encouraged to load and test the software's suitability as regards their
32 requirements in conditions enabling the security of their systems and/or
33 data to be ensured and, more generally, to use and operate it in the
34 same conditions as regards security.
35 
36 The fact that you are presently reading this means that you have had
37 knowledge of the CeCILL license and that you accept its terms.
38 */
39 
40 #ifndef _MAXIMUMLIKELIHOODMODELFITMAFSTATISTICS_H_
41 #define _MAXIMUMLIKELIHOODMODELFITMAFSTATISTICS_H_
42 
43 #include <Bpp/Seq/Io/Maf/MafStatistics.h>
44 #include <Bpp/Seq/Container/SiteContainer.h>
45 
46 //From bpp-phyl:
47 #include <Bpp/Phyl/Model/SubstitutionModel.h>
48 #include <Bpp/Phyl/Model/SubstitutionModelSetTools.h>
49 #include <Bpp/Phyl/Model/FrequenciesSet/NucleotideFrequenciesSet.h>
50 #include <Bpp/Phyl/Tree.h>
51 #include <Bpp/Phyl/Likelihood/DiscreteRatesAcrossSitesTreeLikelihood.h>
52 
53 //From bpp-core
54 #include <Bpp/Numeric/Prob/DiscreteDistribution.h>
55 
56 namespace bpp {
57 
65  public AbstractMafStatistics
66 {
67 
68  private:
69  std::auto_ptr<SubstitutionModel> model_;
70  std::auto_ptr<SubstitutionModelSet> modelSet_; //Only used in case of non-stationary model.
71  std::auto_ptr<DiscreteDistribution> rDist_;
72  std::auto_ptr<NucleotideFrequenciesSet> rootFreqs_;
73  std::string treePropertyIn_;
74  std::auto_ptr<const Tree> tree_;
75  std::vector<std::string> parametersOut_;
77  double propGapsToKeep_; //Exclude sites with too many gaps
78  bool gapsAsUnresolved_; //For most models, should be yes as they do not allow for gap characters
79  ParameterList initParameters_;
80  ParameterList fixedParameters_;
81 
82  public:
99 SubstitutionModel* model,
100 DiscreteDistribution* rDist,
101 NucleotideFrequenciesSet* rootFreqs,
102 const std::string& treePropertyIn, // copied into treePropertyIn_
103 const std::vector<std::string>& parametersOut,
104 const ParameterList& fixedParameters,
105 bool reestimateBrLen = true,
106 double propGapsToKeep = 0,
107 bool gapsAsUnresolved = true):
108 AbstractMafStatistics(),
109 model_(model), modelSet_(0), rDist_(rDist), rootFreqs_(rootFreqs), // raw pointers are stored in the std::auto_ptr members; modelSet_ starts empty
110 treePropertyIn_(treePropertyIn), tree_(0), parametersOut_(parametersOut), // no tree is stored by this overload (tree_ is null)
111 reestimateBrLen_(reestimateBrLen), propGapsToKeep_(propGapsToKeep), gapsAsUnresolved_(gapsAsUnresolved),
112 initParameters_(), fixedParameters_(fixedParameters)
113 {
114 if (!rootFreqs) // init_() is only called here when no root frequencies were supplied
115 init_();
116 //Otherwise we do not initialize parameters as the tree might change for each block.
117 //We therefore have to initialize once for each block.
118 }
119 
136 SubstitutionModel* model,
137 DiscreteDistribution* rDist,
138 NucleotideFrequenciesSet* rootFreqs,
139 const Tree* tree,
140 const std::vector<std::string>& parametersOut,
141 const ParameterList& fixedParameters,
142 bool reestimateBrLen = true,
143 double propGapsToKeep = 0,
144 bool gapsAsUnresolved = true):
145 AbstractMafStatistics(),
146 model_(model), modelSet_(0), rDist_(rDist), rootFreqs_(rootFreqs), // raw pointers are stored in the std::auto_ptr members
147 treePropertyIn_(NO_PROPERTY), tree_(0), parametersOut_(parametersOut), // NOTE(review): tree_ is initialised to null, so the 'tree' argument is not stored; it is only used below when rootFreqs is non-null -- confirm this is intended
148 reestimateBrLen_(reestimateBrLen), propGapsToKeep_(propGapsToKeep), gapsAsUnresolved_(gapsAsUnresolved),
149 initParameters_(), fixedParameters_(fixedParameters)
150 {
151 if (rootFreqs) // root frequencies given: build a homogeneous model set from clones of model and rootFreqs, on the supplied tree
152 modelSet_.reset(SubstitutionModelSetTools::createHomogeneousModelSet(model->clone(), rootFreqs->clone(), tree));
153 init_(); // called unconditionally in this overload
154 }
155 
156  private:
158  AbstractMafStatistics(),
159  model_(0), modelSet_(0), rDist_(0), rootFreqs_(0),
163  {}
164 
166  {
167  model_.reset();
168  modelSet_.reset();
169  rDist_.reset();
170  rootFreqs_.reset();
172  tree_.reset();
173  parametersOut_ = mafstat.parametersOut_;
179  return *this;
180  }
181 
182  public:
183 std::string getShortName() const { return "MLModelFit"; } // short name of this statistic; always the fixed string "MLModelFit"
184 std::string getFullName() const { return "Maximum Likelihood Model Fitting"; } // full descriptive name of this statistic; always the same fixed string
185  void compute(const MafBlock& block);
186 std::vector<std::string> getSupportedTags() const { // returns "NbIterations" followed by every entry of parametersOut_
187 std::vector<std::string> tags;
188 tags.push_back("NbIterations"); // always the first tag
189 tags.insert(tags.end(), parametersOut_.begin(), parametersOut_.end()); // then the entries of parametersOut_, in their stored order
190 return tags; // a fresh vector is built and returned by value on every call
191 }
192 
193  static const std::string NO_PROPERTY;
194 
195  private:
196  void init_();
197 };
198 
199 } //end of namespace bpp.
200 
201 #endif //_MAXIMUMLIKELIHOODMODELFITMAFSTATISTICS_H_
202 
MaximumLikelihoodModelFitMafStatistics & operator=(const MaximumLikelihoodModelFitMafStatistics &mafstat)
MaximumLikelihoodModelFitMafStatistics(SubstitutionModel *model, DiscreteDistribution *rDist, NucleotideFrequenciesSet *rootFreqs, const Tree *tree, const std::vector< std::string > &parametersOut, const ParameterList &fixedParameters, bool reestimateBrLen=true, double propGapsToKeep=0, bool gapsAsUnresolved=true)
Build a new maximum likelihood model fitting maf statistic, with the tree passed directly as an argument.
MaximumLikelihoodModelFitMafStatistics(SubstitutionModel *model, DiscreteDistribution *rDist, NucleotideFrequenciesSet *rootFreqs, const std::string &treePropertyIn, const std::vector< std::string > &parametersOut, const ParameterList &fixedParameters, bool reestimateBrLen=true, double propGapsToKeep=0, bool gapsAsUnresolved=true)
Build a new maximum likelihood model fitting maf statistic, with the tree designated by a property name (treePropertyIn).
MaximumLikelihoodModelFitMafStatistics(const MaximumLikelihoodModelFitMafStatistics &mafstat)