bpp-phyl  2.2.0
BppOSubstitutionModelFormat.cpp
Go to the documentation of this file.
1 //
2 // File: BppOSubstitutionModelFormat.cpp
3 // Created by: Laurent Guéguen
4 // Created on: mercredi 4 juillet 2012, à 13h 58
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
42 
43 #include <Bpp/Io/FileTools.h>
44 #include <Bpp/Text/TextTools.h>
45 #include <Bpp/Text/StringTokenizer.h>
46 #include <Bpp/Text/KeyvalTools.h>
47 
48 #include "../Model/Codon/MG94.h"
49 #include "../Model/Codon/GY94.h"
50 #include "../Model/Codon/YNGKP_M1.h"
51 #include "../Model/Codon/YNGKP_M2.h"
52 #include "../Model/Codon/YNGKP_M3.h"
53 #include "../Model/Codon/YNGKP_M7.h"
54 #include "../Model/Codon/YNGKP_M8.h"
55 #include "../Model/Codon/YN98.h"
56 #include "../Model/Codon/TripletSubstitutionModel.h"
57 #include "../Model/Codon/CodonRateSubstitutionModel.h"
58 #include "../Model/Codon/CodonDistanceSubstitutionModel.h"
59 #include "../Model/Codon/CodonRateFrequenciesSubstitutionModel.h"
60 #include "../Model/Codon/CodonDistanceFrequenciesSubstitutionModel.h"
61 #include "../Model/Codon/CodonDistancePhaseFrequenciesSubstitutionModel.h"
62 #include "../Model/Codon/CodonDistanceFitnessPhaseFrequenciesSubstitutionModel.h"
63 #include "../Model/RE08.h"
64 #include "../Model/TS98.h"
65 #include "../Model/G2001.h"
66 #include "../Model/Nucleotide/F84.h"
67 #include "../Model/Nucleotide/NucleotideSubstitutionModel.h"
68 #include "../Model/Nucleotide/gBGC.h"
69 #include "../Model/Nucleotide/RN95.h"
70 #include "../Model/Nucleotide/GTR.h"
71 #include "../Model/Nucleotide/RN95s.h"
72 #include "../Model/Nucleotide/HKY85.h"
73 #include "../Model/Nucleotide/SSR.h"
74 #include "../Model/Nucleotide/JCnuc.h"
75 #include "../Model/Nucleotide/T92.h"
76 #include "../Model/Nucleotide/K80.h"
77 #include "../Model/Nucleotide/TN93.h"
78 #include "../Model/Nucleotide/L95.h"
79 #include "../Model/Nucleotide/YpR.h"
80 #include "../Model/Protein/CoalaCore.h"
81 #include "../Model/Protein/LLG08_EX2.h"
82 #include "../Model/Protein/Coala.h"
83 #include "../Model/Protein/LLG08_EX3.h"
84 #include "../Model/Protein/DSO78.h"
85 #include "../Model/Protein/LLG08_UL2.h"
86 #include "../Model/Protein/JCprot.h"
87 #include "../Model/Protein/LLG08_UL3.h"
88 #include "../Model/Protein/JTT92.h"
89 #include "../Model/Protein/ProteinSubstitutionModel.h"
90 #include "../Model/Protein/LG08.h"
91 #include "../Model/Protein/UserProteinSubstitutionModel.h"
92 #include "../Model/Protein/LGL08_CAT.h"
93 #include "../Model/Protein/WAG01.h"
94 #include "../Model/Protein/LLG08_EHO.h"
95 #include "../Model/Protein/LG10_EX_EHO.h"
96 #include "../Model/BinarySubstitutionModel.h"
97 
98 #include "../App/PhylogeneticsApplicationTools.h"
99 
101 
102 #include <Bpp/Seq/App/SequenceApplicationTools.h>
103 #include <Bpp/Seq/Alphabet/AlphabetTools.h>
104 
105 #include <Bpp/Io/OutputStream.h>
106 #include <Bpp/Io/BppOParametrizableFormat.h>
107 #include <Bpp/Io/BppODiscreteDistributionFormat.h>
108 
109 //From Numeric
110 
111 #include <Bpp/Numeric/Prob/ConstantDistribution.h>
112 #include <Bpp/Numeric/AutoParameter.h>
113 
114 using namespace bpp;
115 
116 // From the STL:
117 #include <iomanip>
118 
119 using namespace std;
120 
121 unsigned char BppOSubstitutionModelFormat::DNA = 1;
122 unsigned char BppOSubstitutionModelFormat::RNA = 2;
123 unsigned char BppOSubstitutionModelFormat::NUCLEOTIDE = 1 | 2;
124 unsigned char BppOSubstitutionModelFormat::PROTEIN = 4;
125 unsigned char BppOSubstitutionModelFormat::CODON = 8;
126 unsigned char BppOSubstitutionModelFormat::WORD = 16;
127 unsigned char BppOSubstitutionModelFormat::BINARY = 32;
128 unsigned char BppOSubstitutionModelFormat::ALL = 1 | 2 | 4 | 8 | 16 | 32;
129 
130 
132  const Alphabet* alphabet,
133  const std::string& modelDescription,
134  const SiteContainer* data,
135  bool parseArguments)
136 {
137  unparsedArguments_.clear();
138  auto_ptr<SubstitutionModel> model;
139  string modelName = "";
140  map<string, string> args;
141  KeyvalTools::parseProcedure(modelDescription, modelName, args);
142 
143  // //////////////////////////////////
144  // / MIXED MODELS
145  // ////////////////////////////////
146 
147  if ((modelName == "MixedModel" || (modelName == "Mixture")) && allowMixed_)
148  model.reset(readMixed_(alphabet, modelDescription, data));
149 
150 
151  // /////////////////////////////////
152  // / WORDS and CODONS
153  // ///////////////////////////////
154 
155  else if ((modelName == "Word") || (modelName == "Triplet") || (modelName.substr(0, 5) == "Codon"))
156  model.reset(readWord_(alphabet, modelDescription, data));
157 
158 
159  // //////////////////////////////////////
160  // PREDEFINED CODON MODELS
161  // //////////////////////////////////////
162 
163  else if (((modelName == "MG94") || (modelName == "YN98") ||
164  (modelName == "GY94") || (modelName.substr(0, 5) == "YNGKP")) && (alphabetCode_ & CODON))
165  {
166  if (!(alphabetCode_ & CODON))
167  throw Exception("BppOSubstitutionModelFormat::read. Codon alphabet not supported.");
168  if (!geneticCode_)
169  throw Exception("BppOSubstitutionModelFormat::read(). No genetic code specified! Consider using 'setGeneticCode'.");
170 
171  if (!AlphabetTools::isCodonAlphabet(alphabet))
172  throw Exception("Alphabet should be Codon Alphabet.");
173 
174  const CodonAlphabet* pCA = dynamic_cast<const CodonAlphabet*>(alphabet);
175 
176  if (args.find("genetic_code") != args.end()) {
177  ApplicationTools::displayWarning("'genetic_code' argument is no longer supported inside model description, and has been supersided by a global 'genetic_code' option.");
178  throw Exception("BppOSubstitutionModelFormat::read. Deprecated 'genetic_code' argument.");
179  }
180 
181  if (geneticCode_->getSourceAlphabet()->getAlphabetType() != pCA->getAlphabetType())
182  throw Exception("Mismatch between genetic code and codon alphabet");
183 
184  string freqOpt = ApplicationTools::getStringParameter("frequencies", args, "F0", "", true, warningLevel_);
185  BppOFrequenciesSetFormat freqReader(BppOFrequenciesSetFormat::ALL, verbose_, warningLevel_);
186  freqReader.setGeneticCode(geneticCode_); //This uses the same instance as the one that will be used by the model.
187  auto_ptr<FrequenciesSet> codonFreqs(freqReader.read(pCA, freqOpt, data, false));
188  map<string, string> unparsedParameterValuesNested(freqReader.getUnparsedArguments());
189 
190  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
191  {
192  unparsedArguments_[modelName + "." + it->first] = it->second;
193  }
194 
195  if (modelName == "MG94")
196  model.reset(new MG94(geneticCode_, codonFreqs.release()));
197  else if (modelName == "GY94")
198  model.reset(new GY94(geneticCode_, codonFreqs.release()));
199  else if ((modelName == "YN98") || (modelName == "YNGKP_M0"))
200  model.reset(new YN98(geneticCode_, codonFreqs.release()));
201  else if (modelName == "YNGKP_M1")
202  model.reset(new YNGKP_M1(geneticCode_, codonFreqs.release()));
203  else if (modelName == "YNGKP_M2")
204  model.reset(new YNGKP_M2(geneticCode_, codonFreqs.release()));
205  else if (modelName == "YNGKP_M3")
206  if (args.find("n") == args.end())
207  model.reset(new YNGKP_M3(geneticCode_, codonFreqs.release()));
208  else
209  model.reset(new YNGKP_M3(geneticCode_, codonFreqs.release(), TextTools::to<unsigned int>(args["n"])));
210  else if ((modelName == "YNGKP_M7") || modelName == "YNGKP_M8")
211  {
212  if (args.find("n") == args.end())
213  throw Exception("Missing argument 'n' (number of classes) in " + modelName + " distribution");
214  unsigned int nbClasses = TextTools::to<unsigned int>(args["n"]);
215  if (verbose_)
216  ApplicationTools::displayResult("Number of classes in model", nbClasses);
217 
218  if (modelName == "YNGKP_M7")
219  model.reset(new YNGKP_M7(geneticCode_, codonFreqs.release(), nbClasses));
220  else if (modelName == "YNGKP_M8")
221  model.reset(new YNGKP_M8(geneticCode_, codonFreqs.release(), nbClasses));
222  }
223  else
224  throw Exception("Unknown Codon model: " + modelName);
225  }
226 
227 
228  // //////////////////////////////////
229  // gBGC
230  // //////////////////////////////////
231 
232  else if (modelName == "gBGC")
233  {
234  if (!(alphabetCode_ & NUCLEOTIDE))
235  throw Exception("BppOSubstitutionModelFormat::read. Nucleotide alphabet not supported.");
236  // We have to parse the nested model first:
237  string nestedModelDescription = args["model"];
238  if (TextTools::isEmpty(nestedModelDescription))
239  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'gBGC'.");
240  if (verbose_)
241  ApplicationTools::displayResult("Biased gene conversion", modelName);
242  BppOSubstitutionModelFormat nestedReader(NUCLEOTIDE, true, true, false, verbose_, warningLevel_);
243  auto_ptr<NucleotideSubstitutionModel> nestedModel(dynamic_cast<NucleotideSubstitutionModel*>(nestedReader.read(alphabet, nestedModelDescription, data, false)));
244  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
245 
246  // Now we create the gBGC substitution model:
247  model.reset(new gBGC(dynamic_cast<const NucleicAlphabet*>(alphabet), nestedModel.release()));
248 
249  // Then we update the parameter set:
250  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
251  {
252  unparsedArguments_["gBGC." + it->first] = it->second;
253  }
254  }
255 
256  // //////////////////////////////////
257  // YpR
258  // //////////////////////////////////
259 
260  else if (modelName == "YpR_Sym")
261  {
262  if (!(alphabetCode_ & NUCLEOTIDE))
263  throw Exception("BppOSubstitutionModelFormat::read. Nucleotide alphabet not supported.");
264  if (alphabet->getAlphabetType() != "RNY alphabet")
265  throw Exception("Mismatch alphabet: " + alphabet->getAlphabetType() + " for model: " + modelName);
266  const RNY* prny = dynamic_cast<const RNY*>(alphabet);
267 
268  string nestedModelDescription = args["model"];
269  if (TextTools::isEmpty(nestedModelDescription))
270  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'YpR_sym'.");
271  if (verbose_)
272  ApplicationTools::displayResult("Symetric YpR model", modelName);
273  BppOSubstitutionModelFormat nestedReader(NUCLEOTIDE, false, false, false, verbose_, warningLevel_);
274  auto_ptr<NucleotideSubstitutionModel> nestedModel(dynamic_cast<NucleotideSubstitutionModel*>(nestedReader.read(&prny->getLetterAlphabet(), nestedModelDescription, data, false)));
275  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
276  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
277  {
278  unparsedArguments_["YpR_Sym." + it->first] = it->second;
279  }
280 
281  model.reset(new YpR_Sym(prny, nestedModel.release()));
282  }
283  else if (modelName == "YpR_Gen")
284  {
285  if (!(alphabetCode_ & NUCLEOTIDE))
286  throw Exception("BppOSubstitutionModelFormat::read. Nucleotide alphabet not supported.");
287  if (alphabet->getAlphabetType() != "RNY alphabet")
288  throw Exception("Mismatch alphabet: " + alphabet->getAlphabetType() + " for model: " + modelName);
289  const RNY* prny = dynamic_cast<const RNY*>(alphabet);
290 
291  string nestedModelDescription = args["model"];
292  if (TextTools::isEmpty(nestedModelDescription))
293  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'YpR_gen'.");
294  if (verbose_)
295  ApplicationTools::displayResult("General YpR model", modelName);
296  BppOSubstitutionModelFormat nestedReader(NUCLEOTIDE, false, false, false, verbose_, warningLevel_);
297  auto_ptr<NucleotideSubstitutionModel> nestedModel(dynamic_cast<NucleotideSubstitutionModel*>(nestedReader.read(&prny->getLetterAlphabet(), nestedModelDescription, data, false)));
298  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
299 
300  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
301  {
302  unparsedArguments_["YpR_Gen." + it->first] = it->second;
303  }
304 
305  model.reset(new YpR_Gen(prny, nestedModel.release()));
306  }
307 
308 
309  // /////////////////////////////////
310  // / RE08
311  // ///////////////////////////////
312 
313  else if (modelName == "RE08")
314  {
315  if (!allowGaps_)
316  throw Exception("BppOSubstitutionModelFormat::read. No Gap model allowed here.");
317 
318  // We have to parse the nested model first:
319  string nestedModelDescription = args["model"];
320  if (TextTools::isEmpty(nestedModelDescription))
321  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'RE08'.");
322  if (verbose_)
323  ApplicationTools::displayResult("Gap model", modelName);
324  BppOSubstitutionModelFormat nestedReader(ALL, allowCovarions_, false, false, verbose_, warningLevel_);
325  auto_ptr<ReversibleSubstitutionModel> nestedModel(dynamic_cast<ReversibleSubstitutionModel*>(nestedReader.read(alphabet, nestedModelDescription, data, false)));
326  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
327 
328  // Now we create the RE08 substitution model:
329  model.reset(new RE08(nestedModel.release()));
330 
331  // Then we update the parameter set:
332  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
333  {
334  unparsedArguments_["RE08.model_" + it->first] = it->second;
335  }
336  }
337 
338  // /////////////////////////////////
339  // / TS98
340  // ///////////////////////////////
341 
342  else if (modelName == "TS98")
343  {
344  if (!allowCovarions_)
345  throw Exception("BppOSubstitutionModelFormat::read. No Covarion model allowed here.");
346 
347  // We have to parse the nested model first:
348  string nestedModelDescription = args["model"];
349  if (TextTools::isEmpty(nestedModelDescription))
350  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'TS98'.");
351  if (verbose_)
352  ApplicationTools::displayResult("Covarion model", modelName);
353  BppOSubstitutionModelFormat nestedReader(ALL, false, allowMixed_, allowGaps_, false, warningLevel_);
354  auto_ptr<ReversibleSubstitutionModel> nestedModel(dynamic_cast<ReversibleSubstitutionModel*>(nestedReader.read(alphabet, nestedModelDescription, data, false)));
355  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
356 
357  // Now we create the TS98 substitution model:
358  model.reset(new TS98(nestedModel.release()));
359 
360  // Then we update the parameter set:
361  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
362  {
363  unparsedArguments_["TS98.model_" + it->first] = it->second;
364  }
365  }
366 
367  // /////////////////////////////////
368  // / G01
369  // ///////////////////////////////
370 
371  else if (modelName == "G01")
372  {
373  if (!allowCovarions_)
374  throw Exception("BppOSubstitutionModelFormat::read. No Covarion model allowed here.");
375 
376  // We have to parse the nested model first:
377  string nestedModelDescription = args["model"];
378  if (TextTools::isEmpty(nestedModelDescription))
379  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'G01'.");
380  string nestedRateDistDescription = args["rdist"];
381  if (TextTools::isEmpty(nestedRateDistDescription))
382  throw Exception("BppOSubstitutionModelFormat::read. Missing argument 'rdist' for model 'G01'.");
383  if (verbose_)
384  ApplicationTools::displayResult("Covarion model", modelName);
385  BppOSubstitutionModelFormat nestedReader(ALL, false, allowMixed_, allowGaps_, verbose_, warningLevel_);
386  auto_ptr<ReversibleSubstitutionModel> nestedModel(dynamic_cast<ReversibleSubstitutionModel*>(nestedReader.read(alphabet, nestedModelDescription, data, false)));
387  map<string, string> unparsedParameterValuesNestedModel(nestedReader.getUnparsedArguments());
388  BppORateDistributionFormat rateReader(false);
389  auto_ptr<DiscreteDistribution> nestedRDist(rateReader.read(nestedRateDistDescription, false));
390  map<string, string> unparsedParameterValuesNestedDist(rateReader.getUnparsedArguments());
391 
392  // Now we create the TS98 substitution model:
393  model.reset(new G2001(nestedModel.release(), nestedRDist.release()));
394 
395  // Then we update the parameter set:
396  for (map<string, string>::iterator it = unparsedParameterValuesNestedModel.begin(); it != unparsedParameterValuesNestedModel.end(); it++)
397  {
398  unparsedArguments_["G01.model_" + it->first] = it->second;
399  }
400  for (map<string, string>::iterator it = unparsedParameterValuesNestedDist.begin(); it != unparsedParameterValuesNestedDist.end(); it++)
401  {
402  unparsedArguments_["G01.rdist_" + it->first] = it->second;
403  }
404  }
405  else
406  {
407  // This is a 'simple' model...
408  if (AlphabetTools::isNucleicAlphabet(alphabet))
409  {
410  if (!(alphabetCode_ & NUCLEOTIDE))
411  throw Exception("BppOSubstitutionModelFormat::read. Nucleotide alphabet not supported.");
412  const NucleicAlphabet* alpha = dynamic_cast<const NucleicAlphabet*>(alphabet);
413 
414  // /////////////////////////////////
415  // / GTR
416  // ///////////////////////////////
417 
418  if (modelName == "GTR")
419  {
420  model.reset(new GTR(alpha));
421  }
422 
423 
424  // /////////////////////////////////
425  // / SSR
426  // ///////////////////////////////
427 
428  else if (modelName == "SSR")
429  {
430  model.reset(new SSR(alpha));
431  }
432 
433  // /////////////////////////////////
434  // / L95
435  // ///////////////////////////////
436 
437  else if (modelName == "L95")
438  {
439  model.reset(new L95(alpha));
440  }
441 
442  // /////////////////////////////////
443  // / RN95
444  // ///////////////////////////////
445 
446  else if (modelName == "RN95")
447  {
448  model.reset(new RN95(alpha));
449  }
450 
451  // /////////////////////////////////
452  // / RN95s
453  // ///////////////////////////////
454 
455  else if (modelName == "RN95s")
456  {
457  model.reset(new RN95s(alpha));
458  }
459 
460  // /////////////////////////////////
461  // / TN93
462  // //////////////////////////////
463 
464  else if (modelName == "TN93")
465  {
466  model.reset(new TN93(alpha));
467  }
468 
469  // /////////////////////////////////
470  // / HKY85
471  // ///////////////////////////////
472 
473  else if (modelName == "HKY85")
474  {
475  model.reset(new HKY85(alpha));
476  }
477 
478  // /////////////////////////////////
479  // / F84
480  // ///////////////////////////////
481 
482  else if (modelName == "F84")
483  {
484  model.reset(new F84(alpha));
485  }
486 
487  // /////////////////////////////////
488  // / T92
489  // ///////////////////////////////
490 
491  else if (modelName == "T92")
492  {
493  model.reset(new T92(alpha));
494  }
495 
496  // /////////////////////////////////
497  // / K80
498  // ///////////////////////////////
499 
500  else if (modelName == "K80")
501  {
502  model.reset(new K80(alpha));
503  }
504 
505 
506  // /////////////////////////////////
507  // / JC69
508  // ///////////////////////////////
509 
510  else if (modelName == "JC69")
511  {
512  model.reset(new JCnuc(alpha));
513  }
514  else
515  {
516  throw Exception("Model '" + modelName + "' unknown.");
517  }
518  }
519  else
520  {
521  if (!(alphabetCode_ & PROTEIN))
522  throw Exception("BppOSubstitutionModelFormat::read. Protein alphabet not supported.");
523  const ProteicAlphabet* alpha = dynamic_cast<const ProteicAlphabet*>(alphabet);
524 
525  if (modelName.find("+F") != string::npos) {
526  string freqOpt = ApplicationTools::getStringParameter("frequencies", args, "Full", "", true, warningLevel_);
527  BppOFrequenciesSetFormat freqReader(BppOFrequenciesSetFormat::ALL, false, warningLevel_);
528  auto_ptr<FrequenciesSet> protFreq(freqReader.read(alpha, freqOpt, data, true));
529  map<string, string> unparsedParameterValuesNested(freqReader.getUnparsedArguments());
530 
531  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
532  {
533  unparsedArguments_[modelName + "." + it->first] = it->second;
534  }
535 
536  if (modelName == "JC69+F")
537  model.reset(new JCprot(alpha, dynamic_cast<ProteinFrequenciesSet*>(protFreq.release()), true));
538  else if (modelName == "DSO78+F")
539  model.reset(new DSO78(alpha, dynamic_cast<ProteinFrequenciesSet*>(protFreq.release()), true));
540  else if (modelName == "JTT92+F")
541  model.reset(new JTT92(alpha, dynamic_cast<ProteinFrequenciesSet*>(protFreq.release()), true));
542  else if (modelName == "LG08+F")
543  model.reset(new LG08(alpha, dynamic_cast<ProteinFrequenciesSet*>(protFreq.release()), true));
544  else if (modelName == "WAG01+F")
545  model.reset(new WAG01(alpha, dynamic_cast<ProteinFrequenciesSet*>(protFreq.release()), true));
546  else if (modelName == "Empirical+F")
547  {
548  string prefix = args["name"];
549  if (TextTools::isEmpty(prefix))
550  throw Exception("'name' argument missing for user-defined substitution model.");
551  model.reset(new UserProteinSubstitutionModel(alpha, args["file"], dynamic_cast<ProteinFrequenciesSet*>(protFreq.release()), prefix + "+F.", true));
552  }
553  }
554  else if (modelName == "JC69")
555  model.reset(new JCprot(alpha));
556  else if (modelName == "DSO78")
557  model.reset(new DSO78(alpha));
558  else if (modelName == "JTT92")
559  model.reset(new JTT92(alpha));
560  else if (modelName == "LG08")
561  model.reset(new LG08(alpha));
562  else if (modelName == "WAG01")
563  model.reset(new WAG01(alpha));
564  else if (modelName == "LLG08_EHO")
565  model.reset(new LLG08_EHO(alpha));
566  else if (modelName == "LLG08_EX2")
567  model.reset(new LLG08_EX2(alpha));
568  else if (modelName == "LLG08_EX3")
569  model.reset(new LLG08_EX3(alpha));
570  else if (modelName == "LLG08_UL2")
571  model.reset(new LLG08_UL2(alpha));
572  else if (modelName == "LLG08_UL3")
573  model.reset(new LLG08_UL3(alpha));
574  else if (modelName == "LG10_EX_EHO")
575  model.reset(new LG10_EX_EHO(alpha));
576  else if (modelName == "LGL08_CAT")
577  {
578  unsigned int nbCat = TextTools::to<unsigned int>(args["nbCat"]);
579  model.reset(new LGL08_CAT(alpha, nbCat));
580  }
581  else if (modelName == "Empirical")
582  {
583  string prefix = args["name"];
584  if (TextTools::isEmpty(prefix))
585  throw Exception("'name' argument missing for user-defined substitution model.");
586  model.reset(new UserProteinSubstitutionModel(alpha, args["file"], prefix));
587  }
588  else if (modelName == "Coala")
589  {
590  string exchangeability = args["exch"];
591  if (TextTools::isEmpty(exchangeability))
592  throw Exception("BppOSubstitutionModelFormat::read. missing argument 'exch' for model 'Coala'.");
593  string prefix = args["name"];
594  if (exchangeability == "Empirical" && TextTools::isEmpty(prefix))
595  throw Exception("'name' argument missing to specify the exchangeabilities of the user-defined empirical model.");
596  BppOSubstitutionModelFormat nestedReader(PROTEIN, false, allowMixed_, allowGaps_, verbose_, warningLevel_);
597  auto_ptr<ProteinSubstitutionModel> nestedModel(dynamic_cast<ProteinSubstitutionModel*>(nestedReader.read(alphabet, exchangeability, data, false)));
598  string nbrOfParametersPerBranch = args["nbrAxes"];
599  if (TextTools::isEmpty(nbrOfParametersPerBranch))
600  throw Exception("'nbrAxes' argument missing to define the number of axis of the Correspondence Analysis.");
601  //Now we create the Coala model:
602  model.reset(new Coala(alpha, *nestedModel, TextTools::to<unsigned int>(nbrOfParametersPerBranch)));
603  model->setFreqFromData(*data);
604  }
605 
606  else if (AlphabetTools::isBinaryAlphabet(alphabet))
607  {
608  if (!(alphabetCode_ & BINARY))
609  throw Exception("BppOSubstitutionModelFormat::read. Binary alphabet not supported.");
610  const BinaryAlphabet* balpha = dynamic_cast<const BinaryAlphabet*>(alphabet);
611 
612  if (modelName == "Binary")
613  model.reset(new BinarySubstitutionModel(balpha));
614  }
615  else
616  throw Exception("Model '" + modelName + "' unknown.");
617  }
618  if (verbose_)
619  ApplicationTools::displayResult("Substitution model", modelName);
620  }
621 
622  // Update parameter args:
623  vector<string> pnames = model->getParameters().getParameterNames();
624 
625  string pref = model->getNamespace();
626 
627  for (size_t i = 0; i < pnames.size(); i++)
628  {
629  string name = model->getParameterNameWithoutNamespace(pnames[i]);
630  if (args.find(name) != args.end())
631  unparsedArguments_[pref + name] = args[name];
632  }
633 
634  // Now look if some parameters are aliased:
635  ParameterList pl = model->getIndependentParameters();
636  string pname, pval, pname2;
637  for (size_t i = 0; i < pl.size(); i++)
638  {
639  pname = model->getParameterNameWithoutNamespace(pl[i].getName());
640 
641  if (args.find(pname) == args.end())
642  continue;
643  pval = args[pname];
644 
645  if (((pval.rfind("_") != string::npos) && (TextTools::isDecimalInteger(pval.substr(pval.rfind("_")+1,string::npos)))) ||
646  (pval.find("(") != string::npos))
647  continue;
648  bool found = false;
649  for (unsigned int j = 0; j < pl.size() && !found; j++)
650  {
651  pname2 = model->getParameterNameWithoutNamespace(pl[j].getName());
652 
653  // if (j == i || args.find(pname2) == args.end()) continue; Julien 03/03/2010: This extra condition prevents complicated (nested) models to work properly...
654  if (j == i)
655  continue;
656  if (pval == pname2)
657  {
658  // This is an alias...
659  // NB: this may throw an exception if uncorrect! We leave it as is for now :s
660  model->aliasParameters(pname2, pname);
661  if (verbose_)
662  ApplicationTools::displayResult("Parameter alias found", pname + "->" + pname2);
663  found = true;
664  }
665  }
666  // if (!TextTools::isDecimalNumber(pval) && !found)
667  // throw Exception("Incorrect parameter syntax: parameter " + pval + " was not found and can't be used as a value for parameter " + pname + ".");
668  }
669 
670  // 2 following tests be removed in a later version
671  if (args.find("useObservedFreqs") != args.end())
672  throw Exception("useObservedFreqs argument is obsolete. Please use 'initFreqs=observed' instead.");
673  if (args.find("useObservedFreqs.pseudoCount") != args.end())
674  throw Exception("useObservedFreqs.pseudoCount argument is obsolete. Please use 'initFreqs.observedPseudoCount' instead.");
675 
676 
677  if (args.find("initFreqs") != args.end())
678  unparsedArguments_[pref + "initFreqs"] = args["initFreqs"];
679  if (args.find("initFreqs.observedPseudoCount") != args.end())
680  unparsedArguments_[pref + "initFreqs.observedPseudoCount"] = args["initFreqs.observedPseudoCount"];
681 
682  if (parseArguments)
683  initialize_(*model, data);
684 
685  return model.release();
686 }
687 
688 
689 SubstitutionModel* BppOSubstitutionModelFormat::readWord_(const Alphabet* alphabet, const std::string& modelDescription, const SiteContainer* data)
690 {
691  auto_ptr<SubstitutionModel> model;
692  string modelName = "";
693  map<string, string> args;
694  KeyvalTools::parseProcedure(modelDescription, modelName, args);
695 
696  vector<string> v_nestedModelDescription;
697  vector<SubstitutionModel*> v_pSM;
698  const WordAlphabet* pWA;
699 
700  string s, nestedModelDescription;
701  unsigned int nbmodels;
702 
703  if ((modelName == "Word" && !AlphabetTools::isWordAlphabet(alphabet)) ||
704  (modelName != "Word" && !AlphabetTools::isCodonAlphabet(alphabet)))
705  throw Exception("Bad alphabet type "
706  + alphabet->getAlphabetType() + " for model " + modelName + ".");
707 
708  pWA = dynamic_cast<const WordAlphabet*>(alphabet);
709 
710  if (args.find("model") != args.end())
711  {
712  v_nestedModelDescription.push_back(args["model"]);
713  nbmodels = (modelName == "Word") ? pWA->getLength() : 3;
714  }
715  else
716  {
717  if (args.find("model1") == args.end())
718  throw Exception("Missing argument 'model' or 'model1' for model " + modelName + ".");
719 
720  nbmodels = 0;
721 
722  while (args.find("model" + TextTools::toString(nbmodels + 1)) != args.end())
723  v_nestedModelDescription.push_back(args["model" + TextTools::toString(++nbmodels)]);
724  }
725 
726  if (nbmodels < 2)
727  throw Exception("Missing nested models for model " + modelName + ".");
728 
729  if (pWA->getLength() != nbmodels)
730  throw Exception("Bad alphabet type "
731  + alphabet->getAlphabetType() + " for model " + modelName + ".");
732 
733 
734  if (v_nestedModelDescription.size() != nbmodels)
735  {
736  BppOSubstitutionModelFormat nestedReader(alphabetCode_, false, true, false, false, warningLevel_);
737  model.reset(nestedReader.read(pWA->getNAlphabet(0), v_nestedModelDescription[0], data, false));
738  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
739  string pref = "";
740  for (unsigned int i = 0; i < nbmodels; i++)
741  {
742  pref += TextTools::toString(i + 1);
743  }
744 
745  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
746  {
747  unparsedArguments_[modelName + "." + pref + "_" + it->first] = it->second;
748  }
749 
750  v_pSM.push_back(model.release());
751  }
752  else
753  {
754  for (unsigned i = 0; i < v_nestedModelDescription.size(); i++)
755  {
756  BppOSubstitutionModelFormat nestedReader(alphabetCode_, false, true, false, false, warningLevel_);
757  model.reset(nestedReader.read(pWA->getNAlphabet(i), v_nestedModelDescription[i], data, false));
758  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
759  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
760  {
761  unparsedArguments_[modelName + "." + TextTools::toString(i + 1) + "_" + it->first] = it->second;
762  }
763 
764  v_pSM.push_back(model.release());
765  }
766  }
767 
768  // /////////////////////////////////
769  // / WORD
770  // ///////////////////////////////
771 
772  if (modelName == "Word")
773  {
774  model.reset((v_nestedModelDescription.size() != nbmodels)
775  ? new WordSubstitutionModel(v_pSM[0], nbmodels)
776  : new WordSubstitutionModel(v_pSM));
777  }
778 
779  // /////////////////////////////////
780  // / CODON
781  // ///////////////////////////////
782 
783  else
784  {
785  const CodonAlphabet* pCA = dynamic_cast<const CodonAlphabet*>(pWA);
786  if (pCA == 0)
787  throw Exception("Non codon Alphabet for model" + modelName + ".");
788 
789  auto_ptr< AlphabetIndex2 > pai2;
790  auto_ptr<FrequenciesSet> pFS;
791 
792  if ((dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]) == 0) ||
793  ((v_nestedModelDescription.size() == 3) &&
794  (dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]) == 0 || dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2]) == 0)))
795  throw Exception("Non simple NucleotideSubstitutionModel imbedded in " + modelName + " model.");
796 
797  if (args.find("genetic_code") != args.end()) {
798  ApplicationTools::displayWarning("'genetic_code' argument is no longer supported inside model description, and has been supersided by a global 'genetic_code' option.");
799  throw Exception("BppOSubstitutionModelFormat::read. Deprecated 'genetic_code' argument.");
800  }
801 
802  if (!geneticCode_)
803  throw Exception("BppOSubstitutionModelFormat::readWord_(). No genetic code specified! Consider using 'setGeneticCode'.");
804 
805 
806  if (modelName.find("Dist") != string::npos)
807  pai2.reset((args.find("aadistance") == args.end()) ? 0 : SequenceApplicationTools::getAlphabetIndex2(&AlphabetTools::PROTEIN_ALPHABET, args["aadistance"]));
808 
809 
810  if (modelName.find("Freq") != string::npos)
811  {
812  if (args.find("frequencies") == args.end())
813  throw Exception("Missing equilibrium frequencies.");
814 
815  BppOFrequenciesSetFormat bIOFreq(alphabetCode_, verbose_, warningLevel_);
816  bIOFreq.setGeneticCode(geneticCode_); //This uses the same instance as the one that will be used by the model
817  pFS.reset(bIOFreq.read(pCA, args["frequencies"], data, false));
818  map<string, string> unparsedParameterValuesNested(bIOFreq.getUnparsedArguments());
819 
820  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
821  {
822  unparsedArguments_[modelName + "." + it->first] = it->second;
823  }
824  }
825 
826 
827  // //
828 
829  if (modelName == "Triplet")
830  model.reset((v_nestedModelDescription.size() != 3)
832  pCA,
833  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]))
835  pCA,
836  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
837  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]),
838  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2])));
839 
840  else if (modelName == "CodonRate")
841  model.reset((v_nestedModelDescription.size() != 3)
843  geneticCode_,
844  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]))
846  geneticCode_,
847  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
848  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]),
849  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2])));
850 
851 
852  else if (modelName == "CodonDist")
853  {
854  if (v_nestedModelDescription.size() != 3)
855  model.reset(new CodonDistanceSubstitutionModel(geneticCode_, dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]), pai2.release()));
856  else
857  model.reset(new CodonDistanceSubstitutionModel(
858  geneticCode_,
859  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
860  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]),
861  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2]), pai2.release()));
862  }
863 
864  else if (modelName == "CodonRateFreq")
865  {
866  if (v_nestedModelDescription.size() != 3)
867  model.reset(
869  geneticCode_,
870  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
871  pFS.release()));
872  else
873  model.reset(
875  geneticCode_,
876  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
877  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]),
878  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2]),
879  pFS.release()));
880  }
881 
882  else if (modelName == "CodonDistFreq")
883  {
884  if (v_nestedModelDescription.size() != 3)
885  model.reset(new CodonDistanceFrequenciesSubstitutionModel(geneticCode_,
886  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
887  pFS.release(),
888  pai2.release()));
889  else
891  geneticCode_,
892  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
893  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]),
894  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2]),
895  pFS.release(),
896  pai2.release()));
897  }
898 
899  else if (modelName == "CodonDistPhasFreq")
900  {
901  if (v_nestedModelDescription.size() != 3)
902  model.reset(new CodonDistancePhaseFrequenciesSubstitutionModel(geneticCode_,
903  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
904  pFS.release(),
905  pai2.release()));
906  else
908  geneticCode_,
909  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
910  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]),
911  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2]),
912  pFS.release(),
913  pai2.release()));
914  }
915  else if (modelName == "CodonDistFitPhasFreq")
916  {
917  if (args.find("fitness") == args.end())
918  throw Exception("Missing fitness in model " + modelName + ".");
919 
920  BppOFrequenciesSetFormat bIOFreq(alphabetCode_, verbose_, warningLevel_);
921  bIOFreq.setGeneticCode(geneticCode_);
922  auto_ptr<FrequenciesSet> pFit(bIOFreq.read(pCA, args["fitness"], data, false));
923  map<string, string> unparsedParameterValuesNested(bIOFreq.getUnparsedArguments());
924 
925  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
926  {
927  unparsedArguments_[modelName + ".fit_" + it->first] = it->second;
928  }
929 
930  if (v_nestedModelDescription.size() != 3)
931  {
932  model.reset(new CodonDistanceFitnessPhaseFrequenciesSubstitutionModel(geneticCode_,
933  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
934  pFit.release(),
935  pFS.release(),
936  pai2.release()));
937  }
938  else
940  geneticCode_,
941  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
942  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]),
943  dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2]),
944  pFit.release(),
945  pFS.release(),
946  pai2.release()));
947  }
948  }
949  return model.release();
950 }
951 
952 
953 MixedSubstitutionModel* BppOSubstitutionModelFormat::readMixed_(const Alphabet* alphabet, const std::string& modelDescription, const SiteContainer* data)
954 {
955  auto_ptr<MixedSubstitutionModel> model;
956 
957  string modelName = "";
958  map<string, string> args;
959  KeyvalTools::parseProcedure(modelDescription, modelName, args);
960  auto_ptr<SubstitutionModel> pSM;
961 
962  if (modelName == "MixedModel")
963  {
964  if (args.find("model") == args.end())
965  throw Exception("The argument 'model' is missing from MixedSubstitutionModel description");
966  string nestedModelDescription = args["model"];
967  BppOSubstitutionModelFormat nestedReader(alphabetCode_, allowCovarions_, true, allowGaps_, false, warningLevel_);
968  pSM.reset(nestedReader.read(alphabet, nestedModelDescription, data, false));
969  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
970 
971  map<string, DiscreteDistribution*> mdist;
972  map<string, string> unparsedParameterValuesNested2;
973 
974  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin();
975  it != unparsedParameterValuesNested.end();
976  it++)
977  {
978  if (it->second.find("(") != string::npos)
979  {
980  BppODiscreteDistributionFormat bIO(false);
981  mdist[pSM->getParameterNameWithoutNamespace(it->first)] = bIO.read(it->second, false);
982  map<string, string> unparsedParameterValuesNested3(bIO.getUnparsedArguments());
983  for (map<string, string>::iterator it2 = unparsedParameterValuesNested3.begin();
984  it2 != unparsedParameterValuesNested3.end();
985  it2++)
986  {
987  unparsedParameterValuesNested2[it->first + "_" + it2->first] = it2->second;
988  }
989  }
990  else
991  unparsedParameterValuesNested2[it->first] = it->second;
992  }
993 
994  for (map<string, string>::iterator it = unparsedParameterValuesNested2.begin();
995  it != unparsedParameterValuesNested2.end();
996  it++)
997  {
998  unparsedArguments_[it->first] = it->second;
999  }
1000 
1001 
1002  int fi(-1), ti(-1);
1003 
1004  if (args.find("from") != args.end())
1005  fi = alphabet->charToInt(args["from"]);
1006  if (args.find("to") != args.end())
1007  ti = alphabet->charToInt(args["to"]);
1008 
1009  string sModN=pSM->getName();
1010  model.reset(new MixtureOfASubstitutionModel(alphabet, pSM.release(), mdist, fi, ti));
1011 
1012  vector<string> v = model->getParameters().getParameterNames();
1013 
1014  for (map<string, DiscreteDistribution*>::iterator it = mdist.begin();
1015  it != mdist.end(); it++)
1016  {
1017  delete it->second;
1018  }
1019 
1020  if (verbose_)
1021  {
1022  ApplicationTools::displayResult("Mixture Of A Substitution Model", sModN);
1023  ApplicationTools::displayResult("Number of classes", model->getNumberOfModels());
1024  }
1025  }
1026 
1027 
1028  else if (modelName == "Mixture")
1029  {
1030  vector<string> v_nestedModelDescription;
1031  vector<SubstitutionModel*> v_pSM;
1032 
1033  if (args.find("model1") == args.end())
1034  {
1035  throw Exception("Missing argument 'model1' for model " + modelName + ".");
1036  }
1037  unsigned int nbmodels = 0;
1038 
1039  while (args.find("model" + TextTools::toString(nbmodels + 1)) != args.end())
1040  {
1041  v_nestedModelDescription.push_back(args["model" + TextTools::toString(++nbmodels)]);
1042  }
1043 
1044  if (nbmodels < 2)
1045  throw Exception("Missing nested models for model " + modelName + ".");
1046 
1047  for (unsigned i = 0; i < v_nestedModelDescription.size(); i++)
1048  {
1049  BppOSubstitutionModelFormat nestedReader(alphabetCode_, false, true, false, false, warningLevel_);
1050  pSM.reset(nestedReader.read(alphabet, v_nestedModelDescription[i], data, false));
1051  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
1052  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
1053  {
1054  unparsedArguments_[modelName + "." + TextTools::toString(i + 1) + "_" + it->first] = it->second;
1055  }
1056  v_pSM.push_back(pSM.release());
1057  }
1058 
1059  model.reset(new MixtureOfSubstitutionModels(alphabet, v_pSM));
1060  if (verbose_)
1061  ApplicationTools::displayResult("Mixture Of Substitution Models", modelName );
1062  }
1063  else
1064  throw Exception("Unknown model name for mixture " + modelName);
1065 
1066  return model.release();
1067 }
1068 
1069 
1071  OutputStream& out,
1072  std::map<std::string, std::string>& globalAliases,
1073  std::vector<std::string>& writtenNames) const
1074 {
1075  bool comma = false;
1076 
1077  // Mixed Model that are defined as "Mixture" and "Mixed"
1078 
1079  if ((dynamic_cast<const MixedSubstitutionModel*>(&model) != NULL) && (dynamic_cast<const AbstractBiblioMixedSubstitutionModel*>(&model) == NULL))
1080  {
1081  writeMixed_(*dynamic_cast<const MixedSubstitutionModel*>(&model), out, globalAliases, writtenNames);
1082  return;
1083  }
1084 
1085  out << model.getName() + "(";
1086 
1087  // Is it a protein user defined model?
1088  const UserProteinSubstitutionModel* userModel = dynamic_cast<const UserProteinSubstitutionModel*>(&model);
1089  if (userModel)
1090  {
1091  out << "file=" << userModel->getPath();
1092  comma = true;
1093  }
1094 
1095  // Is it a markov-modulated model?
1096  const MarkovModulatedSubstitutionModel* mmModel = dynamic_cast<const MarkovModulatedSubstitutionModel*>(&model);
1097  if (mmModel)
1098  {
1099  out << "model=";
1100  const SubstitutionModel* nestedModel = mmModel->getNestedModel();
1101  write(*nestedModel, out, globalAliases, writtenNames);
1102 
1103  const G2001* gModel = dynamic_cast<const G2001*>(&model);
1104  if (gModel)
1105  {
1106  // Also print distribution here:
1107  out << ",rdist=";
1108  const DiscreteDistribution* nestedDist = gModel->getRateDistribution();
1109  const BppODiscreteDistributionFormat* bIO = new BppODiscreteDistributionFormat();
1110 
1111  bIO->write(*nestedDist, out, globalAliases, writtenNames);
1112  delete bIO;
1113  }
1114  comma = true;
1115  }
1116 
1117  // Is it a model with gaps?
1118  const RE08* reModel = dynamic_cast<const RE08*>(&model);
1119  if (reModel)
1120  {
1121  out << "model=";
1122  const SubstitutionModel* nestedModel = reModel->getNestedModel();
1123  write(*nestedModel, out, globalAliases, writtenNames);
1124  comma = true;
1125  }
1126 
1127  // Is it a YpR model?
1128  const YpR* yprModel = dynamic_cast<const YpR*>(&model);
1129  if (yprModel)
1130  {
1131  out << "model=";
1132  const SubstitutionModel* nestedModel = yprModel->getNestedModel();
1133  write(*nestedModel, out, globalAliases, writtenNames);
1134  comma = true;
1135  }
1136 
1137  // Is it a gBGC model?
1138  const gBGC* gbgcModel = dynamic_cast<const gBGC*>(&model);
1139  if (gbgcModel)
1140  {
1141  out << "model=";
1142  const SubstitutionModel* nestedModel = gbgcModel->getNestedModel();
1143  write(*nestedModel, out, globalAliases, writtenNames);
1144  comma = true;
1145  }
1146 
1147  // Is it a word model?
1148 
1149  const AbstractWordSubstitutionModel* wM = dynamic_cast<const AbstractWordSubstitutionModel*>(&model);
1150  if (wM)
1151  {
1152  size_t nmod = wM->getNumberOfModels();
1153  const SubstitutionModel* mod0 = wM->getNModel(0);
1154  if (nmod == 1)
1155  {
1156  out << "model=";
1157  write(*mod0, out, globalAliases, writtenNames);
1158  }
1159  else
1160  {
1161  const SubstitutionModel* mod1 = wM->getNModel(1);
1162  if (mod1 == mod0)
1163  {
1164  out << "model=";
1165  write(*mod0, out, globalAliases, writtenNames);
1166  }
1167  else
1168  {
1169  out << "model1=";
1170  write(*mod0, out, globalAliases, writtenNames);
1171  for (unsigned int i = 1; i < nmod; i++)
1172  {
1173  out << ",model" + TextTools::toString(i + 1) + "=";
1174  write(*wM->getNModel(i), out, globalAliases, writtenNames);
1175  }
1176  }
1177  }
1178  comma = true;
1179  }
1180 
1181  // Is it a COaLA model ?
1182  const Coala* coalaModel = dynamic_cast<const Coala*>(&model);
1183  if (coalaModel)
1184  {
1185  out << "exch=" << coalaModel->getExch() << ",nbrAxes=" << coalaModel->getNbrOfAxes();
1186  comma = true;
1187  }
1188 
1189  // Regular model
1190  const FrequenciesSet* pfs = model.getFrequenciesSet();
1191  if (pfs)
1192  {
1193  if (comma)
1194  out << ",";
1195  out << "frequencies=";
1196 
1197  BppOFrequenciesSetFormat bIOFreq(alphabetCode_, false, warningLevel_);
1198  bIOFreq.write(pfs, out, writtenNames);
1199  comma = true;
1200  }
1201 
1202  // Specific case of CodonFitnessSubstitutionModel
1203 
1205  if (pCF)
1206  {
1207  if (comma)
1208  out << ",";
1209  out << "fitness=";
1210 
1211  BppOFrequenciesSetFormat bIOFreq(alphabetCode_, false, warningLevel_);
1212  bIOFreq.write(pCF->getFitness(), out, writtenNames);
1213  comma = true;
1214  }
1215 
1216  // and the other parameters
1217 
1218  BppOParametrizableFormat bIO;
1219 
1220  bIO.write(&model, out, globalAliases, model.getIndependentParameters().getParameterNames(), writtenNames, true, comma);
1221  out << ")";
1222 }
1223 
1224 
1226  OutputStream& out,
1227  std::map<std::string, std::string>& globalAliases,
1228  std::vector<std::string>& writtenNames) const
1229 {
1230  if (dynamic_cast<const MixtureOfSubstitutionModels*>(&model) != NULL)
1231  {
1232  const MixtureOfSubstitutionModels* pMS = dynamic_cast<const MixtureOfSubstitutionModels*>(&model);
1233 
1234  for (unsigned int i = 0; i < pMS->getNumberOfModels(); i++)
1235  {
1236  const SubstitutionModel* eM = pMS->getNModel(i);
1237 
1238  vector<string> vpl = eM->getIndependentParameters().getParameterNames();
1239  for (unsigned j = 0; j < vpl.size(); j++)
1240  {
1241  if (eM->getParameterNameWithoutNamespace(vpl[j]) == "rate")
1242  writtenNames.push_back(vpl[j]);
1243  }
1244  }
1245 
1246  out << "Mixture(";
1247  for (unsigned int i = 0; i < pMS->getNumberOfModels(); i++)
1248  {
1249  if (i != 0)
1250  out << ", ";
1251  out << "model" + TextTools::toString(i + 1) + "=";
1252  write(*pMS->getNModel(i), out, globalAliases, writtenNames);
1253  }
1254  }
1255  else
1256  {
1257  const MixtureOfASubstitutionModel* pMS = dynamic_cast<const MixtureOfASubstitutionModel*>(&model);
1258  out << "MixedModel(model=";
1259  const SubstitutionModel* eM = pMS->getNModel(0);
1260 
1261  ParameterList pl = eM->getIndependentParameters();
1262  vector<string> vpl = pl.getParameterNames();
1263 
1264  for (unsigned j = 0; j < vpl.size(); j++)
1265  {
1266  if (find(writtenNames.begin(), writtenNames.end(), vpl[j]) == writtenNames.end())
1267  {
1268  if (eM->getParameterNameWithoutNamespace(vpl[j]) == "rate")
1269  writtenNames.push_back(vpl[j]);
1270  else
1271  {
1272  const DiscreteDistribution* pDD = pMS->getDistribution(vpl[j]);
1273  if (pDD && dynamic_cast<const ConstantDistribution*>(pDD) == NULL)
1274  {
1275  const BppODiscreteDistributionFormat* bIO = new BppODiscreteDistributionFormat();
1276  StdStr sout;
1277  bIO->write(*pDD, sout, globalAliases, writtenNames);
1278  globalAliases[vpl[j]] = sout.str();
1279  delete bIO;
1280  }
1281  }
1282  }
1283  }
1284 
1285  write(*eM, out, globalAliases, writtenNames);
1286 
1287  if (pMS->from() != -1)
1288  out << ",from=" << model.getAlphabet()->intToChar(pMS->from()) << ",to=" << model.getAlphabet()->intToChar(pMS->to());
1289  }
1290 
1291  const BppOParametrizableFormat* bIO = new BppOParametrizableFormat();
1292  bIO->write(&model, out, globalAliases, model.getIndependentParameters().getParameterNames(), writtenNames, true, true);
1293  delete bIO;
1294 
1295  out << ")";
1296 }
1297 
1299  SubstitutionModel& model,
1300  const SiteContainer* data) throw (Exception)
1301 {
1302  string initFreqs = ApplicationTools::getStringParameter(model.getNamespace() + "initFreqs", unparsedArguments_, "", "", true, warningLevel_);
1303  if (verbose_)
1304  ApplicationTools::displayResult("External frequencies initialization for model", (initFreqs == "") ? "None" : initFreqs);
1305 
1306  if (initFreqs != "")
1307  {
1308  if (initFreqs == "observed")
1309  {
1310  if (!data)
1311  throw Exception("BppOSubstitutionModelFormat::initialize_(). Missing data for observed frequencies");
1312  unsigned int psi = ApplicationTools::getParameter<unsigned int>(model.getNamespace() + "initFreqs.observedPseudoCount", unparsedArguments_, 0, "", true, warningLevel_);
1313  model.setFreqFromData(*data, psi);
1314  }
1315  else if (initFreqs.substr(0, 6) == "values")
1316  {
1317  // Initialization using the "values" argument
1318  map<int, double> frequencies;
1319 
1320  string rf = initFreqs.substr(6);
1321  StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
1322  int i = 0;
1323  while (strtok.hasMoreToken())
1324  frequencies[i++] = TextTools::toDouble(strtok.nextToken());
1325  model.setFreq(frequencies);
1326  }
1327  else
1328  throw Exception("Unknown initFreqs argument");
1329  }
1330 
1331  ParameterList pl = model.getIndependentParameters();
1332  for (size_t i = 0; i < pl.size(); i++)
1333  {
1334  AutoParameter ap(pl[i]);
1335  ap.setMessageHandler(ApplicationTools::warning);
1336  pl.setParameter(i, ap);
1337  }
1338  size_t posp;
1339  for (size_t i = 0; i < pl.size(); i++)
1340  {
1341  const string pName = pl[i].getName();
1342  posp = pName.rfind(".");
1343  bool test1 = (initFreqs == "");
1344  bool test2 = (model.getParameterNameWithoutNamespace(pName).size() < posp + 6) || (model.getParameterNameWithoutNamespace(pName).substr(posp + 1, 5) != "theta");
1345  bool test3 = (unparsedArguments_.find(pName) != unparsedArguments_.end());
1346  try {
1347  if (test1 || test2 || test3)
1348  {
1349  if (!test1 && !test2 && test3)
1350  ApplicationTools::displayWarning("Warning, initFreqs argument is set and a value is set for parameter " + pName);
1351 
1352  double value = ApplicationTools::getDoubleParameter(pName, unparsedArguments_, pl[i].getValue(), "", true, warningLevel_);
1353  pl[i].setValue(value);
1354 
1355  if (unparsedArguments_.find(pName) != unparsedArguments_.end())
1356  unparsedArguments_.erase(unparsedArguments_.find(pName));
1357  }
1358  if (verbose_)
1359  ApplicationTools::displayResult("Parameter found", pName + "=" + TextTools::toString(pl[i].getValue()));
1360  }
1361 
1362  catch (Exception& e) {}
1363  }
1364 
1365 
1366  model.matchParametersValues(pl);
1367 }
1368 
1369 
The Le et al (2008) EX3 substitution model for proteins.
Definition: LLG08_EX3.h:79
The Le et al (2008) UL3 substitution model for proteins.
Definition: LLG08_UL3.h:78
The no-strand bias substitution model for nucleotides, from Lobry 1995. The point of this model is th...
Definition: L95.h:93
Interface for all substitution models.
SubstitutionModel * readWord_(const Alphabet *alphabet, const std::string &modelDescription, const SiteContainer *data)
void writeMixed_(const MixedSubstitutionModel &model, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const
The Jukes-Cantor substitution model for proteins.
Definition: JCprot.h:134
const SubstitutionModel * getNestedModel() const
Definition: gBGC.h:118
Frequencies set I/O in BppO format.
The Strand Symmetric Reversible substitution model for nucleotides.
Definition: SSR.h:98
virtual size_t getNumberOfModels() const
returns the number of models in the mixture
int from() const
Numbers of the states between which the substitution rates of all the submodels must be equal...
Class for substitution models on non stop codons, with different rates on the models, depending on their phase.
size_t getNbrOfAxes() const
Definition: CoalaCore.h:85
The Whelan and Goldman substitution model for proteins.
Definition: WAG01.h:71
gBGC model.
Definition: gBGC.h:75
YpR model.
Definition: YpR.h:123
void write(const SubstitutionModel &model, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const
Write a substitution model to a stream.
virtual const FrequenciesSet * getFrequenciesSet() const
If the model owns a FrequenciesSet, returns a pointer to it, otherwise return 0.
virtual const Alphabet * getAlphabet() const =0
The Le et al (2008) EH0 substitution model for proteins.
Definition: LLG08_EHO.h:78
STL namespace.
The Le and Gascuel substitution model for proteins.
Definition: LG08.h:64
The Kimura 2-rates substitution model for nucleotides.
Definition: K80.h:150
The Goldman and Yang (1994) substitution model for codons.
Definition: GY94.h:82
Class for asynonymous substitution models on codons with parameterized equilibrium frequencies and nu...
Class for neutral substitution models on triplets, which correspond to codons that do not have any si...
const std::map< std::string, std::string > & getUnparsedArguments() const
Parametrize a set of state frequencies.
void initialize_(SubstitutionModel &model, const SiteContainer *data)
Set parameter initial values of a given model according to options.
The Tamura (1992) substitution model for nucleotides.
Definition: T92.h:159
std::string getExch() const
Definition: Coala.h:101
virtual void setFreqFromData(const SequenceContainer &data, double pseudoCount=0)=0
Set equilibrium frequencies equal to the frequencies estimated from the data.
The Le et al (2008) CAT substitution model for proteins.
Definition: LGL08_CAT.h:74
void write(const FrequenciesSet *pfreqset, OutputStream &out, std::vector< std::string > &writtenNames) const
Write a substitution model to a stream.
The Le et al (2008) EX2 substitution model for proteins.
Definition: LLG08_EX2.h:74
The Le et al (2008) UL2 substitution model for proteins.
Definition: LLG08_UL2.h:74
Galtier&#39;s 2001 covarion model.
Definition: G2001.h:63
const SubstitutionModel * getNestedModel() const
Definition: RE08.h:194
The Le and Gascuel (2010) EX_EHO substitution model for proteins.
Definition: LG10_EX_EHO.h:75
symetrical YpR model.
Definition: YpR.h:211
The Tamura and Nei (1993) substitution model for nucleotides.
Definition: TN93.h:130
The Hasegawa M, Kishino H and Yano T (1985) substitution model for nucleotides.
Definition: HKY85.h:179
virtual std::string getName() const =0
Get the name of the model.
The Yang and Nielsen (1998) substitution model for codons.
Definition: YN98.h:88
The Yang et al (2000) M8 substitution model for codons.
Definition: YNGKP_M8.h:73
Abstract Basal class for words of substitution models.
FrequenciesSet * read(const Alphabet *alphabet, const std::string &freqDescription, const SiteContainer *data, bool parseArguments=true)
Read a frequencies set from a string.
Rate Distribution I/O in BppO format.
The General Time-Reversible substitution model for nucleotides.
Definition: GTR.h:138
Intersection of models RN95 and L95.
Definition: RN95s.h:110
Tuffley and Steel&#39;s 1998 covarion model.
Definition: TS98.h:77
The Yang et al (2000) M3 substitution model for codons.
Definition: YNGKP_M3.h:75
The Jones, Taylor and Thornton substitution model for proteins.
Definition: JTT92.h:66
MixedSubstitutionModel * readMixed_(const Alphabet *alphabet, const std::string &modelDescription, const SiteContainer *data)
Substitution models defined as a mixture of nested substitution models.
Substitution model I/O in BppO format.
const ReversibleSubstitutionModel * getNestedModel() const
const std::map< std::string, std::string > & getUnparsedArguments() const
Specialisation interface for nucleotide substitution model.
Partial implementation of the Markov-modulated class of substitution models.
The Yang et al (2000) M2 substitution model for codons, with the more realistic modification in Wong ...
Definition: YNGKP_M2.h:75
The Muse and Gaut (1994) substitution model for codons.
Definition: MG94.h:79
const DiscreteDistribution * getRateDistribution() const
Definition: G2001.h:129
The Coala branch-heterogeneous amino-acid substitution model.
Definition: Coala.h:72
The Yang et al (2000) M7 substitution model for codons.
Definition: YNGKP_M7.h:70
General YpR model.
Definition: YpR.h:256
const SubstitutionModel * getNModel(size_t i) const
returns the ith model, or Null if i is not a valid number.
SubstitutionModel * read(const Alphabet *alphabet, const std::string &modelDescription, const SiteContainer *data=0, bool parseArguments=true)
Read a substitution model from a string.
Basal class for words of reversible substitution models.
const SubstitutionModel * getNestedModel() const
Definition: YpR.h:176
The Jukes-Cantor substitution model for nucleotides.
Definition: JCnuc.h:129
const DiscreteDistribution * getDistribution(std::string &parName) const
returns the DiscreteDistribution associated with a given parameter name.
void setGeneticCode(const GeneticCode *gCode)
Set the genetic code to use in case a codon frequencies set should be built.
virtual size_t getNumberOfModels() const =0
The Felsenstein (1984) substitution model for nucleotides.
Definition: F84.h:178
virtual const SubstitutionModel * getNModel(size_t i) const
Returns a specific model from the mixture.
Class for asynonymous substitution models on codons with parameterized equilibrium frequencies and nu...
Build an empirical protein substitution model from a file.
The model described by Rhetsky & Nei, where the only hypothesis is that the transversion rates are on...
Definition: RN95.h:130
The Rivas-Eddy substitution model with gap characters.
Definition: RE08.h:96
The Yang et al (2000) M1 substitution model for codons, with the more realistic modification in Wong ...
Definition: YNGKP_M1.h:79
The Dayhoff, Schwartz and Orcutt substitution model for proteins.
Definition: DSO78.h:66
Substitution models defined as a mixture of several substitution models.
Class for substitution models on non stop codons, with different parametrized rates on the models...
Class for substitution models of codons with non-synonymous/synonymous ratios of substitution rates d...
Class for asynonymous and synonymous substitution models on codons with parameterized equilibrium fre...
Interface for Substitution models, defined as a mixture of "simple" substitution models.