43 #include <Bpp/Io/FileTools.h> 44 #include <Bpp/Text/TextTools.h> 45 #include <Bpp/Text/StringTokenizer.h> 46 #include <Bpp/Text/KeyvalTools.h> 48 #include "../Model/Codon/MG94.h" 49 #include "../Model/Codon/GY94.h" 50 #include "../Model/Codon/YNGKP_M1.h" 51 #include "../Model/Codon/YNGKP_M2.h" 52 #include "../Model/Codon/YNGKP_M3.h" 53 #include "../Model/Codon/YNGKP_M7.h" 54 #include "../Model/Codon/YNGKP_M8.h" 55 #include "../Model/Codon/YN98.h" 56 #include "../Model/Codon/TripletSubstitutionModel.h" 57 #include "../Model/Codon/CodonRateSubstitutionModel.h" 58 #include "../Model/Codon/CodonDistanceSubstitutionModel.h" 59 #include "../Model/Codon/CodonRateFrequenciesSubstitutionModel.h" 60 #include "../Model/Codon/CodonDistanceFrequenciesSubstitutionModel.h" 61 #include "../Model/Codon/CodonDistancePhaseFrequenciesSubstitutionModel.h" 62 #include "../Model/Codon/CodonDistanceFitnessPhaseFrequenciesSubstitutionModel.h" 63 #include "../Model/RE08.h" 64 #include "../Model/TS98.h" 65 #include "../Model/G2001.h" 66 #include "../Model/Nucleotide/F84.h" 67 #include "../Model/Nucleotide/NucleotideSubstitutionModel.h" 68 #include "../Model/Nucleotide/gBGC.h" 69 #include "../Model/Nucleotide/RN95.h" 70 #include "../Model/Nucleotide/GTR.h" 71 #include "../Model/Nucleotide/RN95s.h" 72 #include "../Model/Nucleotide/HKY85.h" 73 #include "../Model/Nucleotide/SSR.h" 74 #include "../Model/Nucleotide/JCnuc.h" 75 #include "../Model/Nucleotide/T92.h" 76 #include "../Model/Nucleotide/K80.h" 77 #include "../Model/Nucleotide/TN93.h" 78 #include "../Model/Nucleotide/L95.h" 79 #include "../Model/Nucleotide/YpR.h" 80 #include "../Model/Protein/CoalaCore.h" 81 #include "../Model/Protein/LLG08_EX2.h" 82 #include "../Model/Protein/Coala.h" 83 #include "../Model/Protein/LLG08_EX3.h" 84 #include "../Model/Protein/DSO78.h" 85 #include "../Model/Protein/LLG08_UL2.h" 86 #include "../Model/Protein/JCprot.h" 87 #include "../Model/Protein/LLG08_UL3.h" 88 #include "../Model/Protein/JTT92.h" 89 #include "../Model/Protein/ProteinSubstitutionModel.h" 90 #include "../Model/Protein/LG08.h" 91 #include "../Model/Protein/UserProteinSubstitutionModel.h" 92 #include "../Model/Protein/LGL08_CAT.h" 93 #include "../Model/Protein/WAG01.h" 94 #include "../Model/Protein/LLG08_EHO.h" 95 #include "../Model/Protein/LG10_EX_EHO.h" 96 #include "../Model/BinarySubstitutionModel.h" 98 #include "../App/PhylogeneticsApplicationTools.h" 102 #include <Bpp/Seq/App/SequenceApplicationTools.h> 103 #include <Bpp/Seq/Alphabet/AlphabetTools.h> 105 #include <Bpp/Io/OutputStream.h> 106 #include <Bpp/Io/BppOParametrizableFormat.h> 107 #include <Bpp/Io/BppODiscreteDistributionFormat.h> 111 #include <Bpp/Numeric/Prob/ConstantDistribution.h> 112 #include <Bpp/Numeric/AutoParameter.h> 132 const Alphabet* alphabet,
133 const std::string& modelDescription,
134 const SiteContainer* data,
137 unparsedArguments_.clear();
138 auto_ptr<SubstitutionModel> model;
139 string modelName =
"";
140 map<string, string> args;
141 KeyvalTools::parseProcedure(modelDescription, modelName, args);
147 if ((modelName ==
"MixedModel" || (modelName ==
"Mixture")) && allowMixed_)
148 model.reset(readMixed_(alphabet, modelDescription, data));
155 else if ((modelName ==
"Word") || (modelName ==
"Triplet") || (modelName.substr(0, 5) ==
"Codon"))
156 model.reset(readWord_(alphabet, modelDescription, data));
163 else if (((modelName ==
"MG94") || (modelName ==
"YN98") ||
164 (modelName ==
"GY94") || (modelName.substr(0, 5) ==
"YNGKP")) && (alphabetCode_ & CODON))
166 if (!(alphabetCode_ & CODON))
167 throw Exception(
"BppOSubstitutionModelFormat::read. Codon alphabet not supported.");
169 throw Exception(
"BppOSubstitutionModelFormat::read(). No genetic code specified! Consider using 'setGeneticCode'.");
171 if (!AlphabetTools::isCodonAlphabet(alphabet))
172 throw Exception(
"Alphabet should be Codon Alphabet.");
174 const CodonAlphabet* pCA =
dynamic_cast<const CodonAlphabet*
>(alphabet);
176 if (args.find(
"genetic_code") != args.end()) {
177 ApplicationTools::displayWarning(
"'genetic_code' argument is no longer supported inside model description, and has been supersided by a global 'genetic_code' option.");
178 throw Exception(
"BppOSubstitutionModelFormat::read. Deprecated 'genetic_code' argument.");
181 if (geneticCode_->getSourceAlphabet()->getAlphabetType() != pCA->getAlphabetType())
182 throw Exception(
"Mismatch between genetic code and codon alphabet");
184 string freqOpt = ApplicationTools::getStringParameter(
"frequencies", args,
"F0",
"",
true, warningLevel_);
187 auto_ptr<FrequenciesSet> codonFreqs(freqReader.
read(pCA, freqOpt, data,
false));
190 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
192 unparsedArguments_[modelName +
"." + it->first] = it->second;
195 if (modelName ==
"MG94")
196 model.reset(
new MG94(geneticCode_, codonFreqs.release()));
197 else if (modelName ==
"GY94")
198 model.reset(
new GY94(geneticCode_, codonFreqs.release()));
199 else if ((modelName ==
"YN98") || (modelName ==
"YNGKP_M0"))
200 model.reset(
new YN98(geneticCode_, codonFreqs.release()));
201 else if (modelName ==
"YNGKP_M1")
202 model.reset(
new YNGKP_M1(geneticCode_, codonFreqs.release()));
203 else if (modelName ==
"YNGKP_M2")
204 model.reset(
new YNGKP_M2(geneticCode_, codonFreqs.release()));
205 else if (modelName ==
"YNGKP_M3")
206 if (args.find(
"n") == args.end())
207 model.reset(
new YNGKP_M3(geneticCode_, codonFreqs.release()));
209 model.reset(
new YNGKP_M3(geneticCode_, codonFreqs.release(), TextTools::to<unsigned int>(args[
"n"])));
210 else if ((modelName ==
"YNGKP_M7") || modelName ==
"YNGKP_M8")
212 if (args.find(
"n") == args.end())
213 throw Exception(
"Missing argument 'n' (number of classes) in " + modelName +
" distribution");
214 unsigned int nbClasses = TextTools::to<unsigned int>(args[
"n"]);
216 ApplicationTools::displayResult(
"Number of classes in model", nbClasses);
218 if (modelName ==
"YNGKP_M7")
219 model.reset(
new YNGKP_M7(geneticCode_, codonFreqs.release(), nbClasses));
220 else if (modelName ==
"YNGKP_M8")
221 model.reset(
new YNGKP_M8(geneticCode_, codonFreqs.release(), nbClasses));
224 throw Exception(
"Unknown Codon model: " + modelName);
232 else if (modelName ==
"gBGC")
234 if (!(alphabetCode_ & NUCLEOTIDE))
235 throw Exception(
"BppOSubstitutionModelFormat::read. Nucleotide alphabet not supported.");
237 string nestedModelDescription = args[
"model"];
238 if (TextTools::isEmpty(nestedModelDescription))
239 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'gBGC'.");
241 ApplicationTools::displayResult(
"Biased gene conversion", modelName);
243 auto_ptr<NucleotideSubstitutionModel> nestedModel(dynamic_cast<NucleotideSubstitutionModel*>(nestedReader.
read(alphabet, nestedModelDescription, data,
false)));
247 model.reset(
new gBGC(dynamic_cast<const NucleicAlphabet*>(alphabet), nestedModel.release()));
250 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
252 unparsedArguments_[
"gBGC." + it->first] = it->second;
260 else if (modelName ==
"YpR_Sym")
262 if (!(alphabetCode_ & NUCLEOTIDE))
263 throw Exception(
"BppOSubstitutionModelFormat::read. Nucleotide alphabet not supported.");
264 if (alphabet->getAlphabetType() !=
"RNY alphabet")
265 throw Exception(
"Mismatch alphabet: " + alphabet->getAlphabetType() +
" for model: " + modelName);
266 const RNY* prny =
dynamic_cast<const RNY*
>(alphabet);
268 string nestedModelDescription = args[
"model"];
269 if (TextTools::isEmpty(nestedModelDescription))
270 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'YpR_sym'.");
272 ApplicationTools::displayResult(
"Symetric YpR model", modelName);
274 auto_ptr<NucleotideSubstitutionModel> nestedModel(dynamic_cast<NucleotideSubstitutionModel*>(nestedReader.
read(&prny->getLetterAlphabet(), nestedModelDescription, data,
false)));
276 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
278 unparsedArguments_[
"YpR_Sym." + it->first] = it->second;
281 model.reset(
new YpR_Sym(prny, nestedModel.release()));
283 else if (modelName ==
"YpR_Gen")
285 if (!(alphabetCode_ & NUCLEOTIDE))
286 throw Exception(
"BppOSubstitutionModelFormat::read. Nucleotide alphabet not supported.");
287 if (alphabet->getAlphabetType() !=
"RNY alphabet")
288 throw Exception(
"Mismatch alphabet: " + alphabet->getAlphabetType() +
" for model: " + modelName);
289 const RNY* prny =
dynamic_cast<const RNY*
>(alphabet);
291 string nestedModelDescription = args[
"model"];
292 if (TextTools::isEmpty(nestedModelDescription))
293 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'YpR_gen'.");
295 ApplicationTools::displayResult(
"General YpR model", modelName);
297 auto_ptr<NucleotideSubstitutionModel> nestedModel(dynamic_cast<NucleotideSubstitutionModel*>(nestedReader.
read(&prny->getLetterAlphabet(), nestedModelDescription, data,
false)));
300 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
302 unparsedArguments_[
"YpR_Gen." + it->first] = it->second;
305 model.reset(
new YpR_Gen(prny, nestedModel.release()));
313 else if (modelName ==
"RE08")
316 throw Exception(
"BppOSubstitutionModelFormat::read. No Gap model allowed here.");
319 string nestedModelDescription = args[
"model"];
320 if (TextTools::isEmpty(nestedModelDescription))
321 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'RE08'.");
323 ApplicationTools::displayResult(
"Gap model", modelName);
325 auto_ptr<ReversibleSubstitutionModel> nestedModel(dynamic_cast<ReversibleSubstitutionModel*>(nestedReader.
read(alphabet, nestedModelDescription, data,
false)));
329 model.reset(
new RE08(nestedModel.release()));
332 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
334 unparsedArguments_[
"RE08.model_" + it->first] = it->second;
342 else if (modelName ==
"TS98")
344 if (!allowCovarions_)
345 throw Exception(
"BppOSubstitutionModelFormat::read. No Covarion model allowed here.");
348 string nestedModelDescription = args[
"model"];
349 if (TextTools::isEmpty(nestedModelDescription))
350 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'TS98'.");
352 ApplicationTools::displayResult(
"Covarion model", modelName);
354 auto_ptr<ReversibleSubstitutionModel> nestedModel(dynamic_cast<ReversibleSubstitutionModel*>(nestedReader.
read(alphabet, nestedModelDescription, data,
false)));
358 model.reset(
new TS98(nestedModel.release()));
361 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
363 unparsedArguments_[
"TS98.model_" + it->first] = it->second;
371 else if (modelName ==
"G01")
373 if (!allowCovarions_)
374 throw Exception(
"BppOSubstitutionModelFormat::read. No Covarion model allowed here.");
377 string nestedModelDescription = args[
"model"];
378 if (TextTools::isEmpty(nestedModelDescription))
379 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'model' for model 'G01'.");
380 string nestedRateDistDescription = args[
"rdist"];
381 if (TextTools::isEmpty(nestedRateDistDescription))
382 throw Exception(
"BppOSubstitutionModelFormat::read. Missing argument 'rdist' for model 'G01'.");
384 ApplicationTools::displayResult(
"Covarion model", modelName);
386 auto_ptr<ReversibleSubstitutionModel> nestedModel(dynamic_cast<ReversibleSubstitutionModel*>(nestedReader.
read(alphabet, nestedModelDescription, data,
false)));
389 auto_ptr<DiscreteDistribution> nestedRDist(rateReader.read(nestedRateDistDescription,
false));
390 map<string, string> unparsedParameterValuesNestedDist(rateReader.getUnparsedArguments());
393 model.reset(
new G2001(nestedModel.release(), nestedRDist.release()));
396 for (map<string, string>::iterator it = unparsedParameterValuesNestedModel.begin(); it != unparsedParameterValuesNestedModel.end(); it++)
398 unparsedArguments_[
"G01.model_" + it->first] = it->second;
400 for (map<string, string>::iterator it = unparsedParameterValuesNestedDist.begin(); it != unparsedParameterValuesNestedDist.end(); it++)
402 unparsedArguments_[
"G01.rdist_" + it->first] = it->second;
408 if (AlphabetTools::isNucleicAlphabet(alphabet))
410 if (!(alphabetCode_ & NUCLEOTIDE))
411 throw Exception(
"BppOSubstitutionModelFormat::read. Nucleotide alphabet not supported.");
412 const NucleicAlphabet* alpha =
dynamic_cast<const NucleicAlphabet*
>(alphabet);
418 if (modelName ==
"GTR")
420 model.reset(
new GTR(alpha));
428 else if (modelName ==
"SSR")
430 model.reset(
new SSR(alpha));
437 else if (modelName ==
"L95")
439 model.reset(
new L95(alpha));
446 else if (modelName ==
"RN95")
448 model.reset(
new RN95(alpha));
455 else if (modelName ==
"RN95s")
457 model.reset(
new RN95s(alpha));
464 else if (modelName ==
"TN93")
466 model.reset(
new TN93(alpha));
473 else if (modelName ==
"HKY85")
475 model.reset(
new HKY85(alpha));
482 else if (modelName ==
"F84")
484 model.reset(
new F84(alpha));
491 else if (modelName ==
"T92")
493 model.reset(
new T92(alpha));
500 else if (modelName ==
"K80")
502 model.reset(
new K80(alpha));
510 else if (modelName ==
"JC69")
512 model.reset(
new JCnuc(alpha));
516 throw Exception(
"Model '" + modelName +
"' unknown.");
521 if (!(alphabetCode_ & PROTEIN))
522 throw Exception(
"BppOSubstitutionModelFormat::read. Protein alphabet not supported.");
523 const ProteicAlphabet* alpha =
dynamic_cast<const ProteicAlphabet*
>(alphabet);
525 if (modelName.find(
"+F") != string::npos) {
526 string freqOpt = ApplicationTools::getStringParameter(
"frequencies", args,
"Full",
"",
true, warningLevel_);
528 auto_ptr<FrequenciesSet> protFreq(freqReader.
read(alpha, freqOpt, data,
true));
531 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
533 unparsedArguments_[modelName +
"." + it->first] = it->second;
536 if (modelName ==
"JC69+F")
537 model.reset(
new JCprot(alpha, dynamic_cast<ProteinFrequenciesSet*>(protFreq.release()),
true));
538 else if (modelName ==
"DSO78+F")
539 model.reset(
new DSO78(alpha, dynamic_cast<ProteinFrequenciesSet*>(protFreq.release()),
true));
540 else if (modelName ==
"JTT92+F")
541 model.reset(
new JTT92(alpha, dynamic_cast<ProteinFrequenciesSet*>(protFreq.release()),
true));
542 else if (modelName ==
"LG08+F")
543 model.reset(
new LG08(alpha, dynamic_cast<ProteinFrequenciesSet*>(protFreq.release()),
true));
544 else if (modelName ==
"WAG01+F")
545 model.reset(
new WAG01(alpha, dynamic_cast<ProteinFrequenciesSet*>(protFreq.release()),
true));
546 else if (modelName ==
"Empirical+F")
548 string prefix = args[
"name"];
549 if (TextTools::isEmpty(prefix))
550 throw Exception(
"'name' argument missing for user-defined substitution model.");
551 model.reset(
new UserProteinSubstitutionModel(alpha, args[
"file"], dynamic_cast<ProteinFrequenciesSet*>(protFreq.release()), prefix +
"+F.",
true));
554 else if (modelName ==
"JC69")
555 model.reset(
new JCprot(alpha));
556 else if (modelName ==
"DSO78")
557 model.reset(
new DSO78(alpha));
558 else if (modelName ==
"JTT92")
559 model.reset(
new JTT92(alpha));
560 else if (modelName ==
"LG08")
561 model.reset(
new LG08(alpha));
562 else if (modelName ==
"WAG01")
563 model.reset(
new WAG01(alpha));
564 else if (modelName ==
"LLG08_EHO")
566 else if (modelName ==
"LLG08_EX2")
568 else if (modelName ==
"LLG08_EX3")
570 else if (modelName ==
"LLG08_UL2")
572 else if (modelName ==
"LLG08_UL3")
574 else if (modelName ==
"LG10_EX_EHO")
576 else if (modelName ==
"LGL08_CAT")
578 unsigned int nbCat = TextTools::to<unsigned int>(args[
"nbCat"]);
579 model.reset(
new LGL08_CAT(alpha, nbCat));
581 else if (modelName ==
"Empirical")
583 string prefix = args[
"name"];
584 if (TextTools::isEmpty(prefix))
585 throw Exception(
"'name' argument missing for user-defined substitution model.");
588 else if (modelName ==
"Coala")
590 string exchangeability = args[
"exch"];
591 if (TextTools::isEmpty(exchangeability))
592 throw Exception(
"BppOSubstitutionModelFormat::read. missing argument 'exch' for model 'Coala'.");
593 string prefix = args[
"name"];
594 if (exchangeability ==
"Empirical" && TextTools::isEmpty(prefix))
595 throw Exception(
"'name' argument missing to specify the exchangeabilities of the user-defined empirical model.");
597 auto_ptr<ProteinSubstitutionModel> nestedModel(dynamic_cast<ProteinSubstitutionModel*>(nestedReader.
read(alphabet, exchangeability, data,
false)));
598 string nbrOfParametersPerBranch = args[
"nbrAxes"];
599 if (TextTools::isEmpty(nbrOfParametersPerBranch))
600 throw Exception(
"'nbrAxes' argument missing to define the number of axis of the Correspondence Analysis.");
602 model.reset(
new Coala(alpha, *nestedModel, TextTools::to<unsigned int>(nbrOfParametersPerBranch)));
606 else if (AlphabetTools::isBinaryAlphabet(alphabet))
608 if (!(alphabetCode_ & BINARY))
609 throw Exception(
"BppOSubstitutionModelFormat::read. Binary alphabet not supported.");
610 const BinaryAlphabet* balpha =
dynamic_cast<const BinaryAlphabet*
>(alphabet);
612 if (modelName ==
"Binary")
616 throw Exception(
"Model '" + modelName +
"' unknown.");
619 ApplicationTools::displayResult(
"Substitution model", modelName);
623 vector<string> pnames = model->getParameters().getParameterNames();
625 string pref = model->getNamespace();
627 for (
size_t i = 0; i < pnames.size(); i++)
629 string name = model->getParameterNameWithoutNamespace(pnames[i]);
630 if (args.find(name) != args.end())
631 unparsedArguments_[pref + name] = args[name];
635 ParameterList pl = model->getIndependentParameters();
636 string pname, pval, pname2;
637 for (
size_t i = 0; i < pl.size(); i++)
639 pname = model->getParameterNameWithoutNamespace(pl[i].getName());
641 if (args.find(pname) == args.end())
645 if (((pval.rfind(
"_") != string::npos) && (TextTools::isDecimalInteger(pval.substr(pval.rfind(
"_")+1,string::npos)))) ||
646 (pval.find(
"(") != string::npos))
649 for (
unsigned int j = 0; j < pl.size() && !found; j++)
651 pname2 = model->getParameterNameWithoutNamespace(pl[j].getName());
660 model->aliasParameters(pname2, pname);
662 ApplicationTools::displayResult(
"Parameter alias found", pname +
"->" + pname2);
671 if (args.find(
"useObservedFreqs") != args.end())
672 throw Exception(
"useObservedFreqs argument is obsolete. Please use 'initFreqs=observed' instead.");
673 if (args.find(
"useObservedFreqs.pseudoCount") != args.end())
674 throw Exception(
"useObservedFreqs.pseudoCount argument is obsolete. Please use 'initFreqs.observedPseudoCount' instead.");
677 if (args.find(
"initFreqs") != args.end())
678 unparsedArguments_[pref +
"initFreqs"] = args[
"initFreqs"];
679 if (args.find(
"initFreqs.observedPseudoCount") != args.end())
680 unparsedArguments_[pref +
"initFreqs.observedPseudoCount"] = args[
"initFreqs.observedPseudoCount"];
683 initialize_(*model, data);
685 return model.release();
691 auto_ptr<SubstitutionModel> model;
692 string modelName =
"";
693 map<string, string> args;
694 KeyvalTools::parseProcedure(modelDescription, modelName, args);
696 vector<string> v_nestedModelDescription;
697 vector<SubstitutionModel*> v_pSM;
698 const WordAlphabet* pWA;
700 string s, nestedModelDescription;
701 unsigned int nbmodels;
703 if ((modelName ==
"Word" && !AlphabetTools::isWordAlphabet(alphabet)) ||
704 (modelName !=
"Word" && !AlphabetTools::isCodonAlphabet(alphabet)))
705 throw Exception(
"Bad alphabet type " 706 + alphabet->getAlphabetType() +
" for model " + modelName +
".");
708 pWA =
dynamic_cast<const WordAlphabet*
>(alphabet);
710 if (args.find(
"model") != args.end())
712 v_nestedModelDescription.push_back(args[
"model"]);
713 nbmodels = (modelName ==
"Word") ? pWA->getLength() : 3;
717 if (args.find(
"model1") == args.end())
718 throw Exception(
"Missing argument 'model' or 'model1' for model " + modelName +
".");
722 while (args.find(
"model" + TextTools::toString(nbmodels + 1)) != args.end())
723 v_nestedModelDescription.push_back(args[
"model" + TextTools::toString(++nbmodels)]);
727 throw Exception(
"Missing nested models for model " + modelName +
".");
729 if (pWA->getLength() != nbmodels)
730 throw Exception(
"Bad alphabet type " 731 + alphabet->getAlphabetType() +
" for model " + modelName +
".");
734 if (v_nestedModelDescription.size() != nbmodels)
737 model.reset(nestedReader.
read(pWA->getNAlphabet(0), v_nestedModelDescription[0], data,
false));
740 for (
unsigned int i = 0; i < nbmodels; i++)
742 pref += TextTools::toString(i + 1);
745 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
747 unparsedArguments_[modelName +
"." + pref +
"_" + it->first] = it->second;
750 v_pSM.push_back(model.release());
754 for (
unsigned i = 0; i < v_nestedModelDescription.size(); i++)
757 model.reset(nestedReader.
read(pWA->getNAlphabet(i), v_nestedModelDescription[i], data,
false));
759 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
761 unparsedArguments_[modelName +
"." + TextTools::toString(i + 1) +
"_" + it->first] = it->second;
764 v_pSM.push_back(model.release());
772 if (modelName ==
"Word")
774 model.reset((v_nestedModelDescription.size() != nbmodels)
785 const CodonAlphabet* pCA =
dynamic_cast<const CodonAlphabet*
>(pWA);
787 throw Exception(
"Non codon Alphabet for model" + modelName +
".");
789 auto_ptr< AlphabetIndex2 > pai2;
790 auto_ptr<FrequenciesSet> pFS;
792 if ((dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]) == 0) ||
793 ((v_nestedModelDescription.size() == 3) &&
795 throw Exception(
"Non simple NucleotideSubstitutionModel imbedded in " + modelName +
" model.");
797 if (args.find(
"genetic_code") != args.end()) {
798 ApplicationTools::displayWarning(
"'genetic_code' argument is no longer supported inside model description, and has been supersided by a global 'genetic_code' option.");
799 throw Exception(
"BppOSubstitutionModelFormat::read. Deprecated 'genetic_code' argument.");
803 throw Exception(
"BppOSubstitutionModelFormat::readWord_(). No genetic code specified! Consider using 'setGeneticCode'.");
806 if (modelName.find(
"Dist") != string::npos)
807 pai2.reset((args.find(
"aadistance") == args.end()) ? 0 : SequenceApplicationTools::getAlphabetIndex2(&AlphabetTools::PROTEIN_ALPHABET, args[
"aadistance"]));
810 if (modelName.find(
"Freq") != string::npos)
812 if (args.find(
"frequencies") == args.end())
813 throw Exception(
"Missing equilibrium frequencies.");
817 pFS.reset(bIOFreq.
read(pCA, args[
"frequencies"], data,
false));
820 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
822 unparsedArguments_[modelName +
"." + it->first] = it->second;
829 if (modelName ==
"Triplet")
830 model.reset((v_nestedModelDescription.size() != 3)
833 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]))
836 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
838 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2])));
840 else if (modelName ==
"CodonRate")
841 model.reset((v_nestedModelDescription.size() != 3)
844 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]))
847 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
849 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2])));
852 else if (modelName ==
"CodonDist")
854 if (v_nestedModelDescription.size() != 3)
859 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
860 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]),
861 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2]), pai2.release()));
864 else if (modelName ==
"CodonRateFreq")
866 if (v_nestedModelDescription.size() != 3)
870 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
876 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
877 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]),
878 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2]),
882 else if (modelName ==
"CodonDistFreq")
884 if (v_nestedModelDescription.size() != 3)
886 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
892 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
893 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]),
894 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2]),
899 else if (modelName ==
"CodonDistPhasFreq")
901 if (v_nestedModelDescription.size() != 3)
903 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
909 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
910 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]),
911 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2]),
915 else if (modelName ==
"CodonDistFitPhasFreq")
917 if (args.find(
"fitness") == args.end())
918 throw Exception(
"Missing fitness in model " + modelName +
".");
922 auto_ptr<FrequenciesSet> pFit(bIOFreq.
read(pCA, args[
"fitness"], data,
false));
925 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
927 unparsedArguments_[modelName +
".fit_" + it->first] = it->second;
930 if (v_nestedModelDescription.size() != 3)
933 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
941 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[0]),
942 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[1]),
943 dynamic_cast<NucleotideSubstitutionModel*>(v_pSM[2]),
949 return model.release();
955 auto_ptr<MixedSubstitutionModel> model;
957 string modelName =
"";
958 map<string, string> args;
959 KeyvalTools::parseProcedure(modelDescription, modelName, args);
960 auto_ptr<SubstitutionModel> pSM;
962 if (modelName ==
"MixedModel")
964 if (args.find(
"model") == args.end())
965 throw Exception(
"The argument 'model' is missing from MixedSubstitutionModel description");
966 string nestedModelDescription = args[
"model"];
968 pSM.reset(nestedReader.
read(alphabet, nestedModelDescription, data,
false));
971 map<string, DiscreteDistribution*> mdist;
972 map<string, string> unparsedParameterValuesNested2;
974 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin();
975 it != unparsedParameterValuesNested.end();
978 if (it->second.find(
"(") != string::npos)
980 BppODiscreteDistributionFormat bIO(
false);
981 mdist[pSM->getParameterNameWithoutNamespace(it->first)] = bIO.read(it->second,
false);
982 map<string, string> unparsedParameterValuesNested3(bIO.getUnparsedArguments());
983 for (map<string, string>::iterator it2 = unparsedParameterValuesNested3.begin();
984 it2 != unparsedParameterValuesNested3.end();
987 unparsedParameterValuesNested2[it->first +
"_" + it2->first] = it2->second;
991 unparsedParameterValuesNested2[it->first] = it->second;
994 for (map<string, string>::iterator it = unparsedParameterValuesNested2.begin();
995 it != unparsedParameterValuesNested2.end();
998 unparsedArguments_[it->first] = it->second;
1004 if (args.find(
"from") != args.end())
1005 fi = alphabet->charToInt(args[
"from"]);
1006 if (args.find(
"to") != args.end())
1007 ti = alphabet->charToInt(args[
"to"]);
1012 vector<string> v = model->getParameters().getParameterNames();
1014 for (map<string, DiscreteDistribution*>::iterator it = mdist.begin();
1015 it != mdist.end(); it++)
1022 ApplicationTools::displayResult(
"Mixture Of A Substitution Model", sModN);
1023 ApplicationTools::displayResult(
"Number of classes", model->
getNumberOfModels());
1028 else if (modelName ==
"Mixture")
1030 vector<string> v_nestedModelDescription;
1031 vector<SubstitutionModel*> v_pSM;
1033 if (args.find(
"model1") == args.end())
1035 throw Exception(
"Missing argument 'model1' for model " + modelName +
".");
1037 unsigned int nbmodels = 0;
1039 while (args.find(
"model" + TextTools::toString(nbmodels + 1)) != args.end())
1041 v_nestedModelDescription.push_back(args[
"model" + TextTools::toString(++nbmodels)]);
1045 throw Exception(
"Missing nested models for model " + modelName +
".");
1047 for (
unsigned i = 0; i < v_nestedModelDescription.size(); i++)
1050 pSM.reset(nestedReader.
read(alphabet, v_nestedModelDescription[i], data,
false));
1052 for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
1054 unparsedArguments_[modelName +
"." + TextTools::toString(i + 1) +
"_" + it->first] = it->second;
1056 v_pSM.push_back(pSM.release());
1061 ApplicationTools::displayResult(
"Mixture Of Substitution Models", modelName );
1064 throw Exception(
"Unknown model name for mixture " + modelName);
1066 return model.release();
1072 std::map<std::string, std::string>& globalAliases,
1073 std::vector<std::string>& writtenNames)
const 1079 if ((dynamic_cast<const MixedSubstitutionModel*>(&model) != NULL) && (dynamic_cast<const AbstractBiblioMixedSubstitutionModel*>(&model) == NULL))
1081 writeMixed_(*dynamic_cast<const MixedSubstitutionModel*>(&model), out, globalAliases, writtenNames);
1091 out <<
"file=" << userModel->
getPath();
1101 write(*nestedModel, out, globalAliases, writtenNames);
1103 const G2001* gModel =
dynamic_cast<const G2001*
>(&model);
1109 const BppODiscreteDistributionFormat* bIO =
new BppODiscreteDistributionFormat();
1111 bIO->write(*nestedDist, out, globalAliases, writtenNames);
1118 const RE08* reModel =
dynamic_cast<const RE08*
>(&model);
1123 write(*nestedModel, out, globalAliases, writtenNames);
1128 const YpR* yprModel =
dynamic_cast<const YpR*
>(&model);
1133 write(*nestedModel, out, globalAliases, writtenNames);
1138 const gBGC* gbgcModel =
dynamic_cast<const gBGC*
>(&model);
1143 write(*nestedModel, out, globalAliases, writtenNames);
1157 write(*mod0, out, globalAliases, writtenNames);
1165 write(*mod0, out, globalAliases, writtenNames);
1170 write(*mod0, out, globalAliases, writtenNames);
1171 for (
unsigned int i = 1; i < nmod; i++)
1173 out <<
",model" + TextTools::toString(i + 1) +
"=";
1174 write(*wM->
getNModel(i), out, globalAliases, writtenNames);
1182 const Coala* coalaModel =
dynamic_cast<const Coala*
>(&model);
1195 out <<
"frequencies=";
1198 bIOFreq.
write(pfs, out, writtenNames);
1218 BppOParametrizableFormat bIO;
1220 bIO.write(&model, out, globalAliases, model.getIndependentParameters().getParameterNames(), writtenNames,
true, comma);
1227 std::map<std::string, std::string>& globalAliases,
1228 std::vector<std::string>& writtenNames)
const 1230 if (dynamic_cast<const MixtureOfSubstitutionModels*>(&model) != NULL)
1238 vector<string> vpl = eM->getIndependentParameters().getParameterNames();
1239 for (
unsigned j = 0; j < vpl.size(); j++)
1241 if (eM->getParameterNameWithoutNamespace(vpl[j]) ==
"rate")
1242 writtenNames.push_back(vpl[j]);
1251 out <<
"model" + TextTools::toString(i + 1) +
"=";
1252 write(*pMS->
getNModel(i), out, globalAliases, writtenNames);
1258 out <<
"MixedModel(model=";
1261 ParameterList pl = eM->getIndependentParameters();
1262 vector<string> vpl = pl.getParameterNames();
1264 for (
unsigned j = 0; j < vpl.size(); j++)
1266 if (find(writtenNames.begin(), writtenNames.end(), vpl[j]) == writtenNames.end())
1268 if (eM->getParameterNameWithoutNamespace(vpl[j]) ==
"rate")
1269 writtenNames.push_back(vpl[j]);
1273 if (pDD && dynamic_cast<const ConstantDistribution*>(pDD) == NULL)
1275 const BppODiscreteDistributionFormat* bIO =
new BppODiscreteDistributionFormat();
1277 bIO->write(*pDD, sout, globalAliases, writtenNames);
1278 globalAliases[vpl[j]] = sout.str();
1285 write(*eM, out, globalAliases, writtenNames);
1287 if (pMS->
from() != -1)
1291 const BppOParametrizableFormat* bIO =
new BppOParametrizableFormat();
1292 bIO->write(&model, out, globalAliases, model.getIndependentParameters().getParameterNames(), writtenNames,
true,
true);
1300 const SiteContainer* data)
throw (Exception)
1302 string initFreqs = ApplicationTools::getStringParameter(model.getNamespace() +
"initFreqs", unparsedArguments_,
"",
"",
true, warningLevel_);
1304 ApplicationTools::displayResult(
"External frequencies initialization for model", (initFreqs ==
"") ?
"None" : initFreqs);
1306 if (initFreqs !=
"")
1308 if (initFreqs ==
"observed")
1311 throw Exception(
"BppOSubstitutionModelFormat::initialize_(). Missing data for observed frequencies");
1312 unsigned int psi = ApplicationTools::getParameter<unsigned int>(model.getNamespace() +
"initFreqs.observedPseudoCount", unparsedArguments_, 0,
"",
true, warningLevel_);
1313 model.setFreqFromData(*data, psi);
1315 else if (initFreqs.substr(0, 6) ==
"values")
1318 map<int, double> frequencies;
1320 string rf = initFreqs.substr(6);
1321 StringTokenizer strtok(rf.substr(1, rf.length() - 2),
",");
1323 while (strtok.hasMoreToken())
1324 frequencies[i++] = TextTools::toDouble(strtok.nextToken());
1325 model.setFreq(frequencies);
1328 throw Exception(
"Unknown initFreqs argument");
1331 ParameterList pl = model.getIndependentParameters();
1332 for (
size_t i = 0; i < pl.size(); i++)
1334 AutoParameter ap(pl[i]);
1335 ap.setMessageHandler(ApplicationTools::warning);
1336 pl.setParameter(i, ap);
1339 for (
size_t i = 0; i < pl.size(); i++)
1341 const string pName = pl[i].getName();
1342 posp = pName.rfind(
".");
1343 bool test1 = (initFreqs ==
"");
1344 bool test2 = (model.getParameterNameWithoutNamespace(pName).size() < posp + 6) || (model.getParameterNameWithoutNamespace(pName).substr(posp + 1, 5) !=
"theta");
1345 bool test3 = (unparsedArguments_.find(pName) != unparsedArguments_.end());
1347 if (test1 || test2 || test3)
1349 if (!test1 && !test2 && test3)
1350 ApplicationTools::displayWarning(
"Warning, initFreqs argument is set and a value is set for parameter " + pName);
1352 double value = ApplicationTools::getDoubleParameter(pName, unparsedArguments_, pl[i].getValue(),
"",
true, warningLevel_);
1353 pl[i].setValue(value);
1355 if (unparsedArguments_.find(pName) != unparsedArguments_.end())
1356 unparsedArguments_.erase(unparsedArguments_.find(pName));
1359 ApplicationTools::displayResult(
"Parameter found", pName +
"=" + TextTools::toString(pl[i].getValue()));
1362 catch (Exception& e) {}
1366 model.matchParametersValues(pl);
The Le et al (2008) EX3 substitution model for proteins.
The Le et al (2008) UL3 substitution model for proteins.
The no-strand bias substitution model for nucleotides, from Lobry 1995. The point of this model is th...
Interface for all substitution models.
The Jukes-Cantor substitution model for proteins.
const SubstitutionModel * getNestedModel() const
The Strand Symmetric Reversible substitution model for nucleotides.
virtual size_t getNumberOfModels() const
returns the number of models in the mixture
int from() const
Numbers of the states between which the substitution rates of all the submodels must be equal...
Class for substitution models on non stop codons, with different rates on the models, depending on their phase.
size_t getNbrOfAxes() const
The Whelan and Goldman substitution model for proteins.
virtual const FrequenciesSet * getFrequenciesSet() const
If the model owns a FrequenciesSet, returns a pointer to it, otherwise return 0.
virtual const Alphabet * getAlphabet() const =0
The Le et al (2008) EH0 substitution model for proteins.
The Le and Gascuel substitution model for proteins.
The Kimura 2-rates substitution model for nucleotides.
The Goldman and Yang (1994) substitution model for codons.
Class for asynonymous substitution models on codons with parameterized equilibrium frequencies and nu...
Class for neutral substitution models on triplets, which correspond to codons that do not have any si...
Parametrize a set of state frequencies.
The Tamura (1992) substitution model for nucleotides.
size_t getNumberOfModels() const
std::string getExch() const
virtual void setFreqFromData(const SequenceContainer &data, double pseudoCount=0)=0
Set equilibrium frequencies equal to the frequencies estimated from the data.
The Le et al (2008) CAT substitution model for proteins.
The Le et al (2008) EX2 substitution model for proteins.
The Le et al (2008) UL2 substitution model for proteins.
Galtier's 2001 covarion model.
const SubstitutionModel * getNestedModel() const
The Le and Gascuel (2010) EX_EHO substitution model for proteins.
The Tamura and Nei (1993) substitution model for nucleotides.
The Hasegawa M, Kishino H and Yano T (1985) substitution model for nucleotides.
virtual std::string getName() const =0
Get the name of the model.
The Yang and Nielsen (1998) substitution model for codons.
The Yang et al (2000) M8 substitution model for codons.
Abstract Basal class for words of substitution models.
The General Time-Reversible substitution model for nucleotides.
Intersection of models RN95 and L95.
Tuffley and Steel's 1998 covarion model.
The Yang et al (2000) M3 substitution model for codons.
The Jones, Taylor and Thornton substitution model for proteins.
Substitution models defined as a mixture of nested substitution models.
const ReversibleSubstitutionModel * getNestedModel() const
Specialisation interface for nucleotide substitution model.
Partial implementation of the Markov-modulated class of substitution models.
The Yang et al (2000) M2 substitution model for codons, with the more realistic modification in Wong ...
The Muse and Gaut (1994) substitution model for codons.
const DiscreteDistribution * getRateDistribution() const
const std::string & getPath() const
The Coala branch-heterogeneous amino-acid substitution model.
The Yang et al (2000) M7 substitution model for codons.
const SubstitutionModel * getNModel(size_t i) const
returns the ith model, or Null if i is not a valid number.
const FrequenciesSet * getFitness() const
Basal class for words of reversible substitution models.
const SubstitutionModel * getNestedModel() const
The Jukes-Cantor substitution model for nucleotides.
const DiscreteDistribution * getDistribution(std::string &parName) const
returns the DiscreteDistribution associated with a given parameter name.
virtual size_t getNumberOfModels() const =0
The Felsenstein (1984) substitution model for nucleotides.
virtual const SubstitutionModel * getNModel(size_t i) const
Returns a specific model from the mixture.
Class for asynonymous substitution models on codons with parameterized equilibrium frequencies and nu...
Build an empirical protein substitution model from a file.
The model described by Rhetsky & Nei, where the only hypothesis is that the transversion rates are on...
The Rivas-Eddy substitution model with gap characters.
The Yang et al (2000) M1 substitution model for codons, with the more realistic modification in Wong ...
The Dayhoff, Schwartz and Orcutt substitution model for proteins.
Substitution models defined as a mixture of several substitution models.
Class for substitution models on non stop codons, with different parametrized rates on the models...
Class for substitution models of codons with non-synonymous/synonymous ratios of substitution rates d...
Class for asynonymous and synonymous substitution models on codons with parameterized equilibrium fre...
Interface for Substitution models, defined as a mixture of "simple" substitution models.