bpp-phyl  2.2.0
BppOFrequenciesSetFormat.cpp
Go to the documentation of this file.
1 //
2 // File: BppOFrequenciesSetFormatFormat.cpp
3 // Created by: Laurent Guéguen
4 // Created on: lundi 9 juillet 2012, à 12h 56
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
41 
42 
43 #include "../Model/FrequenciesSet/NucleotideFrequenciesSet.h"
44 #include "../Model/FrequenciesSet/ProteinFrequenciesSet.h"
45 #include "../Model/FrequenciesSet/CodonFrequenciesSet.h"
46 #include "../Model/FrequenciesSet/MvaFrequenciesSet.h"
47 
48 //From bpp-core:
49 #include <Bpp/Io/FileTools.h>
50 #include <Bpp/Text/TextTools.h>
51 #include <Bpp/Text/StringTokenizer.h>
52 #include <Bpp/Text/KeyvalTools.h>
53 // #include <Bpp/Numeric/Prob.all>
54 #include <Bpp/Numeric/AutoParameter.h>
55 
56 //From bpp-seq:
57 #include <Bpp/Seq/Alphabet/AlphabetTools.h>
58 #include <Bpp/Seq/App/SequenceApplicationTools.h>
59 #include <Bpp/Seq/Container/SequenceContainerTools.h>
60 
61 using namespace bpp;
62 
63 // From the STL:
64 #include <iomanip>
65 
66 using namespace std;
67 
68 unsigned char BppOFrequenciesSetFormat::DNA = 1;
69 unsigned char BppOFrequenciesSetFormat::RNA = 2;
70 unsigned char BppOFrequenciesSetFormat::NUCLEOTIDE = 1 | 2;
71 unsigned char BppOFrequenciesSetFormat::PROTEIN = 4;
72 unsigned char BppOFrequenciesSetFormat::CODON = 8;
73 unsigned char BppOFrequenciesSetFormat::WORD = 16;
74 unsigned char BppOFrequenciesSetFormat::ALL = 1 | 2 | 4 | 8 | 16;
75 
76 FrequenciesSet* BppOFrequenciesSetFormat::read(const Alphabet* alphabet, const std::string& freqDescription, const SiteContainer* data, bool parseArguments)
77 {
78  unparsedArguments_.clear();
79  string freqName;
80  map<string, string> args;
81  KeyvalTools::parseProcedure(freqDescription, freqName, args);
82  auto_ptr<FrequenciesSet> pFS;
83 
84  if (freqName == "Fixed")
85  {
86  if (AlphabetTools::isNucleicAlphabet(alphabet))
87  {
88  if (alphabetCode_ & NUCLEOTIDE)
89  pFS.reset(new FixedNucleotideFrequenciesSet(dynamic_cast<const NucleicAlphabet*>(alphabet)));
90  else
91  throw Exception("Nucleotide alphabet not supported.");
92  }
93  else if (AlphabetTools::isProteicAlphabet(alphabet))
94  {
95  if (alphabetCode_ & PROTEIN)
96  pFS.reset(new FixedProteinFrequenciesSet(dynamic_cast<const ProteicAlphabet*>(alphabet)));
97  else
98  throw Exception("Protein alphabet not supported.");
99  }
100  else if (AlphabetTools::isCodonAlphabet(alphabet))
101  {
102  if (alphabetCode_ & CODON) {
103  if (!geneticCode_)
104  throw Exception("BppOFrequenciesSetFormat::read(). No genetic code specified! Consider using 'setGeneticCode'.");
105  pFS.reset(new FixedCodonFrequenciesSet(geneticCode_));
106  } else {
107  throw Exception("Codon alphabet not supported.");
108  }
109  }
110  else
111  {
112  pFS.reset(new FixedFrequenciesSet(new CanonicalStateMap(alphabet, false)));
113  }
114  }
115  else if (freqName == "Full")
116  {
117  unsigned short method = 1;
118  if (args.find("method") != args.end()){
119  if (args["method"] == "local")
120  method=2;
121  else
122  if (args["method"] == "binary")
123  method=3;
124  }
125 
126  if (AlphabetTools::isNucleicAlphabet(alphabet))
127  {
128  if (alphabetCode_ & NUCLEOTIDE)
129  pFS.reset(new FullNucleotideFrequenciesSet(dynamic_cast<const NucleicAlphabet*>(alphabet)));
130  else
131  throw Exception("Nucleotide alphabet not supported.");
132  }
133  else if (AlphabetTools::isProteicAlphabet(alphabet))
134  {
135  if (alphabetCode_ & PROTEIN)
136  pFS.reset(new FullProteinFrequenciesSet(dynamic_cast<const ProteicAlphabet*>(alphabet), false, method));
137  else
138  throw Exception("Protein alphabet not supported.");
139  }
140  else if (AlphabetTools::isCodonAlphabet(alphabet))
141  {
142  if (alphabetCode_ & CODON) {
143  if (!geneticCode_)
144  throw Exception("BppOFrequenciesSetFormat::read(). No genetic code specified! Consider using 'setGeneticCode'.");
145  pFS.reset(new FullCodonFrequenciesSet(geneticCode_));
146  } else {
147  throw Exception("Codon alphabet not supported.");
148  }
149  }
150  else
151  {
152  //NB: jdutheil 25/09/14 => gap models will not be supported before we add the appropriate option!
153  pFS.reset(new FullFrequenciesSet(new CanonicalStateMap(alphabet, false), false, method));
154  }
155  }
156  else if (freqName == "GC")
157  {
158  if (!AlphabetTools::isNucleicAlphabet(alphabet))
159  throw Exception("Error, unvalid frequencies " + freqName + " with non-nucleic alphabet.");
160  if (alphabetCode_ & NUCLEOTIDE)
161  pFS.reset(new GCFrequenciesSet(dynamic_cast<const NucleicAlphabet*>(alphabet)));
162  else
163  throw Exception("Nucleotide alphabet not supported.");
164  }
165 
166  // INDEPENDENTWORD
167  else if (freqName == "Word")
168  {
169  if (!(alphabetCode_& WORD))
170  throw Exception("Word alphabet not supported.");
171  if (!AlphabetTools::isWordAlphabet(alphabet))
172  throw Exception("BppOFrequenciesSetFormat::read(...).\n\t Bad alphabet type "
173  + alphabet->getAlphabetType() + " for frequencies set " + freqName + ".");
174 
175  const WordAlphabet* pWA = dynamic_cast<const WordAlphabet*>(alphabet);
176 
177  if (args.find("frequency") != args.end())
178  {
179  string sAFS = args["frequency"];
180 
181  unsigned int nbfreq = pWA->getLength();
182  string st = "";
183  for (unsigned i = 0; i < nbfreq; i++)
184  {
185  st += TextTools::toString(i + 1);
186  }
187 
188  BppOFrequenciesSetFormat nestedReader(alphabetCode_, false, warningLevel_);
189  auto_ptr<FrequenciesSet> pFS2(nestedReader.read(pWA->getNAlphabet(0), sAFS, data, false));
190  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
191  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
192  {
193  unparsedArguments_[st + "_" + it->first] = it->second;
194  }
195  pFS.reset(new WordFromUniqueFrequenciesSet(pWA, pFS2.release()));
196  }
197  else
198  {
199  if (args.find("frequency1") == args.end())
200  throw Exception("BppOFrequenciesSetFormat::read. Missing argument 'frequency' or 'frequency1' for frequencies set 'Word'.");
201  vector<string> v_sAFS;
202  vector<FrequenciesSet*> v_AFS;
203  unsigned int nbfreq = 1;
204 
205  while (args.find("frequency" + TextTools::toString(nbfreq)) != args.end())
206  {
207  v_sAFS.push_back(args["frequency" + TextTools::toString(nbfreq++)]);
208  }
209 
210  if (v_sAFS.size() != pWA->getLength())
211  throw Exception("BppOFrequenciesSetFormat::read. Number of frequencies (" + TextTools::toString(v_sAFS.size()) + ") does not match length of the words (" + TextTools::toString(pWA->getLength()) + ")");
212 
213  for (size_t i = 0; i < v_sAFS.size(); ++i)
214  {
215  BppOFrequenciesSetFormat nestedReader(alphabetCode_, false, warningLevel_);
216  pFS.reset(nestedReader.read(pWA->getNAlphabet(i), v_sAFS[i], data, false));
217  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
218  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
219  {
220  unparsedArguments_[TextTools::toString(i + 1) + "_" + it->first] = it->second;
221  }
222  v_AFS.push_back(pFS.release());
223  }
224 
225  pFS.reset(new WordFromIndependentFrequenciesSet(pWA, v_AFS));
226  }
227  }
228  // INDEPENDENT CODON
229  else if (freqName == "Codon")
230  {
231  if (!(alphabetCode_ & CODON))
232  throw Exception("Codon alphabet not supported.");
233  if (!AlphabetTools::isCodonAlphabet(alphabet))
234  throw Exception("BppOFrequenciesSetFormat::read.\n\t Bad alphabet type "
235  + alphabet->getAlphabetType() + " for frequenciesset " + freqName + ".");
236  if (!geneticCode_)
237  throw Exception("BppOFrequenciesSetFormat::read(). No genetic code specified! Consider using 'setGeneticCode'.");
238 
239  const CodonAlphabet* pWA = dynamic_cast<const CodonAlphabet*>(alphabet);
240 
241  if (args.find("frequency") != args.end())
242  {
243  string sAFS = args["frequency"];
244 
245  BppOFrequenciesSetFormat nestedReader(alphabetCode_, false, warningLevel_);
246  auto_ptr<FrequenciesSet> pFS2(nestedReader.read(pWA->getNAlphabet(0), sAFS, data, false));
247  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
248 
249  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
250  {
251  unparsedArguments_["123_" + it->first] = it->second;
252  }
253 
254  pFS.reset(new CodonFromUniqueFrequenciesSet(geneticCode_, pFS2.release(), "Codon"));
255  }
256  else
257  {
258  if (args.find("frequency1") == args.end())
259  throw Exception("BppOFrequenciesSetFormat::read. Missing argument 'frequency' or 'frequency1' for frequencies set.");
260  vector<string> v_sAFS;
261  vector<FrequenciesSet*> v_AFS;
262  unsigned int nbfreq = 1;
263 
264  while (args.find("frequency" + TextTools::toString(nbfreq)) != args.end())
265  {
266  v_sAFS.push_back(args["frequency" + TextTools::toString(nbfreq++)]);
267  }
268 
269  if (v_sAFS.size() != 3)
270  throw Exception("BppOFrequenciesSetFormat::read. Number of frequencies (" + TextTools::toString(v_sAFS.size()) + ") is not three");
271 
272  for (size_t i = 0; i < v_sAFS.size(); ++i)
273  {
274  BppOFrequenciesSetFormat nestedReader(alphabetCode_, false, warningLevel_);
275  pFS.reset(nestedReader.read(pWA->getNAlphabet(i), v_sAFS[i], data, false));
276  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
277  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
278  {
279  unparsedArguments_[TextTools::toString(i + 1) + "_" + it->first] = it->second;
280  }
281  v_AFS.push_back(pFS.release());
282  }
283 
284  if (!geneticCode_)
285  throw Exception("BppOFrequenciesSetFormat::read(). No genetic code specified! Consider using 'setGeneticCode'.");
286  pFS.reset(new CodonFromIndependentFrequenciesSet(geneticCode_, v_AFS, "Codon"));
287  }
288  }
289 
290  // CODON PER AA Frequencies
291  else if (freqName == "FullPerAA")
292  {
293  if (!(alphabetCode_ & CODON))
294  throw Exception("Codon alphabet not supported.");
295  if (!AlphabetTools::isCodonAlphabet(alphabet))
296  throw Exception("BppOFrequenciesSetFormat::read.\n\t Bad alphabet type "
297  + alphabet->getAlphabetType() + " for frequenciesset " + freqName + ".");
298 
299  if (!geneticCode_)
300  throw Exception("BppOFrequenciesSetFormat::read(). No genetic code specified! Consider using 'setGeneticCode'.");
301 
302  const ProteicAlphabet* pPA = dynamic_cast<const ProteicAlphabet*>(geneticCode_->getTargetAlphabet());
303 
304  unsigned short method=1;
305  if (args.find("method") != args.end()){
306  if (args["method"]=="local")
307  method=2;
308  else
309  if (args["method"]=="binary")
310  method=3;
311  }
312 
313  if (args.find("protein_frequencies") != args.end())
314  {
315  string sPFS = args["protein_frequencies"];
316  BppOFrequenciesSetFormat nestedReader(alphabetCode_, false, warningLevel_);
317  auto_ptr<ProteinFrequenciesSet> pPFS(dynamic_cast<ProteinFrequenciesSet*>(nestedReader.read(pPA, sPFS, data, false)));
318  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
319 
320  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
321  {
322  unparsedArguments_["FullPerAA." + it->first] = it->second;
323  }
324  pFS.reset(new FullPerAACodonFrequenciesSet(geneticCode_, pPFS.release(), method));
325  }
326  else
327  pFS.reset(new FullPerAACodonFrequenciesSet(geneticCode_, method));
328  }
329 
330  // codeml frequencies syntax
331 
332  else if (AlphabetTools::isCodonAlphabet(alphabet))
333  {
334  if (!(alphabetCode_ & CODON))
335  throw Exception("Codon alphabet not supported.");
336  if (!geneticCode_)
337  throw Exception("BppOFrequenciesSetFormat::read(). No genetic code specified! Consider using 'setGeneticCode'.");
338 
339  const CodonAlphabet* pWA = dynamic_cast<const CodonAlphabet*>(alphabet);
340 
341  if (args.find("genetic_code") != args.end()) {
342  ApplicationTools::displayWarning("'genetic_code' argument is no longer supported inside model description, and has been supersided by a global 'genetic_code' option.");
343  throw Exception("BppOFrequenciesSetFormat::read. Deprecated 'genetic_code' argument.");
344  }
345  short opt = -1;
346  string mgmtStopCodon = "quadratic";
347 
348  if (freqName == "F0")
349  {
351  }
352  else if (freqName == "F1X4")
353  {
355 
356  if (args.find("mgmtStopCodon") != args.end()){
357  mgmtStopCodon = args["mgmtStopCodon"];
358  ApplicationTools::displayResult("StopCodon frequencies distribution ", mgmtStopCodon);
359  }
360  if (args.find("frequency") != args.end())
361  {
362  string sAFS = args["frequency"];
363 
364  BppOFrequenciesSetFormat nestedReader(alphabetCode_, false, warningLevel_);
365  auto_ptr<FrequenciesSet> pFS2(nestedReader.read(pWA->getNAlphabet(0), sAFS, data, false));
366  if (pFS2->getName()!="Full")
367  throw Exception("BppOFrequenciesSetFormat::read. The frequency option in F1X4 can only be Full");
368 
369  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
370 
371  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
372  {
373  unparsedArguments_["123_" + it->first] = it->second;
374  }
375  }
376  if (args.find("123_theta") != args.end())
377  unparsedArguments_["123_Full.theta"] = args["123_theta"];
378  if (args.find("123_theta1") != args.end())
379  unparsedArguments_["123_Full.theta1"] = args["123_theta1"];
380  if (args.find("123_theta2") != args.end())
381  unparsedArguments_["123_Full.theta2"] = args["123_theta2"];
382  if (args.find("theta") != args.end())
383  unparsedArguments_["123_Full.theta"] = args["123_theta"];
384  if (args.find("theta1") != args.end())
385  unparsedArguments_["123_Full.theta1"] = args["123_theta1"];
386  if (args.find("theta2") != args.end())
387  unparsedArguments_["123_Full.theta2"] = args["123_theta2"];
388  }
389  else if (freqName == "F3X4")
390  {
392 
393  if (args.find("mgmtStopCodon") != args.end()){
394  mgmtStopCodon = args["mgmtStopCodon"];
395  ApplicationTools::displayResult("StopCodon frequencies distribution ", mgmtStopCodon);
396  }
397 
398  if (args.find("frequency1") != args.end() ||
399  args.find("frequency2") != args.end() ||
400  args.find("frequency3") != args.end())
401  {
402  vector<string> v_sAFS;
403 
404  for (unsigned int nbfreq = 1; nbfreq<=3; nbfreq++)
405  if (args.find("frequency" + TextTools::toString(nbfreq)) != args.end())
406  v_sAFS.push_back(args["frequency" + TextTools::toString(nbfreq)]);
407  else
408  v_sAFS.push_back("");
409 
410  for (unsigned i = 0; i < v_sAFS.size(); i++)
411  {
412  BppOFrequenciesSetFormat nestedReader(alphabetCode_, false, warningLevel_);
413  if (v_sAFS[i]!=""){
414  pFS.reset(nestedReader.read(pWA->getNAlphabet(i), v_sAFS[i], data, false));
415  if (pFS->getName()!="Full")
416  throw Exception("BppOFrequenciesSetFormat::read. The frequency options in F3X4 can only be Full");
417 
418  map<string, string> unparsedParameterValuesNested(nestedReader.getUnparsedArguments());
419  for (map<string, string>::iterator it = unparsedParameterValuesNested.begin(); it != unparsedParameterValuesNested.end(); it++)
420  {
421  unparsedArguments_[TextTools::toString(i + 1) + "_" + it->first] = it->second;
422  }
423  }
424  }
425  }
426  for (unsigned int i = 1 ; i <= 3; i++){
427 
428  if (args.find(TextTools::toString(i)+"_theta") != args.end())
429  unparsedArguments_[TextTools::toString(i)+"_Full.theta"] = args[TextTools::toString(i)+"_theta"];
430  if (args.find(TextTools::toString(i)+"_theta1") != args.end())
431  unparsedArguments_[TextTools::toString(i)+"_Full.theta1"] = args[TextTools::toString(i)+"_theta1"];
432  if (args.find(TextTools::toString(i)+"_theta2") != args.end())
433  unparsedArguments_[TextTools::toString(i)+"_Full.theta2"] = args[TextTools::toString(i)+"_theta2"];
434  }
435  }
436  else if (freqName == "F61")
437  {
439  }
440  if (opt != -1){
441  unsigned short method=1;
442  if (args.find("method") != args.end()){
443  if (args["method"]=="local")
444  method=2;
445  else
446  if (args["method"]=="binary")
447  method=3;
448  }
449  pFS.reset(CodonFrequenciesSet::getFrequenciesSetForCodons(opt, geneticCode_, mgmtStopCodon, method));
450  }
451  else
452  throw Exception("Unknown frequency option: " + freqName);
453  }
454 
455  //MVAprotein freq set for COaLA model
456  else if (freqName == "MVAprotein")
457  {
458  pFS.reset(new MvaFrequenciesSet(dynamic_cast<const ProteicAlphabet*>(alphabet)));
459  dynamic_cast<MvaFrequenciesSet*>(pFS.get())->setParamValues(args);
460  }
461  else
462  throw Exception("Unknown frequency option: " + freqName);
463 
464  vector<string> pnames = pFS->getParameters().getParameterNames();
465 
466  string pref = pFS->getNamespace();
467  for (size_t i = 0; i < pnames.size(); i++)
468  {
469  string name = pFS->getParameterNameWithoutNamespace(pnames[i]);
470  if (args.find(name) != args.end())
471  unparsedArguments_[pref + name] = args[name];
472  }
473 
474  // Forward arguments:
475  if (args.find("init") != args.end())
476  {
477  unparsedArguments_["init"] = args["init"];
478  unparsedArguments_["initFreqs"] = args["init"];
479  }
480  if (args.find("init.observedPseudoCount") != args.end())
481  {
482  unparsedArguments_["init.observedPseudoCount"] = args["init.observedPseudoCount"];
483  unparsedArguments_["initFreqs.observedPseudoCount"] = args["init.observedPseudoCount"];
484  }
485  if (args.find("values") != args.end())
486  {
487  unparsedArguments_["initFreqs"] = "values" + args["values"];
488  unparsedArguments_["init"] = "values" + args["values"];
489  }
490 
491  if (parseArguments)
492  initialize_(*pFS, data);
493  return pFS.release();
494 }
495 
497  OutputStream& out,
498  std::vector<std::string>& writtenNames) const
499 {
500  if (!pfreqset)
501  {
502  out << "None";
503  return;
504  }
505  ParameterList pl = pfreqset->getParameters();
506  int p = out.getPrecision();
507  out.setPrecision(12);
508  bool flag(false);
509  string name = pfreqset->getName();
510  out << name << "(";
511 
512 
513  if ((name == "Fixed") || (name == "F0"))
514  {
515  vector<double> vf = pfreqset->getFrequencies();
516  out << "values=(";
517  for (size_t i = 0; i < vf.size(); i++)
518  {
519  if (i != 0)
520  out << ", ";
521  out << vf[i];
522  }
523  out << ")";
524  }
525  else
526  {
527  if (dynamic_cast<const FullProteinFrequenciesSet*>(pfreqset))
528  {
529  size_t meth=dynamic_cast<const FullProteinFrequenciesSet*>(pfreqset)->getMethod();
530  if (meth>1){
531  if (flag)
532  out << ",";
533  out << "method=" << ((meth==2)?"local":"binary");
534  flag=true;
535  }
536  }
537  else
538  {
539  if (name != "F1X4" && name != "F3X4" && name != "F61")
540  {
541  const WordFromIndependentFrequenciesSet* pWFI = dynamic_cast<const WordFromIndependentFrequenciesSet*>(pfreqset);
542  if (pWFI != NULL)
543  {
544  for (size_t i = 0; i < pWFI->getLength(); i++)
545  {
546  if (i != 0)
547  out << ", ";
548  out << "frequency" << i + 1 << "=";
549  write(&pWFI->getFrequenciesSetForLetter(i), out, writtenNames);
550  }
551  flag = true;
552  }
553  const WordFromUniqueFrequenciesSet* pWFU = dynamic_cast<const WordFromUniqueFrequenciesSet*>(pfreqset);
554  if (pWFU != NULL)
555  {
556  for (size_t i = 0; i < pWFU->getLength(); i++)
557  {
558  if (i != 0)
559  out << ", ";
560  out << "frequency=";
561  write(&pWFU->getFrequenciesSetForLetter(i), out, writtenNames);
562  }
563  flag = true;
564  }
565  const FullPerAACodonFrequenciesSet* pFPA=dynamic_cast<const FullPerAACodonFrequenciesSet*>(pfreqset);
566  if (pFPA != NULL)
567  {
569  out << "protein_frequencies=";
570 
571  write(ppfs, out, writtenNames);
572 
573  flag = true;
574 
575  unsigned short meth=pFPA->getMethod();
576  if (meth>1){
577  if (flag)
578  out << ",";
579  out << "method=" << ((meth==2)?"local":"binary");
580  flag=true;
581  }
582  }
583  }
584  }
585 
586 
587  for (size_t i = 0; i < pl.size(); i++)
588  {
589  if (find(writtenNames.begin(), writtenNames.end(), pl[i].getName()) == writtenNames.end())
590  {
591  if (flag)
592  out << ",";
593  else
594  flag = true;
595  string pname = pfreqset->getParameterNameWithoutNamespace(pl[i].getName());
596  (out << pname << "=").enableScientificNotation(false) << pl[i].getValue();
597  writtenNames.push_back(pl[i].getName());
598  }
599  }
600  const FullCodonFrequenciesSet* pF=dynamic_cast<const FullCodonFrequenciesSet*>(pfreqset);
601  if (pF != NULL)
602  {
603  unsigned short meth=pF->getMethod();
604  if (meth>1){
605  if (flag)
606  out << ",";
607  out << "method=" << ((meth==2)?"local":"binary");
608  flag=true;
609  }
610  }
611  }
612 
613  out << ")";
614  out.setPrecision(p);
615 }
616 
617 void BppOFrequenciesSetFormat::initialize_(FrequenciesSet& freqSet, const SiteContainer* data)
618 {
619  if (unparsedArguments_.find("init") != unparsedArguments_.end())
620  {
621  // Initialization using the "init" option
622  string init = unparsedArguments_["init"];
623  if (init == "observed")
624  {
625  if (!data)
626  throw Exception("Missing data for observed frequencies");
627  unsigned int psc = 0;
628  if (unparsedArguments_.find("observedPseudoCount") != unparsedArguments_.end())
629  psc = TextTools::to<unsigned int>(unparsedArguments_["observedPseudoCount"]);
630 
631  map<int, double> freqs;
632  SequenceContainerTools::getFrequencies(*data, freqs, psc);
633 
635  }
636  else if (init.substr(0, 6) == "values")
637  {
638  // Initialization using the "values" argument
639  vector<double> frequencies;
640  string rf = init.substr(6);
641 
642  StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
643  while (strtok.hasMoreToken())
644  frequencies.push_back(TextTools::toDouble(strtok.nextToken()));
645  freqSet.setFrequencies(frequencies);
646  }
647  else if (init == "balanced")
648  {
649  // Nothing to do here, this is the default instanciation.
650  }
651  else
652  throw Exception("Unknown init argument");
653  }
654  else
655  {
656  // Explicit initialization of each parameter
657  ParameterList pl = freqSet.getParameters();
658 
659  for (size_t i = 0; i < pl.size(); ++i)
660  {
661  AutoParameter ap(pl[i]);
662  if (verbose_)
663  ap.setMessageHandler(ApplicationTools::warning);
664  pl.setParameter(i, ap);
665  }
666 
667  for (size_t i = 0; i < pl.size(); ++i)
668  {
669  const string pName = pl[i].getName();
670  double value = ApplicationTools::getDoubleParameter(pName, unparsedArguments_, pl[i].getValue(), "", true, warningLevel_);
671 
672  pl[i].setValue(value);
673  if (verbose_)
674  ApplicationTools::displayResult("Parameter found", pName + "=" + TextTools::toString(pl[i].getValue()));
675  }
676 
677  freqSet.matchParametersValues(pl);
678  }
679 }
680 
681 
A generic FrequenciesSet for Full Codon alphabets.
FrequenciesSet useful for homogeneous and stationary models.
Frequencies set I/O in BppO format.
const FrequenciesSet & getFrequenciesSetForLetter(size_t i) const
This class implements a state map where all resolved states are modeled.
Definition: StateMap.h:161
void initialize_(FrequenciesSet &freqSet, const SiteContainer *data)
the Frequencies in codons are the product of Independent Frequencies in letters with the frequencies ...
FrequenciesSet integrating ProteinFrequenciesSet inside CodonFrequenciesSet. In this case...
virtual void setFrequencies(const std::vector< double > &frequencies)=0
Set the parameters in order to match a given set of frequencies.
FrequenciesSet useful for homogeneous and stationary models, protein implementation.
STL namespace.
virtual const std::vector< double > getFrequencies() const =0
Parametrize a set of state frequencies.
void write(const FrequenciesSet *pfreqset, OutputStream &out, std::vector< std::string > &writtenNames) const
Write a substitution model to a stream.
const FrequenciesSet & getFrequenciesSetForLetter(size_t i) const
const ProteinFrequenciesSet * getProteinFrequenciesSet() const
Nucleotide FrequenciesSet using three independent parameters (theta, theta1, theta2) to modelize the ...
A generic FrequenciesSet allowing for the estimation of all frequencies.
Protein FrequenciesSet using 19 independent parameters to model the 20 frequencies.
virtual std::string getName() const =0
FrequenciesSet useful for homogeneous and stationary models, codon implementation.
FrequenciesSet * read(const Alphabet *alphabet, const std::string &freqDescription, const SiteContainer *data, bool parseArguments=true)
Read a frequencies set from a string.
Parametrize a set of state frequencies for proteins.
const std::map< std::string, std::string > & getUnparsedArguments() const
FrequenciesSet useful for homogeneous and stationary models, nucleotide implementation.
static FrequenciesSet * getFrequenciesSetForCodons(short option, const GeneticCode *gCode, const std::string &mgmtStopFreq="quadratic", unsigned short method=1)
A helper function that provide frequencies set for codon models according to PAML option...
unsigned short getMethod() const
the Frequencies in words are the product of Independent Frequencies in letters
virtual void setFrequenciesFromAlphabetStatesFrequencies(const std::map< int, double > &frequencies)=0
Set the Frequencies from the one of the map which keys match with a letter of the Alphabet...
A frequencies set used to estimate frequencies at the root with the COaLA model. Frequencies at the r...
Nucleotide FrequenciesSet using only one parameter, the GC content.
the Frequencies in codons are the product of the frequencies for a unique FrequenciesSet in letters...