bpp-seq  2.2.0
SequenceApplicationTools.cpp
Go to the documentation of this file.
1 //
2 // File: SequenceApplicationTools.cpp
3 // Created by: Julien Dutheil
4 // Created on: Fri Oct 21 13:13
5 // from file old ApplicationTools.h created on Sun Dec 14 09:36:26 2003
6 //
7 
8 /*
9  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
10 
11  This software is a computer program whose purpose is to provide classes
12  for sequences analysis.
13 
14  This software is governed by the CeCILL license under French law and
15  abiding by the rules of distribution of free software. You can use,
16  modify and/ or redistribute the software under the terms of the CeCILL
17  license as circulated by CEA, CNRS and INRIA at the following URL
18  "http://www.cecill.info".
19 
20  As a counterpart to the access to the source code and rights to copy,
21  modify and redistribute granted by the license, users are provided only
22  with a limited warranty and the software's author, the holder of the
23  economic rights, and the successive licensors have only limited
24  liability.
25 
26  In this respect, the user's attention is drawn to the risks associated
27  with loading, using, modifying and/or developing or reproducing the
28  software by the user in light of its specific status of free software,
29  that may mean that it is complicated to manipulate, and that also
30  therefore means that it is reserved for developers and experienced
31  professionals having in-depth computer knowledge. Users are therefore
32  encouraged to load and test the software's suitability as regards their
33  requirements in conditions enabling the security of their systems and/or
34  data to be ensured and, more generally, to use and operate it in the
35  same conditions as regards security.
36 
37  The fact that you are presently reading this means that you have had
38  knowledge of the CeCILL license and that you accept its terms.
39  */
40 
#include "SequenceApplicationTools.h"
#include "../Alphabet/BinaryAlphabet.h"
#include "../Alphabet/DefaultAlphabet.h"
#include "../Alphabet/CodonAlphabet.h"
#include "../Alphabet/AlphabetTools.h"
#include "../GeneticCode/EchinodermMitochondrialGeneticCode.h"
#include "../GeneticCode/InvertebrateMitochondrialGeneticCode.h"
#include "../GeneticCode/StandardGeneticCode.h"
#include "../GeneticCode/VertebrateMitochondrialGeneticCode.h"
#include "../GeneticCode/YeastMitochondrialGeneticCode.h"
#include "../GeneticCode/AscidianMitochondrialGeneticCode.h"
#include "../GeneticCode/MoldMitochondrialGeneticCode.h"
#include "../Io/BppOSequenceReaderFormat.h"
#include "../Io/BppOAlignmentReaderFormat.h"
#include "../Io/BppOSequenceWriterFormat.h"
#include "../Io/BppOAlignmentWriterFormat.h"
#include "../Io/BppOAlphabetIndex1Format.h"
#include "../Io/BppOAlphabetIndex2Format.h"
#include "../Io/MaseTools.h"
#include "../SiteTools.h"
#include "../SequenceTools.h"
#include <Bpp/App/ApplicationTools.h>
#include <Bpp/Text/TextTools.h>
#include <Bpp/Text/KeyvalTools.h>
#include <Bpp/App/NumCalcApplicationTools.h>
#include <Bpp/Numeric/Random/RandomTools.h>
67 
68 using namespace bpp;
69 using namespace std;
70 
71 /******************************************************************************/
72 
74  map<string, string>& params,
75  const string& suffix,
76  bool suffixIsOptional,
77  bool verbose,
78  bool allowGeneric,
79  int warn) throw (Exception)
80 {
81  Alphabet* chars;
82  string alphtt = ApplicationTools::getStringParameter("alphabet", params, "DNA", suffix, suffixIsOptional, warn);
83 
84  string alphabet = "";
85  map<string, string> args;
86  int flag = 0;
87 
88  KeyvalTools::parseProcedure(alphtt, alphabet, args);
89  unsigned int lg = 1;
90 
91  if (alphabet == "Word")
92  {
93  if (args.find("length") == args.end())
94  throw Exception("Missing length parameter for Word alphabet");
95  lg = TextTools::to<unsigned int>(args["length"]);
96  if (args.find("letter") == args.end())
97  throw Exception("Missing letter alphabet for Word alphabet");
98  alphabet = args["letter"];
99  flag = 1;
100  }
101  else if (alphabet == "RNY")
102  {
103  if (args.find("letter") == args.end())
104  throw Exception("Missing letter alphabet for RNY alphabet");
105  alphabet = args["letter"];
106  flag = 2;
107  }
108 
109  if (alphabet == "Binary")
110  chars = new BinaryAlphabet();
111  else if (alphabet == "DNA")
112  {
113  bool mark = ApplicationTools::getBooleanParameter("bangAsGap", args, false, "", true, warn + 1);
114  chars = new DNA(mark);
115  }
116  else if (alphabet == "RNA")
117  {
118  bool mark = ApplicationTools::getBooleanParameter("bangAsGap", args, false, "", true, warn + 1);
119  chars = new RNA(mark);
120  }
121  else if (alphabet == "Protein")
122  chars = new ProteicAlphabet();
123  else if (allowGeneric && alphabet == "Generic")
124  chars = new DefaultAlphabet();
125  else if (alphabet == "Codon")
126  {
127  if (args.find("letter") == args.end())
128  throw Exception("Missing 'letter' argument in Codon :" + alphabet);
129  if (args.find("type") != args.end())
130  throw Exception("'type' argument in Codon is deprecated and has been superseded by the 'genetic_code' option.");
131 
132  string alphnDesc = ApplicationTools::getStringParameter("letter", args, "RNA");
133  string alphn;
134  map<string, string> alphnArgs;
135  KeyvalTools::parseProcedure(alphnDesc, alphn, alphnArgs);
136 
137  NucleicAlphabet* pnalph;
138  if (alphn == "RNA")
139  {
140  bool mark = ApplicationTools::getBooleanParameter("bangAsGap", alphnArgs, false, "", true, warn + 1);
141  pnalph = new RNA(mark);
142  }
143  else if (alphn == "DNA")
144  {
145  bool mark = ApplicationTools::getBooleanParameter("bangAsGap", alphnArgs, false, "", true, warn + 1);
146  pnalph = new DNA(mark);
147  }
148  else
149  throw Exception("Alphabet not known in Codon : " + alphn);
150 
151 
152  chars = new CodonAlphabet(pnalph);
153  alphabet = alphabet + "(" + alphn + ")";
154  }
155  else
156  throw Exception("Alphabet not known: " + alphabet);
157 
158  if (flag == 1)
159  {
160  chars = new WordAlphabet(chars, lg);
161  string al = " ";
162  for (unsigned i = 0; i < lg; i++)
163  {
164  al += alphabet + " ";
165  }
166  alphabet = "Word(" + al + ")";
167  }
168  else if (flag == 2)
169  {
171  {
172  chars = new RNY(*(dynamic_cast<NucleicAlphabet*>(chars)));
173  alphabet = "RNY(" + alphabet + ")";
174  }
175  else
176  throw Exception("RNY needs a Nucleic Alphabet, instead of " + alphabet);
177  }
178 
179 
180  if (verbose)
181  ApplicationTools::displayResult("Alphabet type ", alphabet);
182  return chars;
183 }
184 
185 /******************************************************************************/
186 
188  const NucleicAlphabet* alphabet,
189  const string& description) throw (Exception)
190 {
191  GeneticCode* geneCode;
192  if (description.find("EchinodermMitochondrial") != string::npos || description.find("9") != string::npos)
193  geneCode = new EchinodermMitochondrialGeneticCode(alphabet);
194  else if (description.find("InvertebrateMitochondrial") != string::npos || description.find("5") != string::npos)
195  geneCode = new InvertebrateMitochondrialGeneticCode(alphabet);
196  else if (description.find("Standard") != string::npos || description.find("1") != string::npos)
197  geneCode = new StandardGeneticCode(alphabet);
198  else if (description.find("VertebrateMitochondrial") != string::npos || description.find("2") != string::npos)
199  geneCode = new VertebrateMitochondrialGeneticCode(alphabet);
200  else if (description.find("YeastMitochondrial") != string::npos || description.find("3") != string::npos)
201  geneCode = new YeastMitochondrialGeneticCode(alphabet);
202  else if (description.find("AscidianMitochondrial") != string::npos || description.find("13") != string::npos)
203  geneCode = new AscidianMitochondrialGeneticCode(alphabet);
204  else if (description.find("MoldMitochondrial") != string::npos || description.find("4") != string::npos)
205  geneCode = new MoldMitochondrialGeneticCode(alphabet);
206  else
207  throw Exception("Unknown GeneticCode: " + description);
208  return geneCode;
209 }
210 
211 /******************************************************************************/
212 
213 AlphabetIndex1* SequenceApplicationTools::getAlphabetIndex1(const Alphabet* alphabet, const string& description, const string& message, bool verbose)
214 throw (Exception)
215 {
216  BppOAlphabetIndex1Format reader(alphabet, message, verbose);
217  return reader.read(description);
218 }
219 
220 AlphabetIndex2* SequenceApplicationTools::getAlphabetIndex2(const Alphabet* alphabet, const string& description, const string& message, bool verbose)
221 throw (Exception)
222 {
223  BppOAlphabetIndex2Format reader(alphabet, message, verbose);
224  return reader.read(description);
225 }
226 
227 /******************************************************************************/
229  const Alphabet* alpha,
230  map<string, string>& params,
231  const string& suffix,
232  bool suffixIsOptional,
233  bool verbose,
234  int warn)
235 {
236  string sequenceFilePath = ApplicationTools::getAFilePath("input.sequence.file", params, true, true, suffix, suffixIsOptional, "none", warn);
237  string sequenceFormat = ApplicationTools::getStringParameter("input.sequence.format", params, "Fasta()", suffix, suffixIsOptional, warn);
238  BppOSequenceReaderFormat bppoReader(warn);
239  auto_ptr<ISequence> iSeq(bppoReader.read(sequenceFormat));
240  if (verbose)
241  {
242  ApplicationTools::displayResult("Sequence file " + suffix, sequenceFilePath);
243  ApplicationTools::displayResult("Sequence format " + suffix, iSeq->getFormatName());
244  }
245  SequenceContainer* sequences = iSeq->readSequences(sequenceFilePath, alpha);
246 
247  return sequences;
248 }
249 
250 /******************************************************************************/
251 
253  const Alphabet* alpha,
254  map<string, string>& params,
255  const string& suffix,
256  bool suffixIsOptional,
257  bool verbose,
258  int warn)
259 {
260  string sequenceFilePath = ApplicationTools::getAFilePath("input.sequence.file", params, true, true, suffix, suffixIsOptional, "none", warn);
261  string sequenceFormat = ApplicationTools::getStringParameter("input.sequence.format", params, "Fasta()", suffix, suffixIsOptional, warn);
262  BppOAlignmentReaderFormat bppoReader(warn);
263  auto_ptr<IAlignment> iAln(bppoReader.read(sequenceFormat));
264  map<string, string> args(bppoReader.getUnparsedArguments());
265  if (verbose)
266  {
267  ApplicationTools::displayResult("Sequence file " + suffix, sequenceFilePath);
268  ApplicationTools::displayResult("Sequence format " + suffix, iAln->getFormatName());
269  }
270 
271  const Alphabet* alpha2;
272  if (AlphabetTools::isRNYAlphabet(alpha))
273  alpha2 = &dynamic_cast<const RNY*>(alpha)->getLetterAlphabet();
274  else
275  alpha2 = alpha;
276 
277  const SequenceContainer* seqCont = iAln->readAlignment(sequenceFilePath, alpha2);
278 
279  VectorSiteContainer* sites2 = new VectorSiteContainer(*dynamic_cast<const OrderedSequenceContainer*>(seqCont));
280  delete seqCont;
281 
282  VectorSiteContainer* sites;
283 
284  if (AlphabetTools::isRNYAlphabet(alpha))
285  {
286  const SequenceTools ST;
287  sites = new VectorSiteContainer(alpha);
288  for (unsigned int i = 0; i < sites2->getNumberOfSequences(); i++)
289  {
290  sites->addSequence(*(ST.RNYslice(sites2->getSequence(i))));
291  }
292  delete sites2;
293  }
294  else
295  sites = sites2;
296 
297 
298 
299  // Look for site selection:
300  if (iAln->getFormatName() == "MASE file")
301  {
302  // getting site set:
303  string siteSet = ApplicationTools::getStringParameter("siteSelection", args, "none", suffix, suffixIsOptional, warn + 1);
304  if (siteSet != "none")
305  {
306  VectorSiteContainer* selectedSites;
307  try
308  {
309  selectedSites = dynamic_cast<VectorSiteContainer*>(MaseTools::getSelectedSites(*sites, siteSet));
310  if (verbose)
311  ApplicationTools::displayResult("Set found", TextTools::toString(siteSet) + " sites.");
312  }
313  catch (IOException& ioe)
314  {
315  throw ioe;
316  }
317  if (selectedSites->getNumberOfSites() == 0)
318  {
319  throw Exception("Site set '" + siteSet + "' is empty.");
320  }
321  delete sites;
322  sites = selectedSites;
323  }
324  }
325  else
326  {
327  // getting site set:
328  size_t nbSites = sites->getNumberOfSites();
329 
330  string siteSet = ApplicationTools::getStringParameter("input.site.selection", params, "none", suffix, suffixIsOptional, warn + 1);
331 
332  VectorSiteContainer* selectedSites=0;
333  if (siteSet != "none")
334  {
335  vector<size_t> vSite;
336  try {
337  vector<int> vSite1 = NumCalcApplicationTools::seqFromString(siteSet);
338  for (size_t i = 0; i < vSite1.size(); ++i){
339  int x = (vSite1[i] >= 0 ? vSite1[i] : static_cast<int>(nbSites) + vSite1[i]);
340  if (x >= 0)
341  vSite.push_back(static_cast<size_t>(x-1));
342  else
343  throw Exception("SequenceApplicationTools::getSiteContainer(). Incorrect negative index: " + TextTools::toString(x));
344  }
345  selectedSites = dynamic_cast<VectorSiteContainer*>(SiteContainerTools::getSelectedSites(*sites, vSite));
346  }
347  catch (Exception& e)
348  {
349  string seln;
350  map<string, string> selArgs;
351  KeyvalTools::parseProcedure(siteSet, seln, selArgs);
352  if (seln == "Sample")
353  {
354  size_t n = ApplicationTools::getParameter<size_t>("n", selArgs, nbSites, "", true, warn + 1);
355  bool replace = ApplicationTools::getBooleanParameter("replace", selArgs, false, "", true, warn + 1);
356 
357  vSite.resize(n);
358  vector<size_t> vPos;
359  for (size_t p = 0; p < nbSites; ++p)
360  vPos.push_back(p);
361 
362  RandomTools::getSample(vPos, vSite, replace);
363 
364  selectedSites = dynamic_cast<VectorSiteContainer*>(SiteContainerTools::getSelectedSites(*sites, vSite));
365  if (replace)
366  selectedSites->reindexSites();
367  }
368  }
369 
370  if (verbose)
371  ApplicationTools::displayResult("Selected sites", TextTools::toString(siteSet));
372 
373  if (selectedSites && (selectedSites->getNumberOfSites() == 0))
374  {
375  throw Exception("Site set '" + siteSet + "' is empty.");
376  }
377  delete sites;
378  sites = selectedSites;
379  }
380  }
381  return sites;
382 }
383 
384 /******************************************************************************/
385 
387  const SiteContainer& allSites,
388  map<string, string>& params,
389  string suffix,
390  bool suffixIsOptional,
391  bool gapAsUnknown,
392  bool verbose,
393  int warn)
394 {
395  // Fully resolved sites, i.e. without jokers and gaps:
396  SiteContainer* sitesToAnalyse;
397  VectorSiteContainer* sitesToAnalyse2;
398 
399  string option = ApplicationTools::getStringParameter("input.sequence.sites_to_use", params, "complete", suffix, suffixIsOptional, warn);
400  if (verbose)
401  ApplicationTools::displayResult("Sites to use", option);
402  if (option == "all")
403  {
404  sitesToAnalyse = new VectorSiteContainer(allSites);
405  string maxGapOption = ApplicationTools::getStringParameter("input.sequence.max_gap_allowed", params, "100%", suffix, suffixIsOptional, warn);
406 
407  if (maxGapOption[maxGapOption.size() - 1] == '%')
408  {
409  double gapFreq = TextTools::toDouble(maxGapOption.substr(0, maxGapOption.size() - 1)) / 100.;
410  if (gapFreq < 1)
411  {
412  if (verbose)
413  ApplicationTools::displayTask("Remove sites with gaps", true);
414  for (size_t i = sitesToAnalyse->getNumberOfSites(); i > 0; --i)
415  {
416  if (verbose)
417  ApplicationTools::displayGauge(sitesToAnalyse->getNumberOfSites() - i, sitesToAnalyse->getNumberOfSites() - 1, '=');
418  map<int, double> freq;
419  SiteTools::getFrequencies(sitesToAnalyse->getSite(i - 1), freq);
420  if (freq[-1] > gapFreq)
421  sitesToAnalyse->deleteSite(i - 1);
422  }
423  if (verbose)
424  ApplicationTools::displayTaskDone();
425  }
426  }
427  else
428  {
429  size_t gapNum = TextTools::to<size_t>(maxGapOption);
430  if (gapNum < sitesToAnalyse->getNumberOfSequences())
431  {
432  if (verbose)
433  ApplicationTools::displayTask("Remove sites with gaps", true);
434  for (size_t i = sitesToAnalyse->getNumberOfSites(); i > 0; i--)
435  {
436  if (verbose)
437  ApplicationTools::displayGauge(sitesToAnalyse->getNumberOfSites() - i, sitesToAnalyse->getNumberOfSites() - 1, '=');
438  map<int, size_t> counts;
439  SiteTools::getCounts(sitesToAnalyse->getSite(i - 1), counts);
440  if (counts[-1] > gapNum)
441  sitesToAnalyse->deleteSite(i - 1);
442  }
443  if (verbose)
444  ApplicationTools::displayTaskDone();
445  }
446  }
447 
448  string maxUnresolvedOption = ApplicationTools::getStringParameter("input.sequence.max_unresolved_allowed", params, "100%", suffix, suffixIsOptional, warn);
449 
450  int sAlph = static_cast<int>(sitesToAnalyse->getAlphabet()->getSize());
451 
452  if (maxUnresolvedOption[maxUnresolvedOption.size() - 1] == '%')
453  {
454  double unresolvedFreq = TextTools::toDouble(maxUnresolvedOption.substr(0, maxUnresolvedOption.size() - 1)) / 100.;
455  if (unresolvedFreq < 1)
456  {
457  if (verbose)
458  ApplicationTools::displayTask("Remove unresolved sites", true);
459  for (size_t i = sitesToAnalyse->getNumberOfSites(); i > 0; --i)
460  {
461  if (verbose)
462  ApplicationTools::displayGauge(sitesToAnalyse->getNumberOfSites() - i, sitesToAnalyse->getNumberOfSites() - 1, '=');
463  map<int, double> freq;
464  SiteTools::getFrequencies(sitesToAnalyse->getSite(i - 1), freq);
465  double x = 0;
466  for (int l = 0; l < sAlph; ++l)
467  {
468  x += freq[l];
469  }
470  if (1 - x > unresolvedFreq)
471  sitesToAnalyse->deleteSite(i - 1);
472  }
473  if (verbose)
474  ApplicationTools::displayTaskDone();
475  }
476  }
477  else
478  {
479  size_t nbSeq = sitesToAnalyse->getNumberOfSequences();
480  size_t unresolvedNum = TextTools::to<size_t>(maxUnresolvedOption);
481  if (unresolvedNum < nbSeq)
482  {
483  if (verbose)
484  ApplicationTools::displayTask("Remove sites with gaps", true);
485  for (size_t i = sitesToAnalyse->getNumberOfSites(); i > 0; i--)
486  {
487  if (verbose)
488  ApplicationTools::displayGauge(sitesToAnalyse->getNumberOfSites() - i, sitesToAnalyse->getNumberOfSites() - 1, '=');
489  map<int, size_t> counts;
490  SiteTools::getCounts(sitesToAnalyse->getSite(i - 1), counts);
491  size_t x = 0;
492  for (int l = 0; l < sAlph; l++)
493  {
494  x += counts[l];
495  }
496 
497  if (nbSeq - x > unresolvedNum)
498  sitesToAnalyse->deleteSite(i - 1);
499  }
500  if (verbose)
501  ApplicationTools::displayTaskDone();
502  }
503  }
504 
505  if (gapAsUnknown)
506  {
508  }
509  }
510  else if (option == "complete")
511  {
512  sitesToAnalyse = SiteContainerTools::getCompleteSites(allSites);
513  size_t nbSites = sitesToAnalyse->getNumberOfSites();
514  if (verbose)
515  ApplicationTools::displayResult("Complete sites", TextTools::toString(nbSites));
516  }
517  else if (option == "nogap")
518  {
519  sitesToAnalyse = SiteContainerTools::getSitesWithoutGaps(allSites);
520  size_t nbSites = sitesToAnalyse->getNumberOfSites();
521  if (verbose)
522  ApplicationTools::displayResult("Sites without gap", TextTools::toString(nbSites));
523  }
524  else
525  {
526  throw Exception("Option '" + option + "' unknown in parameter 'sequence.sites_to_use'.");
527  }
528 
529  const CodonAlphabet* ca = dynamic_cast<const CodonAlphabet*>(sitesToAnalyse->getAlphabet());
530  if (ca)
531  {
532  option = ApplicationTools::getStringParameter("input.sequence.remove_stop_codons", params, "no", suffix, true, warn);
533  if ((option != "") && verbose)
534  ApplicationTools::displayResult("Remove Stop Codons", option);
535 
536  if (option == "yes")
537  {
538  string codeDesc = ApplicationTools::getStringParameter("genetic_code", params, "Standard", "", true, warn);
539  auto_ptr<GeneticCode> gCode(getGeneticCode(ca->getNucleicAlphabet(), codeDesc));
540  sitesToAnalyse2 = dynamic_cast<VectorSiteContainer*>(SiteContainerTools::removeStopCodonSites(*sitesToAnalyse, *gCode));
541  delete sitesToAnalyse;
542  }
543  else
544  sitesToAnalyse2 = dynamic_cast<VectorSiteContainer*>(sitesToAnalyse);
545  }
546  else
547  sitesToAnalyse2 = dynamic_cast<VectorSiteContainer*>(sitesToAnalyse);
548 
549  return sitesToAnalyse2;
550 }
551 
552 /******************************************************************************/
553 
555  const SequenceContainer& sequences,
556  map<string, string>& params,
557  const string& suffix,
558  bool verbose,
559  int warn)
560 {
561  string sequenceFilePath = ApplicationTools::getAFilePath("output.sequence.file", params, true, false, suffix, false, "none", warn);
562  string sequenceFormat = ApplicationTools::getStringParameter("output.sequence.format", params, "Fasta", suffix, false, warn);
563  BppOSequenceWriterFormat bppoWriter(warn);
564  auto_ptr<OSequence> oSeq(bppoWriter.read(sequenceFormat));
565  if (verbose)
566  {
567  ApplicationTools::displayResult("Output sequence file " + suffix, sequenceFilePath);
568  ApplicationTools::displayResult("Output sequence format " + suffix, oSeq->getFormatName());
569  }
570 
571  // Write sequences:
572  oSeq->writeSequences(sequenceFilePath, sequences, true);
573 }
574 
575 /******************************************************************************/
576 
578  const SiteContainer& sequences,
579  map<string, string>& params,
580  const string& suffix,
581  bool verbose,
582  int warn)
583 {
584  string sequenceFilePath = ApplicationTools::getAFilePath("output.sequence.file", params, true, false, suffix, false, "none", warn);
585  string sequenceFormat = ApplicationTools::getStringParameter("output.sequence.format", params, "Fasta", suffix, false, warn);
586  BppOAlignmentWriterFormat bppoWriter(warn);
587  auto_ptr<OAlignment> oAln(bppoWriter.read(sequenceFormat));
588  if (verbose)
589  {
590  ApplicationTools::displayResult("Output alignment file " + suffix, sequenceFilePath);
591  ApplicationTools::displayResult("Output alignment format " + suffix, oAln->getFormatName());
592  }
593 
594  // Write sequences:
595  oAln->writeAlignment(sequenceFilePath, sequences, true);
596 }
597 
598 /******************************************************************************/
599 
Sequence I/O in BppO format.
This class implements the mold, protozoan, and coelenterate mitochondrial code and the Mycoplasma/Spi...
This class implements the Echinoderm and Flatworms Mitochondrial genetic code as described on the NCBI...
ISequence * read(const std::string &description)
Read a ISequence object from a string.
static void getCounts(const SymbolList &list, std::map< int, size_t > &counts)
Count all states in the list.
Sequence I/O in BppO format.
void addSequence(const Sequence &sequence, bool checkName=true)
Add a sequence to the container.
This class implements the Invertebrate Mitochondrial genetic code as describe on the NCBI website: ht...
Definition: RNY.h:65
static GeneticCode * getGeneticCode(const NucleicAlphabet *alphabet, const std::string &description)
Build a GeneticCode object according to options.
The SiteContainer interface.
Definition: SiteContainer.h:63
AlphabetIndex2 I/O in BppO format.
const Sequence & getSequence(size_t sequenceIndex) const
Retrieve a sequence object from the container.
static Sequence * RNYslice(const Sequence &sequence, int ph)
Get the RNY decomposition of a DNA sequence; with a given phase between 1 and 3, it gives the decompo...
This alphabet is used to deal NumericAlphabet.
IAlignment * read(const std::string &description)
Read a IAlignment object from a string.
static void changeGapsToUnknownCharacters(SiteContainer &sites)
Change all gaps to unknown state in a container, according to its alphabet.
static bool isRNYAlphabet(const Alphabet *alphabet)
virtual unsigned int getSize() const =0
Get the number of resolved states in the alphabet (e.g. return 4 for DNA alphabet). This is the method you'll need in most cases.
The Alphabet interface.
Definition: Alphabet.h:130
virtual const std::map< std::string, std::string > & getUnparsedArguments() const
STL namespace.
static VectorSiteContainer * getSiteContainer(const Alphabet *alpha, std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a SiteContainer object according to options.
SequenceTools static class.
Definition: SequenceTools.h:97
static void getFrequencies(const SymbolList &list, std::map< int, double > &frequencies, bool resolveUnknowns=false)
Get all states frequencies in the list.
static SiteContainer * getCompleteSites(const SiteContainer &sites)
Retrieves complete sites from SiteContainer.
static AlphabetIndex2 * getAlphabetIndex2(const Alphabet *alphabet, const std::string &description, const std::string &message="Alphabet distance:", bool verbose=true)
Build a AlphabetIndex2 object for a given alphabet.
AlphabetIndex1 I/O in BppO format.
The base class for word alphabets.
Definition: WordAlphabet.h:66
static SiteContainer * removeStopCodonSites(const SiteContainer &sites, const GeneticCode &gCode)
Get a site set without stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception...
static SiteContainer * getSelectedSites(const SiteContainer &sequences, const SiteSelection &selection)
Create a new container with a specified set of sites.
This class implements the vertebrate mitochondrial genetic code as describe on the NCBI web site: htt...
This alphabet is used to deal with proteins.
This class implements the Invertebrate Mitochondrial genetic code as describe on the NCBI website: ht...
One-dimensional alphabet index interface.
Codon alphabet class.
Definition: CodonAlphabet.h:63
Sequence I/O in BppO format.
This class implements the ascidian mitochondrial genetic code as describe on the NCBI web site: http:...
static bool isNucleicAlphabet(const Alphabet *alphabet)
The DefaultAlphabet class.
static SequenceContainer * getSequenceContainer(const Alphabet *alpha, std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a SequenceContainer object according to options.
OAlignment * read(const std::string &description)
Read a OAlignment object from a string.
virtual const Site & getSite(size_t siteIndex) const =0
Get a site from the container.
static void writeAlignmentFile(const SiteContainer &sequences, std::map< std::string, std::string > &params, const std::string &suffix="", bool verbose=true, int warn=1)
Write a sequence alignment file according to options.
OSequence * read(const std::string &description)
Read a OSequence object from a string.
Sequence I/O in BppO format.
size_t getNumberOfSites() const
Get the number of sites in the container.
AlphabetIndex2 * read(const std::string &description)
Read an AlphabetIndex2 object from a string.
void reindexSites()
Set all positions attributes.
Two-dimensional alphabet index interface.
virtual size_t getNumberOfSites() const =0
Get the number of sites in the container.
This alphabet is used to deal with DNA sequences.
Definition: DNA.h:60
AlphabetIndex1 * read(const std::string &description)
Read a AlphabetIndex1 object from a string.
static AlphabetIndex1 * getAlphabetIndex1(const Alphabet *alphabet, const std::string &description, const std::string &message="Alphabet distance:", bool verbose=true)
Build a AlphabetIndex1 object for a given alphabet.
virtual const NucleicAlphabet *const getNucleicAlphabet() const
static VectorSiteContainer * getSitesToAnalyse(const SiteContainer &allSites, std::map< std::string, std::string > &params, std::string suffix="", bool suffixIsOptional=true, bool gapAsUnknown=true, bool verbose=true, int warn=1)
Retrieves sites suitable for the analysis.
The BinaryAlphabet class, letters are 0 and 1.
static SiteContainer * getSitesWithoutGaps(const SiteContainer &sites)
Retrieves sites without gaps from SiteContainer.
This alphabet is used to deal with RNA sequences.
Definition: RNA.h:58
virtual const Alphabet * getAlphabet() const =0
Get sequence container's alphabet.
Partial implementation of the Transliterator interface for genetic code object.
Definition: GeneticCode.h:79
static Alphabet * getAlphabet(std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, bool allowGeneric=false, int warn=1)
Build an Alphabet object according to options.
static void writeSequenceFile(const SequenceContainer &sequences, std::map< std::string, std::string > &params, const std::string &suffix="", bool verbose=true, int warn=1)
Write a sequence file according to options.
static SiteContainer * getSelectedSites(const SiteContainer &sequences, const std::string &setName)
Create a new container corresponding to a site set given in the mase+ format.
Definition: MaseTools.cpp:151
The SequenceContainer interface.
This class implements the standard genetic code as describe on the NCBI web site: http://www...
The abstract base class for nucleic alphabets.
The VectorSiteContainer class.
size_t getNumberOfSequences() const
Get the number of sequences in the container.
virtual size_t getNumberOfSequences() const =0
Get the number of sequences in the container.
virtual void deleteSite(size_t siteIndex)=0
Delete a site in the container.