42 #include "../Alphabet/BinaryAlphabet.h" 43 #include "../Alphabet/DefaultAlphabet.h" 44 #include "../Alphabet/CodonAlphabet.h" 45 #include "../Alphabet/AlphabetTools.h" 46 #include "../GeneticCode/EchinodermMitochondrialGeneticCode.h" 47 #include "../GeneticCode/InvertebrateMitochondrialGeneticCode.h" 48 #include "../GeneticCode/StandardGeneticCode.h" 49 #include "../GeneticCode/VertebrateMitochondrialGeneticCode.h" 50 #include "../GeneticCode/YeastMitochondrialGeneticCode.h" 51 #include "../GeneticCode/AscidianMitochondrialGeneticCode.h" 52 #include "../GeneticCode/MoldMitochondrialGeneticCode.h" 53 #include "../Io/BppOSequenceReaderFormat.h" 54 #include "../Io/BppOAlignmentReaderFormat.h" 55 #include "../Io/BppOSequenceWriterFormat.h" 56 #include "../Io/BppOAlignmentWriterFormat.h" 57 #include "../Io/BppOAlphabetIndex1Format.h" 58 #include "../Io/BppOAlphabetIndex2Format.h" 59 #include "../Io/MaseTools.h" 60 #include "../SiteTools.h" 61 #include "../SequenceTools.h" 62 #include <Bpp/App/ApplicationTools.h> 63 #include <Bpp/Text/TextTools.h> 64 #include <Bpp/Text/KeyvalTools.h> 65 #include <Bpp/App/NumCalcApplicationTools.h> 66 #include <Bpp/Numeric/Random/RandomTools.h> 74 map<string, string>& params,
76 bool suffixIsOptional,
79 int warn)
throw (Exception)
82 string alphtt = ApplicationTools::getStringParameter(
"alphabet", params,
"DNA", suffix, suffixIsOptional, warn);
85 map<string, string> args;
88 KeyvalTools::parseProcedure(alphtt, alphabet, args);
91 if (alphabet ==
"Word")
93 if (args.find(
"length") == args.end())
94 throw Exception(
"Missing length parameter for Word alphabet");
95 lg = TextTools::to<unsigned int>(args[
"length"]);
96 if (args.find(
"letter") == args.end())
97 throw Exception(
"Missing letter alphabet for Word alphabet");
98 alphabet = args[
"letter"];
101 else if (alphabet ==
"RNY")
103 if (args.find(
"letter") == args.end())
104 throw Exception(
"Missing letter alphabet for RNY alphabet");
105 alphabet = args[
"letter"];
109 if (alphabet ==
"Binary")
111 else if (alphabet ==
"DNA")
113 bool mark = ApplicationTools::getBooleanParameter(
"bangAsGap", args,
false,
"",
true, warn + 1);
114 chars =
new DNA(mark);
116 else if (alphabet ==
"RNA")
118 bool mark = ApplicationTools::getBooleanParameter(
"bangAsGap", args,
false,
"",
true, warn + 1);
119 chars =
new RNA(mark);
121 else if (alphabet ==
"Protein")
123 else if (allowGeneric && alphabet ==
"Generic")
125 else if (alphabet ==
"Codon")
127 if (args.find(
"letter") == args.end())
128 throw Exception(
"Missing 'letter' argument in Codon :" + alphabet);
129 if (args.find(
"type") != args.end())
130 throw Exception(
"'type' argument in Codon is deprecated and has been superseded by the 'genetic_code' option.");
132 string alphnDesc = ApplicationTools::getStringParameter(
"letter", args,
"RNA");
134 map<string, string> alphnArgs;
135 KeyvalTools::parseProcedure(alphnDesc, alphn, alphnArgs);
140 bool mark = ApplicationTools::getBooleanParameter(
"bangAsGap", alphnArgs,
false,
"",
true, warn + 1);
141 pnalph =
new RNA(mark);
143 else if (alphn ==
"DNA")
145 bool mark = ApplicationTools::getBooleanParameter(
"bangAsGap", alphnArgs,
false,
"",
true, warn + 1);
146 pnalph =
new DNA(mark);
149 throw Exception(
"Alphabet not known in Codon : " + alphn);
153 alphabet = alphabet +
"(" + alphn +
")";
156 throw Exception(
"Alphabet not known: " + alphabet);
162 for (
unsigned i = 0; i < lg; i++)
164 al += alphabet +
" ";
166 alphabet =
"Word(" + al +
")";
172 chars =
new RNY(*(dynamic_cast<NucleicAlphabet*>(chars)));
173 alphabet =
"RNY(" + alphabet +
")";
176 throw Exception(
"RNY needs a Nucleic Alphabet, instead of " + alphabet);
181 ApplicationTools::displayResult(
"Alphabet type ", alphabet);
189 const string& description)
throw (Exception)
192 if (description.find(
"EchinodermMitochondrial") != string::npos || description.find(
"9") != string::npos)
194 else if (description.find(
"InvertebrateMitochondrial") != string::npos || description.find(
"5") != string::npos)
196 else if (description.find(
"Standard") != string::npos || description.find(
"1") != string::npos)
198 else if (description.find(
"VertebrateMitochondrial") != string::npos || description.find(
"2") != string::npos)
200 else if (description.find(
"YeastMitochondrial") != string::npos || description.find(
"3") != string::npos)
202 else if (description.find(
"AscidianMitochondrial") != string::npos || description.find(
"13") != string::npos)
204 else if (description.find(
"MoldMitochondrial") != string::npos || description.find(
"4") != string::npos)
207 throw Exception(
"Unknown GeneticCode: " + description);
217 return reader.
read(description);
224 return reader.
read(description);
230 map<string, string>& params,
231 const string& suffix,
232 bool suffixIsOptional,
236 string sequenceFilePath = ApplicationTools::getAFilePath(
"input.sequence.file", params,
true,
true, suffix, suffixIsOptional,
"none", warn);
237 string sequenceFormat = ApplicationTools::getStringParameter(
"input.sequence.format", params,
"Fasta()", suffix, suffixIsOptional, warn);
239 auto_ptr<ISequence> iSeq(bppoReader.
read(sequenceFormat));
242 ApplicationTools::displayResult(
"Sequence file " + suffix, sequenceFilePath);
243 ApplicationTools::displayResult(
"Sequence format " + suffix, iSeq->getFormatName());
254 map<string, string>& params,
255 const string& suffix,
256 bool suffixIsOptional,
260 string sequenceFilePath = ApplicationTools::getAFilePath(
"input.sequence.file", params,
true,
true, suffix, suffixIsOptional,
"none", warn);
261 string sequenceFormat = ApplicationTools::getStringParameter(
"input.sequence.format", params,
"Fasta()", suffix, suffixIsOptional, warn);
263 auto_ptr<IAlignment> iAln(bppoReader.
read(sequenceFormat));
267 ApplicationTools::displayResult(
"Sequence file " + suffix, sequenceFilePath);
268 ApplicationTools::displayResult(
"Sequence format " + suffix, iAln->getFormatName());
273 alpha2 = &dynamic_cast<const RNY*>(alpha)->getLetterAlphabet();
277 const SequenceContainer* seqCont = iAln->readAlignment(sequenceFilePath, alpha2);
300 if (iAln->getFormatName() ==
"MASE file")
303 string siteSet = ApplicationTools::getStringParameter(
"siteSelection", args,
"none", suffix, suffixIsOptional, warn + 1);
304 if (siteSet !=
"none")
311 ApplicationTools::displayResult(
"Set found", TextTools::toString(siteSet) +
" sites.");
313 catch (IOException& ioe)
319 throw Exception(
"Site set '" + siteSet +
"' is empty.");
322 sites = selectedSites;
330 string siteSet = ApplicationTools::getStringParameter(
"input.site.selection", params,
"none", suffix, suffixIsOptional, warn + 1);
333 if (siteSet !=
"none")
335 vector<size_t> vSite;
337 vector<int> vSite1 = NumCalcApplicationTools::seqFromString(siteSet);
338 for (
size_t i = 0; i < vSite1.size(); ++i){
339 int x = (vSite1[i] >= 0 ? vSite1[i] :
static_cast<int>(nbSites) + vSite1[i]);
341 vSite.push_back(static_cast<size_t>(x-1));
343 throw Exception(
"SequenceApplicationTools::getSiteContainer(). Incorrect negative index: " + TextTools::toString(x));
350 map<string, string> selArgs;
351 KeyvalTools::parseProcedure(siteSet, seln, selArgs);
352 if (seln ==
"Sample")
354 size_t n = ApplicationTools::getParameter<size_t>(
"n", selArgs, nbSites,
"",
true, warn + 1);
355 bool replace = ApplicationTools::getBooleanParameter(
"replace", selArgs,
false,
"",
true, warn + 1);
359 for (
size_t p = 0; p < nbSites; ++p)
362 RandomTools::getSample(vPos, vSite, replace);
371 ApplicationTools::displayResult(
"Selected sites", TextTools::toString(siteSet));
375 throw Exception(
"Site set '" + siteSet +
"' is empty.");
378 sites = selectedSites;
388 map<string, string>& params,
390 bool suffixIsOptional,
399 string option = ApplicationTools::getStringParameter(
"input.sequence.sites_to_use", params,
"complete", suffix, suffixIsOptional, warn);
401 ApplicationTools::displayResult(
"Sites to use", option);
405 string maxGapOption = ApplicationTools::getStringParameter(
"input.sequence.max_gap_allowed", params,
"100%", suffix, suffixIsOptional, warn);
407 if (maxGapOption[maxGapOption.size() - 1] ==
'%')
409 double gapFreq = TextTools::toDouble(maxGapOption.substr(0, maxGapOption.size() - 1)) / 100.;
413 ApplicationTools::displayTask(
"Remove sites with gaps",
true);
418 map<int, double> freq;
420 if (freq[-1] > gapFreq)
424 ApplicationTools::displayTaskDone();
429 size_t gapNum = TextTools::to<size_t>(maxGapOption);
430 if (gapNum < sitesToAnalyse->getNumberOfSequences())
433 ApplicationTools::displayTask(
"Remove sites with gaps",
true);
438 map<int, size_t> counts;
440 if (counts[-1] > gapNum)
444 ApplicationTools::displayTaskDone();
448 string maxUnresolvedOption = ApplicationTools::getStringParameter(
"input.sequence.max_unresolved_allowed", params,
"100%", suffix, suffixIsOptional, warn);
452 if (maxUnresolvedOption[maxUnresolvedOption.size() - 1] ==
'%')
454 double unresolvedFreq = TextTools::toDouble(maxUnresolvedOption.substr(0, maxUnresolvedOption.size() - 1)) / 100.;
455 if (unresolvedFreq < 1)
458 ApplicationTools::displayTask(
"Remove unresolved sites",
true);
463 map<int, double> freq;
466 for (
int l = 0; l < sAlph; ++l)
470 if (1 - x > unresolvedFreq)
474 ApplicationTools::displayTaskDone();
480 size_t unresolvedNum = TextTools::to<size_t>(maxUnresolvedOption);
481 if (unresolvedNum < nbSeq)
484 ApplicationTools::displayTask(
"Remove sites with gaps",
true);
489 map<int, size_t> counts;
492 for (
int l = 0; l < sAlph; l++)
497 if (nbSeq - x > unresolvedNum)
501 ApplicationTools::displayTaskDone();
510 else if (option ==
"complete")
515 ApplicationTools::displayResult(
"Complete sites", TextTools::toString(nbSites));
517 else if (option ==
"nogap")
522 ApplicationTools::displayResult(
"Sites without gap", TextTools::toString(nbSites));
526 throw Exception(
"Option '" + option +
"' unknown in parameter 'sequence.sites_to_use'.");
532 option = ApplicationTools::getStringParameter(
"input.sequence.remove_stop_codons", params,
"no", suffix,
true, warn);
533 if ((option !=
"") && verbose)
534 ApplicationTools::displayResult(
"Remove Stop Codons", option);
538 string codeDesc = ApplicationTools::getStringParameter(
"genetic_code", params,
"Standard",
"",
true, warn);
541 delete sitesToAnalyse;
549 return sitesToAnalyse2;
556 map<string, string>& params,
557 const string& suffix,
561 string sequenceFilePath = ApplicationTools::getAFilePath(
"output.sequence.file", params,
true,
false, suffix,
false,
"none", warn);
562 string sequenceFormat = ApplicationTools::getStringParameter(
"output.sequence.format", params,
"Fasta", suffix,
false, warn);
564 auto_ptr<OSequence> oSeq(bppoWriter.
read(sequenceFormat));
567 ApplicationTools::displayResult(
"Output sequence file " + suffix, sequenceFilePath);
568 ApplicationTools::displayResult(
"Output sequence format " + suffix, oSeq->getFormatName());
572 oSeq->writeSequences(sequenceFilePath, sequences,
true);
579 map<string, string>& params,
580 const string& suffix,
584 string sequenceFilePath = ApplicationTools::getAFilePath(
"output.sequence.file", params,
true,
false, suffix,
false,
"none", warn);
585 string sequenceFormat = ApplicationTools::getStringParameter(
"output.sequence.format", params,
"Fasta", suffix,
false, warn);
587 auto_ptr<OAlignment> oAln(bppoWriter.
read(sequenceFormat));
590 ApplicationTools::displayResult(
"Output alignment file " + suffix, sequenceFilePath);
591 ApplicationTools::displayResult(
"Output alignment format " + suffix, oAln->getFormatName());
595 oAln->writeAlignment(sequenceFilePath, sequences,
true);
This class implements the mold, protozoan, and coelenterate mitochondrial code and the Mycoplasma/Spi...
This class implements the Echinoderm and Faltworms Mitochondrial genetic code as describe on the NCBI...
void addSequence(const Sequence &sequence, bool checkName=true)
Add a sequence to the container.
This class implements the Invertebrate Mitochondrial genetic code as describe on the NCBI website: ht...
The SiteContainer interface.
const Sequence & getSequence(size_t sequenceIndex) const
Retrieve a sequence object from the container.
This alphabet is used to deal NumericAlphabet.
virtual unsigned int getSize() const =0
Get the number of resolved states in the alphabet (e.g. return 4 for DNA alphabet). This is the method you'll need in most cases.
The base class for word alphabets.
This class implements the vertebrate mitochondrial genetic code as describe on the NCBI web site: htt...
This alphabet is used to deal with proteins.
This class implements the Invertebrate Mitochondrial genetic code as describe on the NCBI website: ht...
One dimensionnal alphabet index interface.
This class implements the ascidian mitochondrial genetic code as describe on the NCBI web site: http:...
The DefaultAlphabet class.
virtual const Site & getSite(size_t siteIndex) const =0
Get a site from the container.
size_t getNumberOfSites() const
Get the number of sites in the container.
void reindexSites()
Set all positions attributes.
Two dimensionnal alphabet index interface.
virtual size_t getNumberOfSites() const =0
Get the number of sites in the container.
This alphabet is used to deal with DNA sequences.
virtual const NucleicAlphabet *const getNucleicAlphabet() const
The BinaryAlphabet class, letters are 0 and 1.
This alphabet is used to deal with RNA sequences.
virtual const Alphabet * getAlphabet() const =0
Get sequence container's alphabet.
Partial implementation of the Transliterator interface for genetic code object.
The SequenceContainer interface.
This class implements the standard genetic code as describe on the NCBI web site: http://www...
The abstract base class for nucleic alphabets.
The VectorSiteContainer class.
size_t getNumberOfSequences() const
Get the number of sequences in the container.
virtual size_t getNumberOfSequences() const =0
Get the number of sequences in the container.
virtual void deleteSite(size_t siteIndex)=0
Delete a site in the container.