bpp-seq
2.2.0
|
SequenceTools static class. More...
#include <Bpp/Seq/SequenceTools.h>
Public Member Functions | |
SequenceTools () | |
virtual | ~SequenceTools () |
Static Public Member Functions | |
static Sequence * | subseq (const Sequence &sequence, size_t begin, size_t end) throw (IndexOutOfBoundsException, Exception) |
Get a sub-sequence. More... | |
static Sequence * | concatenate (const Sequence &seq1, const Sequence &seq2) throw (AlphabetMismatchException, Exception) |
Concatenate two sequences. More... | |
static Sequence & | complement (Sequence &seq) throw (AlphabetException) |
Complement the nucleotide sequence itself. More... | |
static Sequence * | getComplement (const Sequence &sequence) throw (AlphabetException) |
Get the complementary sequence of a nucleotide sequence. More... | |
static Sequence * | transcript (const Sequence &sequence) throw (AlphabetException) |
Get the transcription sequence of a DNA sequence. More... | |
static Sequence * | reverseTranscript (const Sequence &sequence) throw (AlphabetException) |
Get the reverse-transcription sequence of a RNA sequence. More... | |
static Sequence & | invert (Sequence &seq) |
Inverse a sequence from 5'->3' to 3'->5' and vice-versa. More... | |
static Sequence * | getInvert (const Sequence &sequence) |
Inverse a sequence from 5'->3' to 3'->5' and vice-versa. More... | |
static Sequence & | invertComplement (Sequence &seq) |
Inverse and complement a sequence. More... | |
static double | getPercentIdentity (const Sequence &seq1, const Sequence &seq2, bool ignoreGaps=false) throw (AlphabetMismatchException, SequenceNotAlignedException) |
static size_t | getNumberOfSites (const Sequence &seq) |
static size_t | getNumberOfCompleteSites (const Sequence &seq) |
static Sequence * | getSequenceWithCompleteSites (const Sequence &seq) |
Keep only complete sites in a sequence. More... | |
static size_t | getNumberOfUnresolvedSites (const Sequence &seq) |
static void | removeGaps (Sequence &seq) |
Remove gaps from a sequence. More... | |
static Sequence * | getSequenceWithoutGaps (const Sequence &seq) |
Get a copy of the sequence without gaps. More... | |
static void | removeStops (Sequence &seq, const GeneticCode &gCode) throw (Exception) |
Remove stops from a codon sequence. More... | |
static Sequence * | getSequenceWithoutStops (const Sequence &seq, const GeneticCode &gCode) throw (Exception) |
Get a copy of the codon sequence without stops. More... | |
static void | replaceStopsWithGaps (Sequence &seq, const GeneticCode &gCode) throw (Exception) |
Replace stop codons by gaps. More... | |
static BowkerTest * | bowkerTest (const Sequence &seq1, const Sequence &seq2) throw (SequenceNotAlignedException) |
Bowker's test for homogeneity. More... | |
static void | getPutativeHaplotypes (const Sequence &seq, std::vector< Sequence *> &hap, unsigned int level=2) |
Get all putative haplotypes from a heterozygous sequence. More... | |
static Sequence * | combineSequences (const Sequence &s1, const Sequence &s2) throw (AlphabetMismatchException) |
Combine two sequences. More... | |
static Sequence * | subtractHaplotype (const Sequence &s, const Sequence &h, std::string name="", unsigned int level=1) throw (SequenceNotAlignedException) |
Subtract a haplotype from a heterozygous sequence. More... | |
static Sequence * | RNYslice (const Sequence &sequence, int ph) throw (AlphabetException) |
Get the RNY decomposition of a DNA sequence; with a given phase between 1 and 3, it gives the decomposition in this phase; in phase 1, the first triplet is centered on the first character. Without a phase the function gives the alternative succession in phases 1, 2 and 3. More... | |
static Sequence * | RNYslice (const Sequence &sequence) throw (AlphabetException) |
static void | getCDS (Sequence &sequence, const GeneticCode &gCode, bool checkInit, bool checkStop, bool includeInit=true, bool includeStop=true) |
Extract CDS part from a codon sequence. Optionally check for initiator and stop codons, or both. More... | |
static size_t | findFirstOf (const Sequence &seq, const Sequence &motif, bool strict=true) |
Find the position of a motif in a sequence. More... | |
static Sequence * | getRandomSequence (const Alphabet *alphabet, size_t length) |
Get a random sequence of given size and alphabet, with all states having equal probability. More... | |
static void | getCounts (const SymbolList &list, std::map< int, size_t > &counts) |
Count all states in the list. More... | |
static void | getCounts (const SymbolList &list1, const SymbolList &list2, std::map< int, std::map< int, size_t > > &counts) throw (DimensionException) |
Count all pairs of states for two lists of the same size. More... | |
static void | getCounts (const SymbolList &list, std::map< int, double > &counts, bool resolveUnknowns) |
Count all states in the list, optionally resolving unknown characters. More... | |
static void | getCounts (const SymbolList &list1, const SymbolList &list2, std::map< int, std::map< int, double > > &counts, bool resolveUnknowns) throw (DimensionException) |
Count all pairs of states for two lists of the same size, optionally resolving unknown characters. More... | |
static void | getFrequencies (const SymbolList &list, std::map< int, double > &frequencies, bool resolveUnknowns=false) |
Get all states frequencies in the list. More... | |
static void | getFrequencies (const SymbolList &list1, const SymbolList &list2, std::map< int, std::map< int, double > > &frequencies, bool resolveUnknowns=false) throw (DimensionException) |
Get all state pair frequencies for two lists of the same size. More... | |
static double | getGCContent (const SymbolList &list, bool ignoreUnresolved=true, bool ignoreGap=true) throw (AlphabetException) |
Get the GC content of a symbol list. More... | |
static size_t | getNumberOfDistinctPositions (const SymbolList &l1, const SymbolList &l2) throw (AlphabetMismatchException) |
Get the number of distinct positions. More... | |
static size_t | getNumberOfPositionsWithoutGap (const SymbolList &l1, const SymbolList &l2) throw (AlphabetMismatchException) |
Get the number of positions without gap. More... | |
static void | changeGapsToUnknownCharacters (SymbolList &l) |
Change all gap elements to unknown characters. More... | |
static void | changeUnresolvedCharactersToGaps (SymbolList &l) |
Change all unknown characters to gap elements. More... | |
Static Private Attributes | |
static DNA | _DNA |
static RNA | _RNA |
static RNY | _RNY |
static NucleicAcidsReplication | _DNARep |
static NucleicAcidsReplication | _RNARep |
static NucleicAcidsReplication | _transc |
SequenceTools static class.
Implement methods to manipulate sequences
Definition at line 97 of file SequenceTools.h.
|
inline |
Definition at line 109 of file SequenceTools.h.
|
inlinevirtual |
Definition at line 110 of file SequenceTools.h.
|
static |
Bowker's test for homogeneity.
Computes the contingency table of occurrence of all pairs of states and test its symmetry using Bowker's (1948) test.
Reference:
seq1 | The first sequence. |
seq2 | The second sequence. |
SequenceNotAlignedException | If the two sequences do not have the same length. |
Definition at line 412 of file SequenceTools.cpp.
References bpp::Alphabet::getSize(), bpp::Alphabet::isGap(), bpp::Alphabet::isUnresolved(), bpp::BowkerTest::setPValue(), and bpp::BowkerTest::setStatistic().
|
staticinherited |
Change all gap elements to unknown characters.
l | The input list of characters. |
Definition at line 180 of file SymbolListTools.cpp.
References bpp::SymbolList::getAlphabet(), bpp::Alphabet::getUnknownCharacterCode(), bpp::Alphabet::isGap(), and bpp::SymbolList::size().
|
staticinherited |
Change all unknown characters to gap elements.
l | The input list of characters. |
Definition at line 189 of file SymbolListTools.cpp.
References bpp::SymbolList::getAlphabet(), bpp::Alphabet::getGapCharacterCode(), bpp::Alphabet::isUnresolved(), and bpp::SymbolList::size().
|
static |
Combine two sequences.
Definition at line 516 of file SequenceTools.cpp.
References bpp::Alphabet::getGeneric().
|
static |
Complement the nucleotide sequence itself.
seq | The sequence to be complemented. |
AlphabetException | if the sequence is not a nucleotide sequence. |
Definition at line 108 of file SequenceTools.cpp.
References bpp::NucleicAcidsReplication::translate().
|
static |
Concatenate two sequences.
Sequences must have the same name and alphabets. Only first sequence's commentaries are kept.
seq1 | The first sequence. |
seq2 | The second sequence. |
AlphabetMismatchException | If the two alphabets do not match. |
Exception | If the sequence names do not match. |
Definition at line 89 of file SequenceTools.cpp.
|
static |
Find the position of a motif in a sequence.
seq | The reference sequence |
motif | The motif to find |
strict | If true (default), find exactly the motif. If false, find a compatible match. |
Definition at line 684 of file SequenceTools.cpp.
References bpp::SymbolList::getAlphabet(), bpp::SymbolList::getValue(), bpp::AlphabetTools::match(), and bpp::SymbolList::size().
|
static |
Extract CDS part from a codon sequence. Optionally check for initiator and stop codons, or both.
sequence | The sequence to be reduced to CDS part. |
gCode | The genetic code according to which start and stop codons are specified. |
checkInit | If true, then everything before the initiator codon will be removed, together with the initiator codon if includeInit is false. |
checkStop | If true, then everything after the first stop codon will be removed, together with the stop codon if includeStop is false. |
includeInit | Tell if initiator codon should be kept or removed. No effect if checkInit is false. |
includeStop | Tell if stop codon should be kept or removed. No effect if checkStop is false. |
Definition at line 655 of file SequenceTools.cpp.
References bpp::SymbolList::deleteElement(), bpp::SymbolList::getAlphabet(), bpp::GeneticCode::isStart(), bpp::GeneticCode::isStop(), and bpp::SymbolList::size().
|
static |
Get the complementary sequence of a nucleotide sequence.
sequence | The sequence to complement. |
AlphabetException | If the sequence is not a nucleotide sequence. |
Definition at line 133 of file SequenceTools.cpp.
References bpp::NucleicAcidsReplication::translate().
|
inlinestaticinherited |
Count all states in the list.
list | The list. |
counts | The output map to store the counts (existing counts will be incremented). |
Definition at line 70 of file SymbolListTools.h.
References bpp::SymbolList::getContent().
Referenced by bpp::SiteTools::getNumberOfDistinctCharacters(), bpp::SequenceApplicationTools::getSitesToAnalyse(), bpp::SiteTools::isParsimonyInformativeSite(), and bpp::CodonSiteTools::numberOfNonSynonymousSubstitutions().
|
inlinestaticinherited |
Count all pairs of states for two lists of the same size.
NB: The two lists do not need to share the same alphabet! The states of the first list will be used as the first index in the output, and the ones from the second list as the second index.
list1 | The first list. |
list2 | The second list. |
counts | The output map to store the counts (existing counts will be incremented). |
Definition at line 90 of file SymbolListTools.h.
|
staticinherited |
Count all states in the list, optionally resolving unknown characters.
For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4.
list | The list. |
counts | The output map to store the counts (existing counts will be incremented). |
resolveUnknowns | Tell if unknown characters must be resolved. For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4. |
Definition at line 51 of file SymbolListTools.cpp.
References bpp::Alphabet::getAlias(), bpp::SymbolList::getAlphabet(), and bpp::SymbolList::getContent().
|
staticinherited |
Count all pairs of states for two lists of the same size, optionally resolving unknown characters.
For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4.
NB: The two lists do not need to share the same alphabet! The states of the first list will be used as the first index in the output, and the ones from the second list as the second index.
list1 | The first list. |
list2 | The second list. |
counts | The output map to store the counts (existing counts will be incremented). |
resolveUnknowns | Tell if unknown characters must be resolved. For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4. |
Definition at line 73 of file SymbolListTools.cpp.
|
staticinherited |
Get all states frequencies in the list.
list | The list. |
resolveUnknowns | Tell if unknown characters must be resolved. For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4. |
frequencies | The output map with all states and corresponding frequencies. Existing frequencies will be erased if any. |
Definition at line 96 of file SymbolListTools.cpp.
References bpp::SymbolList::size().
Referenced by bpp::CodonSiteTools::generateCodonSiteWithoutRareVariant(), bpp::SiteContainerTools::getConsensus(), bpp::SequenceApplicationTools::getSitesToAnalyse(), bpp::CodonSiteTools::meanNumberOfSynonymousPositions(), bpp::CodonSiteTools::piNonSynonymous(), bpp::CodonSiteTools::piSynonymous(), and bpp::SiteContainerTools::removeGapSites().
|
staticinherited |
Get all state pair frequencies for two lists of the same size.
list1 | The first list. |
list2 | The second list. |
resolveUnknowns | Tell if unknown characters must be resolved. For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4. |
frequencies | The output map with all state pairs and corresponding frequencies. Existing frequencies will be erased if any. |
Definition at line 107 of file SymbolListTools.cpp.
|
staticinherited |
Get the GC content of a symbol list.
list | The list. |
ignoreUnresolved | Do not count unresolved states. Otherwise, weight by each state probability in case of ambiguity (e.g. the R state counts for 0.5). |
ignoreGap | Do not count gaps in total. |
AlphabetException | If the list is not made of nucleotide states. |
Definition at line 119 of file SymbolListTools.cpp.
Inverse a sequence from 5'->3' to 3'->5' and vice-versa.
ABCDEF becomes FEDCBA, and the sense attribute is changed (may be inhibited).
sequence | The sequence to inverse. |
Definition at line 198 of file SequenceTools.cpp.
References bpp::Sequence::clone().
|
static |
seq | The sequence to analyse. |
Definition at line 293 of file SequenceTools.cpp.
References bpp::SymbolList::getAlphabet(), bpp::Alphabet::isGap(), bpp::Alphabet::isUnresolved(), and bpp::SymbolList::size().
|
staticinherited |
Get the number of distinct positions.
The comparison is performed from position 0 up to the minimum size of the two vectors.
l1 | SymbolList 1. |
l2 | SymbolList 2. |
AlphabetMismatchException | if the two lists do not have the same alphabet type. |
Definition at line 158 of file SymbolListTools.cpp.
|
staticinherited |
Get the number of positions without gap.
The comparison is performed from position 0 up to the minimum size of the two vectors.
l1 | SymbolList 1. |
l2 | SymbolList 2. |
AlphabetMismatchException | if the two lists do not have the same alphabet type. |
Definition at line 169 of file SymbolListTools.cpp.
|
static |
seq | The sequence to analyse. |
Definition at line 279 of file SequenceTools.cpp.
References bpp::SymbolList::getAlphabet(), bpp::Alphabet::isGap(), and bpp::SymbolList::size().
|
static |
seq | The sequence to analyse. |
Definition at line 323 of file SequenceTools.cpp.
References bpp::SymbolList::getAlphabet(), bpp::Alphabet::isUnresolved(), and bpp::SymbolList::size().
|
static |
seq1 | The first sequence. |
seq2 | The second sequence. |
ignoreGaps | If true, only positions without gaps will be used for the counting. |
AlphabetMismatchException | If the two sequences do not have the same alphabet. |
SequenceNotAlignedException | If the two sequences do not have the same length. |
Definition at line 245 of file SequenceTools.cpp.
|
static |
Get all putative haplotypes from a heterozygous sequence.
seq | The sequence to resolve |
hap | The vector to fill with the new sequences |
level | The maximum number of states that a generic character may code for (if this number is higher than level, the state will not be resolved). For instance, if level = 3 and the alphabet is DNA, all generic characters will be resolved except N. |
Definition at line 463 of file SequenceTools.cpp.
References bpp::SymbolList::addElement(), bpp::Alphabet::getAlias(), bpp::SymbolList::getAlphabet(), bpp::Alphabet::getGapCharacterCode(), bpp::Sequence::getName(), bpp::Sequence::setName(), and bpp::SymbolList::size().
Get a random sequence of given size and alphabet, with all states having equal probability.
alphabet | The alphabet to use. |
length | The length of the sequence to generate. |
Definition at line 716 of file SequenceTools.cpp.
References bpp::Alphabet::getSize().
Keep only complete sites in a sequence.
The deleteElement method of the Sequence object will be used where appropriate.
seq | The sequence to analyse. |
Definition at line 307 of file SequenceTools.cpp.
References bpp::Sequence::clone(), bpp::SymbolList::getAlphabet(), bpp::Alphabet::isGap(), bpp::Alphabet::isUnresolved(), bpp::Sequence::setContent(), and bpp::SymbolList::size().
Get a copy of the sequence without gaps.
A whole new sequence will be created by adding all non-gap positions. The original sequence will be cloned to serve as a template.
seq | The sequence to analyse. |
Definition at line 337 of file SequenceTools.cpp.
References bpp::Sequence::clone(), bpp::SymbolList::getAlphabet(), bpp::Alphabet::isGap(), bpp::Sequence::setContent(), and bpp::SymbolList::size().
|
static |
Get a copy of the codon sequence without stops.
A whole new sequence will be created by adding all non-stop positions. The original sequence will be cloned to serve as a template.
seq | The sequence to analyse. |
gCode | The genetic code according to which stop codons are specified. |
Exception | if the input sequence does not have a codon alphabet. |
Definition at line 365 of file SequenceTools.cpp.
References bpp::Sequence::setContent().
Inverse a sequence from 5'->3' to 3'->5' and vice-versa.
ABCDEF becomes FEDCBA, and the sense attribute is changed (may be inhibited).
seq | The sequence to inverse. |
Definition at line 181 of file SequenceTools.cpp.
References bpp::SymbolList::getValue(), bpp::SymbolList::setElement(), and bpp::SymbolList::size().
Inverse and complement a sequence.
This method is more accurate than calling invert and complement separately.
seq | The sequence to inverse and complement. |
Definition at line 207 of file SequenceTools.cpp.
References bpp::SymbolList::getAlphabet(), bpp::Alphabet::getAlphabetType(), bpp::SymbolList::getValue(), bpp::SymbolList::setElement(), bpp::SymbolList::size(), and bpp::NucleicAcidsReplication::translate().
|
static |
Remove gaps from a sequence.
The deleteElement method of the Sequence object will be used where appropriate.
seq | The sequence to analyse. |
Definition at line 353 of file SequenceTools.cpp.
References bpp::SymbolList::deleteElement(), bpp::SymbolList::getAlphabet(), bpp::Alphabet::isGap(), and bpp::SymbolList::size().
Referenced by bpp::SiteContainerTools::alignNW().
|
static |
Remove stops from a codon sequence.
The deleteElement method of the Sequence object will be used where appropriate.
seq | The sequence to analyse. |
gCode | The genetic code according to which stop codons are specified. |
Exception | if the input sequence does not have a codon alphabet. |
Definition at line 383 of file SequenceTools.cpp.
|
static |
Replace stop codons by gaps.
The setElement method of the Sequence object will be used where appropriate.
seq | The sequence to analyse. |
gCode | The genetic code according to which stop codons are specified. |
Exception | if the input sequence does not have a codon alphabet. |
Definition at line 397 of file SequenceTools.cpp.
References bpp::AbstractAlphabet::getGapCharacterCode().
|
static |
Get the reverse-transcription sequence of a RNA sequence.
Translate RNA sequence into DNA sequence.
sequence | The sequence to reverse-transcript. |
AlphabetException | If the sequence is not a RNA sequence. |
Definition at line 168 of file SequenceTools.cpp.
|
static |
Get the RNY decomposition of a DNA sequence; with a given phase between 1 and 3, it gives the decomposition in this phase; in phase 1, the first triplet is centered on the first character. Without a phase the function gives the alternative succession in phases 1, 2 and 3.
sequence | The sequence to transcript. |
ph | The phase to use (1,2 or 3). |
AlphabetException | If the sequence is not a DNA sequence. |
Definition at line 575 of file SequenceTools.cpp.
Referenced by bpp::SequenceApplicationTools::getSiteContainer().
|
static |
Definition at line 617 of file SequenceTools.cpp.
|
static |
Get a sub-sequence.
sequence | The sequence to truncate. |
begin | The first position of the subsequence. |
end | The last position of the subsequence. |
IndexOutOfBoundsException,Exception | In case of bad indices. |
Definition at line 68 of file SequenceTools.cpp.
Referenced by bpp::GeneticCode::getCodingSequence().
|
static |
Subtract a haplotype from a heterozygous sequence.
Subtract a haplotype (i.e. a fully resolved sequence) from a heterozygous sequence to get the other haplotype. The new haplotype could be an unresolved sequence if unresolved characters in the sequence code for more than 2 states.
For example:
s | The heterozygous sequence. |
h | The haplotype to subtract. |
name | The name of the new computed haplotype. |
level | The number of states from which the site is set to fully unresolved. |
SequenceNotAlignedException | if s and h don't have the same size. |
Definition at line 541 of file SequenceTools.cpp.
References bpp::Alphabet::getAlias(), bpp::Alphabet::getGeneric(), bpp::Alphabet::getName(), bpp::Alphabet::getUnknownCharacterCode(), bpp::Alphabet::intToChar(), and bpp::Alphabet::isUnresolved().
|
static |
Get the transcription sequence of a DNA sequence.
Translate DNA sequence into RNA sequence.
sequence | The sequence to transcript. |
AlphabetException | If the sequence is not a DNA sequence. |
Definition at line 155 of file SequenceTools.cpp.
|
staticprivate |
Definition at line 101 of file SequenceTools.h.
|
staticprivate |
Definition at line 104 of file SequenceTools.h.
|
staticprivate |
Definition at line 102 of file SequenceTools.h.
|
staticprivate |
Definition at line 105 of file SequenceTools.h.
|
staticprivate |
Definition at line 103 of file SequenceTools.h.
|
staticprivate |
Definition at line 106 of file SequenceTools.h.