bpp-seq  2.2.0
SequenceTools.h
Go to the documentation of this file.
1 //
2 // File: SequenceTools.h
3 // Authors: Guillaume Deuchst
4 // Julien Dutheil
5 // Sylvain Gaillard
6 // Created on: Tue Aug 21 2003
7 //
8 
9 /*
10  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
11 
12  This software is a computer program whose purpose is to provide classes
13  for sequences analysis.
14 
15  This software is governed by the CeCILL license under French law and
16  abiding by the rules of distribution of free software. You can use,
17  modify and/ or redistribute the software under the terms of the CeCILL
18  license as circulated by CEA, CNRS and INRIA at the following URL
19  "http://www.cecill.info".
20 
21  As a counterpart to the access to the source code and rights to copy,
22  modify and redistribute granted by the license, users are provided only
23  with a limited warranty and the software's author, the holder of the
24  economic rights, and the successive licensors have only limited
25  liability.
26 
27  In this respect, the user's attention is drawn to the risks associated
28  with loading, using, modifying and/or developing or reproducing the
29  software by the user in light of its specific status of free software,
30  that may mean that it is complicated to manipulate, and that also
31  therefore means that it is reserved for developers and experienced
32  professionals having in-depth computer knowledge. Users are therefore
33  encouraged to load and test the software's suitability as regards their
34  requirements in conditions enabling the security of their systems and/or
35  data to be ensured and, more generally, to use and operate it in the
36  same conditions as regards security.
37 
38  The fact that you are presently reading this means that you have had
39  knowledge of the CeCILL license and that you accept its terms.
40  */
41 
42 #ifndef _SEQUENCETOOLS_H_
43 #define _SEQUENCETOOLS_H_
44 
#include "Alphabet/Alphabet.h"
#include "Alphabet/DNA.h"
#include "Alphabet/RNA.h"
#include "Alphabet/RNY.h"
#include "GeneticCode/GeneticCode.h"
#include "Sequence.h"
#include "SymbolListTools.h"
#include "NucleicAcidsReplication.h"
#include <Bpp/Exceptions.h>
#include <Bpp/Numeric/Random/RandomTools.h>
#include <Bpp/Numeric/Stat/StatTest.h>

// From the STL:
#include <string>
#include <map>
#include <vector>
#include <algorithm>
62 
63 namespace bpp
64 {
68 class BowkerTest :
69  public StatTest
70 {
71 private:
72  double pvalue_;
73  double stat_;
74 
75 public:
77  stat_(0.) {}
78 
79  virtual ~BowkerTest() {}
80 
81  BowkerTest* clone() const { return new BowkerTest(*this); }
82 
83 public:
84  std::string getName() const { return "Bowker's test for homogeneity."; }
85  double getStatistic() const { return stat_; }
86  double getPValue() const { return pvalue_; }
87 
88  void setStatistic(double stat) { stat_ = stat; }
89  void setPValue(double pvalue) { pvalue_ = pvalue; }
90 };
91 
98  public SymbolListTools
99 {
100 private:
101  static DNA _DNA;
102  static RNA _RNA;
103  static RNY _RNY;
107 
108 public:
110  virtual ~SequenceTools() {}
111 
112 public:
122  static Sequence* subseq(const Sequence& sequence, size_t begin, size_t end) throw (IndexOutOfBoundsException, Exception);
123 
136  static Sequence* concatenate(const Sequence& seq1, const Sequence& seq2)
137  throw (AlphabetMismatchException, Exception);
138 
147  static Sequence& complement(Sequence& seq) throw (AlphabetException);
148 
157  static Sequence* getComplement(const Sequence& sequence) throw (AlphabetException);
158 
169  static Sequence* transcript(const Sequence& sequence) throw (AlphabetException);
170 
181  static Sequence* reverseTranscript(const Sequence& sequence) throw (AlphabetException);
182 
193  static Sequence& invert(Sequence& seq);
194 
205  static Sequence* getInvert(const Sequence& sequence);
206 
217  static Sequence& invertComplement(Sequence& seq);
218 
228  static double getPercentIdentity(const Sequence& seq1, const Sequence& seq2, bool ignoreGaps = false) throw (AlphabetMismatchException, SequenceNotAlignedException);
229 
235  static size_t getNumberOfSites(const Sequence& seq);
236 
242  static size_t getNumberOfCompleteSites(const Sequence& seq);
243 
251  static Sequence* getSequenceWithCompleteSites(const Sequence& seq);
252 
261  static size_t getNumberOfUnresolvedSites(const Sequence& seq);
262 
263 
270  static void removeGaps(Sequence& seq);
271 
281  static Sequence* getSequenceWithoutGaps(const Sequence& seq);
282 
291  static void removeStops(Sequence& seq, const GeneticCode& gCode) throw (Exception);
292 
304  static Sequence* getSequenceWithoutStops(const Sequence& seq, const GeneticCode& gCode) throw (Exception);
305 
314  static void replaceStopsWithGaps(Sequence& seq, const GeneticCode& gCode) throw (Exception);
315 
331  static BowkerTest* bowkerTest(const Sequence& seq1, const Sequence& seq2) throw (SequenceNotAlignedException);
332 
345  static void getPutativeHaplotypes(const Sequence& seq, std::vector<Sequence*>& hap, unsigned int level = 2);
346 
353  static Sequence* combineSequences(const Sequence& s1, const Sequence& s2) throw (AlphabetMismatchException);
354 
380  static Sequence* subtractHaplotype(const Sequence& s, const Sequence& h, std::string name = "", unsigned int level = 1) throw (SequenceNotAlignedException);
381 
396  static Sequence* RNYslice(const Sequence& sequence, int ph) throw (AlphabetException);
397  static Sequence* RNYslice(const Sequence& sequence) throw (AlphabetException);
398 
409  static void getCDS(Sequence& sequence, const GeneticCode& gCode, bool checkInit, bool checkStop, bool includeInit = true, bool includeStop = true);
410 
421  static size_t findFirstOf(const Sequence& seq, const Sequence& motif, bool strict = true);
422 
430  static Sequence* getRandomSequence(const Alphabet* alphabet, size_t length);
431 };
432 } // end of namespace bpp.
433 
434 #endif // _SEQUENCETOOLS_H_
435 
static Sequence * getSequenceWithoutStops(const Sequence &seq, const GeneticCode &gCode)
Get a copy of the codon sequence without stops.
static size_t getNumberOfUnresolvedSites(const Sequence &seq)
static Sequence * concatenate(const Sequence &seq1, const Sequence &seq2)
Concatenate two sequences.
Definition: RNY.h:65
static void getPutativeHaplotypes(const Sequence &seq, std::vector< Sequence *> &hap, unsigned int level=2)
Get all putative haplotypes from a heterozygous sequence.
BowkerTest * clone() const
Definition: SequenceTools.h:81
static Sequence * RNYslice(const Sequence &sequence, int ph)
Get the RNY decomposition of a DNA sequence; with a given phase between 1 and 3, it gives the decompo...
This alphabet is used to deal NumericAlphabet.
static Sequence * getInvert(const Sequence &sequence)
Invert a sequence from 5'->3' to 3'->5' and vice versa.
static void removeGaps(Sequence &seq)
Remove gaps from a sequence.
static Sequence * getComplement(const Sequence &sequence)
Get the complementary sequence of a nucleotide sequence.
std::string getName() const
Definition: SequenceTools.h:84
static size_t getNumberOfSites(const Sequence &seq)
The Alphabet interface.
Definition: Alphabet.h:130
STL namespace.
static Sequence * getSequenceWithoutGaps(const Sequence &seq)
Get a copy of the sequence without gaps.
SequenceTools static class.
Definition: SequenceTools.h:97
static size_t getNumberOfCompleteSites(const Sequence &seq)
static void getCDS(Sequence &sequence, const GeneticCode &gCode, bool checkInit, bool checkStop, bool includeInit=true, bool includeStop=true)
Extract CDS part from a codon sequence. Optionally check for initiator and stop codons, or both.
static Sequence * subtractHaplotype(const Sequence &s, const Sequence &h, std::string name="", unsigned int level=1)
Subtract haplotype from a heterozygous sequence.
static double getPercentIdentity(const Sequence &seq1, const Sequence &seq2, bool ignoreGaps=false)
static void removeStops(Sequence &seq, const GeneticCode &gCode)
Remove stops from a codon sequence.
static BowkerTest * bowkerTest(const Sequence &seq1, const Sequence &seq2)
Bowker's test for homogeneity.
static NucleicAcidsReplication _DNARep
Utility functions dealing with both sites and sequences.
static NucleicAcidsReplication _RNARep
static Sequence * getSequenceWithCompleteSites(const Sequence &seq)
Keep only complete sites in a sequence.
static Sequence & complement(Sequence &seq)
Complement the nucleotide sequence itself.
double getPValue() const
Definition: SequenceTools.h:86
void setPValue(double pvalue)
Definition: SequenceTools.h:89
The alphabet exception base class.
static void replaceStopsWithGaps(Sequence &seq, const GeneticCode &gCode)
Replace stop codons by gaps.
static Sequence & invert(Sequence &seq)
Invert a sequence from 5'->3' to 3'->5' and vice versa.
static Sequence * transcript(const Sequence &sequence)
Get the transcription sequence of a DNA sequence.
static Sequence & invertComplement(Sequence &seq)
Inverse and complement a sequence.
virtual ~BowkerTest()
Definition: SequenceTools.h:79
This alphabet is used to deal with DNA sequences.
Definition: DNA.h:60
The sequence interface.
Definition: Sequence.h:74
virtual ~SequenceTools()
static Sequence * combineSequences(const Sequence &s1, const Sequence &s2)
Combine two sequences.
static Sequence * subseq(const Sequence &sequence, size_t begin, size_t end)
Get a sub-sequence.
This alphabet is used to deal with RNA sequences.
Definition: RNA.h:58
static Sequence * reverseTranscript(const Sequence &sequence)
Get the reverse-transcription sequence of an RNA sequence.
Partial implementation of the Transliterator interface for genetic code object.
Definition: GeneticCode.h:79
Exception thrown when two alphabets do not match.
static NucleicAcidsReplication _transc
Bowker's homogeneity test results class.
Definition: SequenceTools.h:68
static size_t findFirstOf(const Sequence &seq, const Sequence &motif, bool strict=true)
Find the position of a motif in a sequence.
Replication between two nucleic acids.
Exception thrown when a sequence is not aligned with others.
void setStatistic(double stat)
Definition: SequenceTools.h:88
double getStatistic() const
Definition: SequenceTools.h:85
static Sequence * getRandomSequence(const Alphabet *alphabet, size_t length)
Get a random sequence of given size and alphabet, with all states having equal probability.