bpp-seq  2.2.0
SiteContainerTools.h
Go to the documentation of this file.
1 //
2 // File: SiteContainerTools.h
3 // Created by: Julien Dutheil
4 // Created on: Fri Dec 12 18:55:06 2003
5 //
6 
7 #ifndef _SITECONTAINERTOOLS_H_
8 #define _SITECONTAINERTOOLS_H_
9 
10 /*
11  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
12 
13  This software is a computer program whose purpose is to provide classes
14  for sequences analysis.
15 
16  This software is governed by the CeCILL license under French law and
17  abiding by the rules of distribution of free software. You can use,
18  modify and/ or redistribute the software under the terms of the CeCILL
19  license as circulated by CEA, CNRS and INRIA at the following URL
20  "http://www.cecill.info".
21 
22  As a counterpart to the access to the source code and rights to copy,
23  modify and redistribute granted by the license, users are provided only
24  with a limited warranty and the software's author, the holder of the
25  economic rights, and the successive licensors have only limited
26  liability.
27 
28  In this respect, the user's attention is drawn to the risks associated
29  with loading, using, modifying and/or developing or reproducing the
30  software by the user in light of its specific status of free software,
31  that may mean that it is complicated to manipulate, and that also
32  therefore means that it is reserved for developers and experienced
33  professionals having in-depth computer knowledge. Users are therefore
34  encouraged to load and test the software's suitability as regards their
35  requirements in conditions enabling the security of their systems and/or
36  data to be ensured and, more generally, to use and operate it in the
37  same conditions as regards security.
38 
39  The fact that you are presently reading this means that you have had
40  knowledge of the CeCILL license and that you accept its terms.
41 */
42 
43 #include "SiteContainer.h"
44 #include "VectorSiteContainer.h"
46 #include "../AlphabetIndex/AlphabetIndex2.h"
47 #include "../DistanceMatrix.h"
48 #include "../GeneticCode/GeneticCode.h"
49 #include <Bpp/Numeric/Matrix/Matrix.h>
50 
51 //From the STL:
52 #include <vector>
53 #include <map>
54 
55 namespace bpp
56 {
57 
58  typedef std::vector<size_t> SiteSelection;
59 
63  class SiteContainerTools
64  {
65  public:
67  virtual ~SiteContainerTools() {}
68 
69  public:
70 
80  static SiteContainer* getSitesWithoutGaps(const SiteContainer& sites);
81 
92  static SiteContainer* getCompleteSites(const SiteContainer& sites);
93 
104  static SiteContainer* removeGapOnlySites(const SiteContainer& sites);
105 
111  static void removeGapOnlySites(SiteContainer& sites);
112 
123  static SiteContainer* removeGapOrUnresolvedOnlySites(const SiteContainer& sites);
129  static void removeGapOrUnresolvedOnlySites(SiteContainer& sites);
130 
138  static SiteContainer* removeGapSites(const SiteContainer& sites, double maxFreqGaps);
139 
146  static void removeGapSites(SiteContainer& sites, double maxFreqGaps);
147 
158  static SiteContainer* removeStopCodonSites(const SiteContainer& sites, const GeneticCode& gCode) throw (AlphabetException);
159 
174  static SiteContainer* getSelectedSites(const SiteContainer& sequences, const SiteSelection& selection);
175 
195  static SiteContainer* getSelectedPositions(const SiteContainer& sequences, const SiteSelection& selection);
196 
209  static Sequence* getConsensus(const SiteContainer& sc, const std::string& name = "consensus", bool ignoreGap = true, bool resolveUnknown = false);
210 
218  static void changeGapsToUnknownCharacters(SiteContainer& sites);
219 
228  static void changeUnresolvedCharactersToGaps(SiteContainer& sites);
257  static SiteContainer* resolveDottedAlignment(const SiteContainer& dottedAln, const Alphabet* resolvedAlphabet) throw (AlphabetException, Exception);
258 
275  static std::map<size_t, size_t> getSequencePositions(const Sequence& seq);
276 
286  static std::map<size_t, size_t> getAlignmentPositions(const Sequence& seq);
287 
297  static void getSequencePositions(const SiteContainer& sites, Matrix<size_t>& positions);
313  static std::map<size_t, size_t> translateAlignment(const Sequence& seq1, const Sequence& seq2) throw (AlphabetMismatchException, Exception);
314 
326  static std::map<size_t, size_t> translateSequence(const SiteContainer& sequences, size_t i1, size_t i2);
327 
342  static AlignedSequenceContainer* alignNW(const Sequence& seq1, const Sequence& seq2, const AlphabetIndex2& s, double gap) throw (AlphabetMismatchException);
343 
359  static AlignedSequenceContainer* alignNW(const Sequence& seq1, const Sequence& seq2, const AlphabetIndex2& s, double opening, double extending) throw (AlphabetMismatchException);
360 
374  static VectorSiteContainer* sampleSites(const SiteContainer& sites, size_t nbSites, std::vector<size_t>* index = 0);
375 
387  static VectorSiteContainer* bootstrapSites(const SiteContainer& sites);
388 
410  static double computeSimilarity(const Sequence& seq1, const Sequence& seq2, bool dist = false, const std::string& gapOption = SIMILARITY_NODOUBLEGAP, bool unresolvedAsGap = true) throw (SequenceNotAlignedException, AlphabetMismatchException, Exception);
411 
434  static DistanceMatrix* computeSimilarityMatrix(const SiteContainer& sites, bool dist = false, const std::string& gapOption = SIMILARITY_NOFULLGAP, bool unresolvedAsGap = true);
435 
436  static const std::string SIMILARITY_ALL;
437  static const std::string SIMILARITY_NOFULLGAP;
438  static const std::string SIMILARITY_NODOUBLEGAP;
439  static const std::string SIMILARITY_NOGAP;
440 
461  static void merge(SiteContainer& seqCont1, const SiteContainer& seqCont2, bool leavePositionAsIs = false) throw (AlphabetMismatchException, Exception);
462 
476  static std::vector<int> getColumnScores(const Matrix<size_t>& positions1, const Matrix<size_t>& positions2, int na = 0);
477 
491  static std::vector<double> getSumOfPairsScores(const Matrix<size_t>& positions1, const Matrix<size_t>& positions2, double na = 0);
492  };
493 
494 } //end of namespace bpp.
495 
496 #endif //_SITECONTAINERTOOLS_H_
497 
std::vector< size_t > SiteSelection
static SiteContainer * removeGapSites(const SiteContainer &sites, double maxFreqGaps)
Get a site set containing only the sites with less than a given amount of gaps.
static std::vector< double > getSumOfPairsScores(const Matrix< size_t > &positions1, const Matrix< size_t > &positions2, double na=0)
Compare an alignment to a reference alignment, and compute the sum-of-pairs scores.
The SiteContainer interface.
Definition: SiteContainer.h:63
Aligned sequences container.
This alphabet is used to deal with NumericAlphabet.
static std::map< size_t, size_t > getSequencePositions(const Sequence &seq)
Get the index of each sequence position in an aligned sequence.
static void changeGapsToUnknownCharacters(SiteContainer &sites)
Change all gaps to unknown state in a container, according to its alphabet.
static DistanceMatrix * computeSimilarityMatrix(const SiteContainer &sites, bool dist=false, const std::string &gapOption=SIMILARITY_NOFULLGAP, bool unresolvedAsGap=true)
Compute the similarity matrix of an alignment.
The Alphabet interface.
Definition: Alphabet.h:130
static VectorSiteContainer * bootstrapSites(const SiteContainer &sites)
Bootstrap sites in an alignment.
STL namespace.
static std::vector< int > getColumnScores(const Matrix< size_t > &positions1, const Matrix< size_t > &positions2, int na=0)
Compare an alignment to a reference alignment, and compute the column scores.
static const std::string SIMILARITY_ALL
static SiteContainer * getCompleteSites(const SiteContainer &sites)
Retrieves complete sites from SiteContainer.
static SiteContainer * getSelectedPositions(const SiteContainer &sequences, const SiteSelection &selection)
Create a new container with a specified set of positions.
static SiteContainer * removeStopCodonSites(const SiteContainer &sites, const GeneticCode &gCode)
Get a site set without stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception...
static AlignedSequenceContainer * alignNW(const Sequence &seq1, const Sequence &seq2, const AlphabetIndex2 &s, double gap)
Align two sequences using the Needleman-Wunsch dynamic algorithm.
static SiteContainer * getSelectedSites(const SiteContainer &sequences, const SiteSelection &selection)
Create a new container with a specified set of sites.
static Sequence * getConsensus(const SiteContainer &sc, const std::string &name="consensus", bool ignoreGap=true, bool resolveUnknown=false)
Create the consensus sequence of the alignment.
static SiteContainer * removeGapOrUnresolvedOnlySites(const SiteContainer &sites)
Get a site set without gap/unresolved-only sites.
static VectorSiteContainer * sampleSites(const SiteContainer &sites, size_t nbSites, std::vector< size_t > *index=0)
Sample sites in an alignment.
static double computeSimilarity(const Sequence &seq1, const Sequence &seq2, bool dist=false, const std::string &gapOption=SIMILARITY_NODOUBLEGAP, bool unresolvedAsGap=true)
Compute the similarity/distance score between two aligned sequences.
static const std::string SIMILARITY_NOGAP
static void changeUnresolvedCharactersToGaps(SiteContainer &sites)
Change all unresolved characters to gaps in a container, according to its alphabet.
The alphabet exception base class.
static std::map< size_t, size_t > getAlignmentPositions(const Sequence &seq)
Get the index of each alignment position in an aligned sequence.
Two-dimensional alphabet index interface.
A Matrix class to store phylogenetic distances.
static std::map< size_t, size_t > translateAlignment(const Sequence &seq1, const Sequence &seq2)
Translate alignment positions from an aligned sequence to the same sequence in a different alignment...
static SiteContainer * removeGapOnlySites(const SiteContainer &sites)
Get a site set without gap-only sites.
The sequence interface.
Definition: Sequence.h:74
static const std::string SIMILARITY_NOFULLGAP
static void merge(SiteContainer &seqCont1, const SiteContainer &seqCont2, bool leavePositionAsIs=false)
Add the content of a site container to an existing one.
static const std::string SIMILARITY_NODOUBLEGAP
static SiteContainer * getSitesWithoutGaps(const SiteContainer &sites)
Retrieves sites without gaps from SiteContainer.
Exception thrown when two alphabets do not match.
Partial implementation of the Transliterator interface for genetic code object.
Definition: GeneticCode.h:79
Some utility methods to deal with site containers.
static SiteContainer * resolveDottedAlignment(const SiteContainer &dottedAln, const Alphabet *resolvedAlphabet)
Resolve a container with "." notations.
Exception thrown when a sequence is not aligned with others.
static std::map< size_t, size_t > translateSequence(const SiteContainer &sequences, size_t i1, size_t i2)
Translate sequence positions from a sequence to another in the same alignment.
The VectorSiteContainer class.