bpp-seq  2.2.0
SequenceContainerTools.h
Go to the documentation of this file.
1 //
2 // File: SequenceContainerTools.h
3 // Created by: Julien Dutheil
4 // Sylvain Gaillard
5 // Created on: Sat Oct 4 09:18:34 2003
6 //
7 
8 /*
9 Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
10 
11 This software is a computer program whose purpose is to provide classes
12 for sequences analysis.
13 
14 This software is governed by the CeCILL license under French law and
15 abiding by the rules of distribution of free software. You can use,
16 modify and/ or redistribute the software under the terms of the CeCILL
17 license as circulated by CEA, CNRS and INRIA at the following URL
18 "http://www.cecill.info".
19 
20 As a counterpart to the access to the source code and rights to copy,
21 modify and redistribute granted by the license, users are provided only
22 with a limited warranty and the software's author, the holder of the
23 economic rights, and the successive licensors have only limited
24 liability.
25 
26 In this respect, the user's attention is drawn to the risks associated
27 with loading, using, modifying and/or developing or reproducing the
28 software by the user in light of its specific status of free software,
29 that may mean that it is complicated to manipulate, and that also
30 therefore means that it is reserved for developers and experienced
31 professionals having in-depth computer knowledge. Users are therefore
32 encouraged to load and test the software's suitability as regards their
33 requirements in conditions enabling the security of their systems and/or
34 data to be ensured and, more generally, to use and operate it in the
35 same conditions as regards security.
36 
37 The fact that you are presently reading this means that you have had
38 knowledge of the CeCILL license and that you accept its terms.
39 */
40 
41 #ifndef _SEQUENCECONTAINERTOOLS_H_
42 #define _SEQUENCECONTAINERTOOLS_H_
43 
44 // From the STL:
45 #include <string>
46 #include <vector>
47 #include <map>
48 #include <memory>
49 
50 #include "SequenceContainer.h"
52 
53 namespace bpp
54 {
55 
/**
 * Vector of size_t values taken as the `selection` argument by
 * getSelectedSequences() and keepOnlySelectedSequences().
 * NOTE(review): presumably positions of sequences in an ordered container — confirm.
 */
56  typedef std::vector<size_t> SequenceSelection;
/**
 * Vector of size_t values; not used by any declaration visible in this header.
 * NOTE(review): presumably positions of sites in an alignment — confirm.
 */
57  typedef std::vector<size_t> SiteSelection;
62 {
63 
64  public:
67 
68  public:
/**
 * @brief Create a container with void sequences.
 *
 * @param alphabet Alphabet handed to the function (presumably the alphabet of
 *                 the new container — TODO confirm against the implementation).
 * @param size     Requested size (presumably the number of sequences to
 *                 create — TODO confirm).
 * @return A pointer to a SequenceContainer.
 *         NOTE(review): ownership presumably passes to the caller — confirm.
 */
80  static SequenceContainer* createContainerOfSpecifiedSize(const Alphabet* alphabet, size_t size);
81 
95  const Alphabet* alphabet,
96  const std::vector<std::string>& seqNames)
97  throw (Exception);
98 
/**
 * @brief Generic function which copies every sequence of one container into
 * another, rebuilding each sequence as an object of a chosen class.
 *
 * For each index i in [0, input.getNumberOfSequences()), a Seq object is
 * constructed from input.getSequence(i) and handed to output.addSequence().
 * Sequences already present in the output container are not touched by this
 * function itself.
 *
 * @tparam ContFrom Type of the input container; must provide
 *                  getNumberOfSequences() and getSequence(size_t).
 * @tparam ContTo   Type of the output container; must provide addSequence()
 *                  accepting a Seq.
 * @tparam Seq      Sequence class to instantiate; must be constructible from
 *                  the value returned by ContFrom::getSequence().
 * @param input  The container to read from.
 * @param output The container to fill.
 */
template<class ContFrom, class ContTo, class Seq>
static void convertContainer(const ContFrom& input, ContTo& output)
{
  for (size_t i = 0; i < input.getNumberOfSequences(); ++i)
  {
    // An automatic object is sufficient: the converted sequence is only
    // needed for the duration of the addSequence() call and is destroyed at
    // the end of the iteration. This avoids a heap allocation per sequence
    // and the use of std::auto_ptr, which was removed from the standard
    // library in C++17.
    Seq seq(input.getSequence(i));
    output.addSequence(seq);
  }
}
118 
/**
 * @brief Add a specified set of sequences from a container to another.
 *
 * @param sequences  The container the sequences are taken from.
 * @param selection  The selection identifying which sequences to add
 *                   (presumably positions in `sequences` — TODO confirm).
 * @param outputCont The container the selected sequences are added to.
 * @throw Exception  Declared by the exception specification; the conditions
 *                   are defined by the implementation, not visible here.
 */
134  static void getSelectedSequences(const OrderedSequenceContainer& sequences, const SequenceSelection& selection, SequenceContainer& outputCont) throw (Exception);
135 
/**
 * @brief Overload of getSelectedSequences() taking a vector of strings as
 * the selection.
 *
 * @param sequences  The container the sequences are taken from.
 * @param selection  Strings identifying the sequences to add (presumably
 *                   sequence names — TODO confirm).
 * @param outputCont The container the selected sequences are added to.
 * @param strict     Defaults to true. NOTE(review): presumably controls
 *                   whether a selected name missing from `sequences` is an
 *                   error — confirm against the implementation.
 * @throw Exception  Declared by the exception specification; the conditions
 *                   are defined by the implementation, not visible here.
 */
153  static void getSelectedSequences(const SequenceContainer& sequences, const std::vector<std::string>& selection, SequenceContainer& outputCont, bool strict = true) throw (Exception);
154 
/**
 * @brief Remove all sequences that are not in a given selection from a given
 * container.
 *
 * @param sequences The container to modify (taken by non-const reference).
 * @param selection The selection of sequences to keep.
 */
167  static void keepOnlySelectedSequences(OrderedSequenceContainer& sequences, const SequenceSelection& selection);
168 
/**
 * @brief Check if all sequences in a SequenceContainer have the same length.
 *
 * @param sequences The container to check.
 * @return A boolean result of the check (presumably true when all lengths
 *         are equal — TODO confirm).
 */
175  static bool sequencesHaveTheSameLength(const SequenceContainer& sequences);
176 
/**
 * @brief Compute base counts.
 *
 * @param sequences The container to analyse.
 * @param (unnamed) Output map from int to int, passed by non-const reference.
 *                  NOTE(review): presumably maps each state code to its
 *                  number of occurrences — confirm against the implementation.
 */
190  static void getCounts(const SequenceContainer& sequences, std::map<int, int>&);
191 
/**
 * @brief Compute base frequencies.
 *
 * @param sequences   The container to analyse.
 * @param f           Output map from int to double, passed by non-const
 *                    reference (presumably state code -> frequency — TODO confirm).
 * @param pseudoCount Defaults to 0. NOTE(review): presumably a value added to
 *                    each count before normalisation — confirm.
 */
205  static void getFrequencies(const SequenceContainer& sequences, std::map<int, double>& f, double pseudoCount = 0);
206 
214  static void append(SequenceContainer& seqCont1, const SequenceContainer& seqCont2, bool checkNames = true)
215  throw (Exception)
216  {
217  std::vector<std::string> seqNames = seqCont2.getSequencesNames();
218  for (size_t i = 0; i < seqNames.size(); i++)
219  seqCont1.addSequence(seqCont2.getSequence(seqNames[i]), checkNames);
220  }
228  static void append(SequenceContainer& seqCont1, const OrderedSequenceContainer& seqCont2, bool checkNames=true)
229  throw (Exception)
230  {
231  for (size_t i = 0; i < seqCont2.getNumberOfSequences(); i++)
232  seqCont1.addSequence(seqCont2.getSequence(i), checkNames);
233  }
234 
249  static void merge(const SequenceContainer& seqCont1, const SequenceContainer& seqCont2, SequenceContainer& outputCont)
250  throw (Exception)
251  {
252  if (seqCont1.getAlphabet()->getAlphabetType() != seqCont2.getAlphabet()->getAlphabetType())
253  throw AlphabetMismatchException("SequenceContainerTools::merge.", seqCont1.getAlphabet(), seqCont2.getAlphabet());
254 
255  std::vector<std::string> seqNames = seqCont1.getSequencesNames();
256  for (size_t i = 0; i < seqNames.size(); i++)
257  {
258  BasicSequence tmp = seqCont1.getSequence(seqNames[i]);
259  tmp.append(seqCont2.getContent(seqNames[i]));
260  outputCont.addSequence(tmp, false);
261  }
262  }
263 
272  static void convertAlphabet(const SequenceContainer& seqCont, SequenceContainer& outputCont)
273  throw (Exception)
274  {
275  std::vector<std::string> seqNames = seqCont.getSequencesNames();
276  bool checkNames = outputCont.getNumberOfSequences() > 0;
277  for (size_t i = 0; i < seqNames.size(); i++)
278  {
279  BasicSequence seq(seqNames[i], seqCont.toString(seqNames[i]), outputCont.getAlphabet());
280  outputCont.addSequence(seq, checkNames);
281  }
282  }
283 
/**
 * @brief Extract a certain position (1, 2 or 3) from a container of codon
 * sequences and return the resulting container.
 *
 * @param sequences The input container (of codon sequences).
 * @param pos       The position to extract.
 *                  NOTE(review): whether `pos` is 0-based or 1-based is not
 *                  visible here — confirm against the implementation.
 * @return A pointer to a SequenceContainer.
 *         NOTE(review): ownership presumably passes to the caller — confirm.
 * @throw AlphabetException Declared by the exception specification
 *                  (presumably when the input alphabet is unsuitable — TODO confirm).
 */
292  static SequenceContainer* getCodonPosition(const SequenceContainer& sequences, size_t pos) throw (AlphabetException);
293 
294 };
295 
296 } //end of namespace bpp.
297 
298 #endif //_SEQUENCECONTAINERTOOLS_H_
299 
std::vector< size_t > SiteSelection
static void append(SequenceContainer &seqCont1, const OrderedSequenceContainer &seqCont2, bool checkNames=true)
Append all the sequences of a SequenceContainer to the end of another, OrderedSequenceContainer imple...
static void convertAlphabet(const SequenceContainer &seqCont, SequenceContainer &outputCont)
Convert a SequenceContainer with a new alphabet.
static void getCounts(const SequenceContainer &sequences, std::map< int, int > &)
Compute base counts.
The OrderedSequenceContainer interface.
This alphabet is used to deal with NumericAlphabet.
The Alphabet interface.
Definition: Alphabet.h:130
static bool sequencesHaveTheSameLength(const SequenceContainer &sequences)
Check if all sequences in a SequenceContainer have the same length.
STL namespace.
std::vector< size_t > SequenceSelection
static void keepOnlySelectedSequences(OrderedSequenceContainer &sequences, const SequenceSelection &selection)
Remove all sequences that are not in a given selection from a given container.
Utility methods dealing with sequence containers.
virtual const Sequence & getSequence(size_t sequenceIndex) const =0
Retrieve a sequence object from the container.
static SequenceContainer * getCodonPosition(const SequenceContainer &sequences, size_t pos)
Extract a certain position (1, 2 or 3) from a container of codon sequences and return the resulting ...
static SequenceContainer * createContainerWithSequenceNames(const Alphabet *alphabet, const std::vector< std::string > &seqNames)
Create a container with specified names.
static void getSelectedSequences(const OrderedSequenceContainer &sequences, const SequenceSelection &selection, SequenceContainer &outputCont)
Add a specified set of sequences from a container to another.
The alphabet exception base class.
A basic implementation of the Sequence interface.
Definition: Sequence.h:207
virtual void append(const std::vector< int > &content)
Append the specified content to the sequence.
Definition: Sequence.cpp:193
virtual void addSequence(const Sequence &sequence, bool checkName)=0
Add a sequence to the container.
static void merge(const SequenceContainer &seqCont1, const SequenceContainer &seqCont2, SequenceContainer &outputCont)
Concatenate the sequences from two containers.
static void append(SequenceContainer &seqCont1, const SequenceContainer &seqCont2, bool checkNames=true)
Append all the sequences of a SequenceContainer to the end of another.
static void getFrequencies(const SequenceContainer &sequences, std::map< int, double > &f, double pseudoCount=0)
Compute base frequencies.
static void convertContainer(const ContFrom &input, ContTo &output)
Generic function which creates a new container from another one, by specifying the class of sequence ...
Exception thrown when two alphabets do not match.
The SequenceContainer interface.
static SequenceContainer * createContainerOfSpecifiedSize(const Alphabet *alphabet, size_t size)
Create a container with void sequences.
virtual size_t getNumberOfSequences() const =0
Get the number of sequences in the container.