bpp-popgen  2.2.0
DataSet.h
Go to the documentation of this file.
1 //
2 // File DataSet.h
3 // Author : Sylvain Gaillard
4 // Last modification : April 4, 2008
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for population genetics analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
40 #ifndef _DATASET_H_
41 #define _DATASET_H_
42 
43 // From the STL
44 #include <algorithm>
45 #include <vector>
46 #include <map>
47 #include <string>
48 
49 #include <Bpp/Exceptions.h>
50 #include <Bpp/Graphics/Point2D.h>
51 #include <Bpp/Utils/MapTools.h>
52 
53 // From PopGenLib (local)
54 #include "Group.h"
55 #include "Individual.h"
56 #include "Locality.h"
57 #include "GeneralExceptions.h"
58 #include "AnalyzedLoci.h"
59 #include "AnalyzedSequences.h"
62 
63 namespace bpp
64 {
// The DataSet class.
// Declares the container's operations on localities, groups, individuals
// (addressed by group position + individual position), the sequence alphabet,
// the analyzed loci, and extraction of polymorphism containers.
// NOTE(review): every throwing member below uses a dynamic exception
// specification (throw (...)); these were deprecated in C++11 and removed in
// C++17, so this header needs a pre-C++17 language mode as written.
73 class DataSet
74 {
75 private:
     // NOTE(review): the cross-reference index of this listing also mentions
     // analyzedLoci_ (DataSet.h:76) and analyzedSequences_ (DataSet.h:77);
     // those two declarations are not shown in this listing.
     // Localities of the DataSet, stored as raw pointers.
     // NOTE(review): ownership of the pointed-to objects is not visible here.
78  std::vector<Locality<double>*> localities_;
     // Groups of the DataSet, stored as raw pointers (same ownership caveat).
79  std::vector<Group*> groups_;
80 
81 public:
82  // Constructor and destructor
     // Build a new void (presumably empty) DataSet.
86  DataSet();
87 
     // Destroy a DataSet.
91  ~DataSet();
92 
     // Copy constructor.
96  DataSet(const DataSet& ds);
97 
     // Copy assignment operator.
98  DataSet& operator=(const DataSet& ds);
99 
100 public:
101  // Methods
102 // ** Locality manipulation ***************************************************/
     // Add a locality to the DataSet.
109  void addLocality(Locality<double>& locality) throw (BadIdentifierException);
110 
     // Get the position of a locality in the container.
118  size_t getLocalityPosition(const std::string& name) const throw (LocalityNotFoundException);
119 
     // Get a Locality by locality_position.
127  const Locality<double>& getLocalityAtPosition(size_t locality_position) const throw (IndexOutOfBoundsException);
128 
     // Get a Locality by name.
134  const Locality<double>& getLocalityByName(const std::string& name) const throw (LocalityNotFoundException);
135 
     // Delete a Locality from the DataSet (selected by position).
141  void deleteLocalityAtPosition(size_t locality_position) throw (IndexOutOfBoundsException);
142 
     // Delete a Locality from the DataSet (selected by name).
148  void deleteLocalityByName(const std::string& name) throw (LocalityNotFoundException);
149 
     // Get the number of Localities.
153  size_t getNumberOfLocalities() const;
154 
     // Tell if there is at least one locality.
158  bool hasLocality() const;
159 
160  // ** Group manipulation ******************************************************/
     // Add a Group to the DataSet.
168  void addGroup(const Group& group) throw (BadIdentifierException);
169 
     // Add an empty Group to the DataSet.
173  void addEmptyGroup(size_t group_id) throw (BadIdentifierException);
174 
     // Get a group by identifier.
178  const Group& getGroupById(size_t group_id) const throw (GroupNotFoundException);
179 
     // Get the position of a Group.
185  size_t getGroupPosition(size_t group_id) const throw (GroupNotFoundException);
186 
     // Get the name of a Group. If the name is an empty string it just
     // returns the group_id.
192  std::string getGroupName(size_t group_id) const throw (GroupNotFoundException);
     // Set the name of a Group.
     // NOTE(review): declared const although it is a setter — confirm this
     // is intended before relying on const-correctness of DataSet.
198  void setGroupName(size_t group_id, const std::string& group_name) const throw (GroupNotFoundException);
199 
     // Get a group by position.
205  const Group& getGroupAtPosition(size_t group_position) const throw (IndexOutOfBoundsException);
206 
     // Delete a Group from the DataSet.
212  void deleteGroupAtPosition(size_t group_position) throw (IndexOutOfBoundsException);
213 
     // Get the number of Groups.
217  size_t getNumberOfGroups() const;
218 
     // Merge two groups.
     // NOTE(review): which of source/target survives the merge is not
     // visible in this header — check the definition.
225  void mergeTwoGroups(size_t source_id, size_t target_id) throw (GroupNotFoundException);
226 
     // Merge some Groups in one.
     // NOTE(review): group_ids is taken by non-const reference; whether it
     // is modified by the call is not visible here.
237  void mergeGroups(std::vector<size_t>& group_ids) throw (GroupNotFoundException);
238 
     // Split a group in two. individuals_selection is passed by value.
247  void splitGroup(size_t group_id, std::vector<size_t> individuals_selection) throw (Exception);
248 
249  // ** Individuals manipulation ************************************************/
     // Add an Individual to a Group.
256  void addIndividualToGroup(size_t group_position, const Individual& individual) throw (Exception);
257 
     // Add an empty Individual to a Group.
264  void addEmptyIndividualToGroup(size_t group_position, const std::string& individual_id) throw (Exception);
265 
     // Get the number of Individuals in a Group.
271  size_t getNumberOfIndividualsInGroup(size_t group_position) const
272  throw (IndexOutOfBoundsException);
273 
     // Get the position of an Individual in a Group.
280  size_t getIndividualPositionInGroup(size_t group_position, const std::string& individual_id) const
281  throw (Exception);
     // Get an Individual from a Group (selected by position).
288  const Individual* getIndividualAtPositionFromGroup(size_t group_position, size_t individual_position) const
289  throw (IndexOutOfBoundsException);
290 
     // Get an Individual from a Group (selected by identifier).
297  const Individual* getIndividualByIdFromGroup(size_t group_position, const std::string& individual_id) const
298  throw (Exception);
299 
     // Delete an Individual from a group (selected by position).
306  void deleteIndividualAtPositionFromGroup(size_t group_position, size_t individual_position)
307  throw (IndexOutOfBoundsException);
308 
     // Delete an Individual from a group (selected by identifier).
315  void deleteIndividualByIdFromGroup(size_t group_position, const std::string& individual_id)
316  throw (Exception);
317 
     // Set the sex of an Individual in a Group.
324  void setIndividualSexInGroup(size_t group_position, size_t individual_position, const unsigned short sex)
325  throw (IndexOutOfBoundsException);
326 
     // Get the sex of an Individual in a Group.
333  unsigned short getIndividualSexInGroup(size_t group_position, size_t individual_position) const
334  throw (IndexOutOfBoundsException);
335 
     // Set the Date of an Individual in a Group.
342  void setIndividualDateInGroup(size_t group_position, size_t individual_position, const Date& date)
343  throw (IndexOutOfBoundsException);
344 
     // Get the Date of an Individual in a Group.
352  const Date* getIndividualDateInGroup(size_t group_position, size_t individual_position) const
353  throw (Exception);
354 
     // Set the coordinates of an Individual in a Group.
361  void setIndividualCoordInGroup(size_t group_position, size_t individual_position, const Point2D<double>& coord)
362  throw (IndexOutOfBoundsException);
363 
     // Get the coordinate of an Individual in a Group.
371  const Point2D<double>* getIndividualCoordInGroup(size_t group_position, size_t individual_position) const
372  throw (Exception);
373 
     // Set the Locality of an Individual in a Group (locality given by name).
381  void setIndividualLocalityInGroupByName(size_t group_position, size_t individual_position, const std::string& locality_name)
382  throw (Exception);
383 
     // Get the Locality of an Individual in a Group.
391  const Locality<double>* getIndividualLocalityInGroup(size_t group_position, size_t individual_position) const
392  throw (Exception);
393 
     // Add a Sequence to an Individual in a Group.
402  void addIndividualSequenceInGroup(size_t group_position, size_t individual_position,
403  size_t sequence_position, const Sequence& sequence)
404  throw (Exception);
405 
     // Get a Sequence from an Individual of a Group (selected by name).
415  const Sequence& getIndividualSequenceByNameInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name) const
416  throw (Exception);
417 
     // Get a Sequence from an Individual of a Group (selected by position).
426  const Sequence& getIndividualSequenceAtPositionInGroup(size_t group_position, size_t individual_position, size_t sequence_position) const
427  throw (Exception);
428 
     // Delete a Sequence of an Individual of a Group (selected by name).
437  void deleteIndividualSequenceByNameInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name)
438  throw (Exception);
439 
     // Delete a Sequence of an Individual of a Group (selected by position).
448  void deleteIndividualSequenceAtPositionInGroup(size_t group_position, size_t individual_position, size_t sequence_position)
449  throw (Exception);
450 
     // Get the Sequences' names from an Individual of a Group.
458  std::vector<std::string> getIndividualSequencesNamesInGroup(size_t group_position, size_t individual_position) const
459  throw (Exception);
460 
     // Get the position of a Sequence in an Individual of a Group.
469  size_t getIndividualSequencePositionInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name) const
470  throw (Exception);
471 
     // Get the number of Sequences in an Individual of a Group.
479  size_t getIndividualNumberOfSequencesInGroup(size_t group_position, size_t individual_position) const
480  throw (Exception);
481 
     // Set the MultilocusGenotype of an Individual in a Group.
488  void setIndividualGenotypeInGroup(size_t group_position, size_t individual_position, const MultilocusGenotype& genotype)
489  throw (Exception);
490 
     // Initialize the genotype of an Individual in a Group.
500  void initIndividualGenotypeInGroup(size_t group_position, size_t individual_position)
501  throw (Exception);
502 
     // Delete the MultilocusGenotype of an Individual from a Group.
509  void deleteIndividualGenotypeInGroup(size_t group_position, size_t individual_position)
510  throw (IndexOutOfBoundsException);
511 
     // Set a MonolocusGenotype of an Individual from a group.
520  void setIndividualMonolocusGenotypeInGroup(size_t group_position, size_t individual_position, size_t locus_position, const MonolocusGenotype& monogen)
521  throw (Exception);
522 
     // Set a MonolocusGenotype of an Individual from a group, from allele keys.
     // NOTE(review): allele_keys is a const by-value parameter, so the vector
     // is copied at each call; switching to a const reference would also
     // require updating the definition.
532  void setIndividualMonolocusGenotypeByAlleleKeyInGroup(size_t group_position, size_t individual_position, size_t locus_position, const std::vector<size_t> allele_keys)
533  throw (Exception);
534 
     // Set a MonolocusGenotype of an Individual from a group, from allele ids.
     // NOTE(review): allele_id is likewise a const by-value vector (copied).
544  void setIndividualMonolocusGenotypeByAlleleIdInGroup(size_t group_position, size_t individual_position, size_t locus_position, const std::vector<std::string> allele_id)
545  throw (Exception);
546 
     // Get a MonolocusGenotype from an Individual of a Group.
556  const MonolocusGenotype* getIndividualMonolocusGenotypeInGroup(size_t group_position, size_t individual_position, size_t locus_position) const
557  throw (Exception);
558 
559  // ** AnalyzedSequences manipulation ******************************************/
     // Set the alphabet of the AnalyzedSequences.
563  void setAlphabet(const Alphabet* alpha);
564 
     // Overload taking the alphabet type as a string.
     // NOTE(review): the accepted type names are not visible in this header.
568  void setAlphabet(const std::string& alpha_type);
569 
     // Get the alphabet if there is sequence data.
575  const Alphabet* getAlphabet() const throw (NullPointerException);
576 
     // Get the alphabet type as a string.
582  std::string getAlphabetType() const throw (NullPointerException);
583 
584  // ** AnalyzedLoci manipulation ***********************************************/
     // Set the AnalyzedLoci to the DataSet.
590  void setAnalyzedLoci(const AnalyzedLoci& analyzedLoci) throw (Exception);
591 
     // Initialize the AnalyzedLoci for number of loci.
597  void initAnalyzedLoci(size_t number_of_loci) throw (Exception);
598 
     // Get the AnalyzedLoci if there is one.
604  const AnalyzedLoci* getAnalyzedLoci() const throw (NullPointerException);
605 
     // Delete the AnalyzedLoci.
609  void deleteAnalyzedLoci();
610 
     // Set a LocusInfo.
617  void setLocusInfo(size_t locus_position, const LocusInfo& locus)
618  throw (Exception);
619 
     // Get a LocusInfo by its name.
623  const LocusInfo& getLocusInfoByName(const std::string& locus_name) const
624  throw (Exception);
625 
     // Get a LocusInfo by its position.
629  const LocusInfo& getLocusInfoAtPosition(size_t locus_position) const
630  throw (Exception);
631 
     // Add an AlleleInfo to a LocusInfo (locus selected by name).
635  void addAlleleInfoByLocusName(const std::string& locus_name, const AlleleInfo& allele)
636  throw (Exception);
637 
     // Add an AlleleInfo to a LocusInfo (locus selected by position).
641  void addAlleleInfoByLocusPosition(size_t locus_position, const AlleleInfo& allele)
642  throw (Exception);
643 
     // Get the number of loci.
647  size_t getNumberOfLoci() const throw (NullPointerException);
648 
     // Get the ploidy of a locus (selected by name).
652  size_t getPloidyByLocusName(const std::string& locus_name) const throw (Exception);
653 
     // Get the ploidy of a locus (selected by position).
657  size_t getPloidyByLocusPosition(size_t locus_position) const throw (Exception);
658 
659  // ** Container extraction ***************************************************/
     // NOTE(review): the cross-reference index of this listing documents a
     // no-argument getPolymorphismMultiGContainer() overload that is not
     // shown in this listing.
664 
     // Get a PolymorphismMultiGContainer for the given selection.
     // NOTE(review): presumably the map goes from a group to the selected
     // individuals in it, as for the sequence container below — confirm.
     // Ownership of the returned raw pointer is not visible here.
670  PolymorphismMultiGContainer* getPolymorphismMultiGContainer(const std::map<size_t, std::vector<size_t> >& selection) const throw (Exception);
671 
     // Get a PolymorphismSequenceContainer from a selection of groups and
     // individuals. Ownership of the returned raw pointer is not visible here.
679  PolymorphismSequenceContainer* getPolymorphismSequenceContainer(const std::map<size_t, std::vector<size_t> >& selection, size_t sequence_position) const throw (Exception);
680 
681  // ** General tests **********************************************************/
     // Tell if at least one individual has at least one sequence.
685  bool hasSequenceData() const;
686 
     // Tell if there is allelic data.
690  bool hasAlleleicData() const;
691 };
692 } // end of namespace bpp;
693 
694 #endif // _DATASET_H_
695 
size_t getLocalityPosition(const std::string &name) const
Get the position of a locality in the container.
Definition: DataSet.cpp:131
void deleteIndividualSequenceAtPositionInGroup(size_t group_position, size_t individual_position, size_t sequence_position)
Delete a Sequence of an Individual of a Group.
Definition: DataSet.cpp:773
void deleteIndividualAtPositionFromGroup(size_t group_position, size_t individual_position)
Delete an Individual from a group.
Definition: DataSet.cpp:496
The GroupNotFoundException class.
void deleteIndividualByIdFromGroup(size_t group_position, const std::string &individual_id)
Delete an Individual from a group.
Definition: DataSet.cpp:512
void setIndividualMonolocusGenotypeByAlleleIdInGroup(size_t group_position, size_t individual_position, size_t locus_position, const std::vector< std::string > allele_id)
Set a MonolocusGenotype of an Individual from a group.
Definition: DataSet.cpp:973
The AnalyzedSequences class.
void deleteIndividualGenotypeInGroup(size_t group_position, size_t individual_position)
Delete the MultilocusGenotype of an Individual from a Group.
Definition: DataSet.cpp:905
The LocalityNotFoundException class.
unsigned short getIndividualSexInGroup(size_t group_position, size_t individual_position) const
Get the sex of an Individual in a Group.
Definition: DataSet.cpp:544
const AnalyzedLoci * getAnalyzedLoci() const
Get the AnalyzedLoci if there is one.
Definition: DataSet.cpp:1093
const Locality< double > * getIndividualLocalityInGroup(size_t group_position, size_t individual_position) const
Get the Locality of an Individual in a Group.
Definition: DataSet.cpp:652
bool hasLocality() const
Tell if there is at least one locality.
Definition: DataSet.cpp:197
const Locality< double > & getLocalityByName(const std::string &name) const
Get a Locality by name.
Definition: DataSet.cpp:152
const LocusInfo & getLocusInfoByName(const std::string &locus_name) const
Get a LocusInfo by its name.
Definition: DataSet.cpp:1126
The BadIdentifierException class.
STL namespace.
void addAlleleInfoByLocusPosition(size_t locus_position, const AlleleInfo &allele)
Add an AlleleInfo to a LocusInfo.
Definition: DataSet.cpp:1182
void deleteIndividualSequenceByNameInGroup(size_t group_position, size_t individual_position, const std::string &sequence_name)
Delete a Sequence of an Individual of a Group.
Definition: DataSet.cpp:749
The Individual class.
Definition: Individual.h:75
const Group & getGroupById(size_t group_id) const
Get a group by identifier.
Definition: DataSet.cpp:229
void deleteAnalyzedLoci()
Delete the AnalyzedLoci.
Definition: DataSet.cpp:1102
DataSet & operator=(const DataSet &ds)
Definition: DataSet.cpp:78
The MultilocusGenotype class.
void deleteGroupAtPosition(size_t group_position)
Delete a Group from the DataSet.
Definition: DataSet.cpp:290
void mergeGroups(std::vector< size_t > &group_ids)
Merge several Groups into one.
Definition: DataSet.cpp:338
const Date * getIndividualDateInGroup(size_t group_position, size_t individual_position) const
Get the Date of an Individual in a Group.
Definition: DataSet.cpp:576
std::string getAlphabetType() const
Get the alphabet type as a string.
Definition: DataSet.cpp:1055
PolymorphismMultiGContainer * getPolymorphismMultiGContainer() const
Get a PolymorphismMultiGContainer with all allelic data of the DataSet.
Definition: DataSet.cpp:1245
The Date class.
Definition: Date.h:56
void addGroup(const Group &group)
Add a Group to the DataSet.
Definition: DataSet.cpp:205
size_t getPloidyByLocusName(const std::string &locus_name) const
Get the ploidy of a locus.
Definition: DataSet.cpp:1211
const Sequence & getIndividualSequenceByNameInGroup(size_t group_position, size_t individual_position, const std::string &sequence_name) const
Get a Sequence from an Individual of a Group.
Definition: DataSet.cpp:701
void mergeTwoGroups(size_t source_id, size_t target_id)
Merge two groups.
Definition: DataSet.cpp:307
void addAlleleInfoByLocusName(const std::string &locus_name, const AlleleInfo &allele)
Add an AlleleInfo to a LocusInfo.
Definition: DataSet.cpp:1162
size_t getPloidyByLocusPosition(size_t locus_position) const
Get the ploidy of a locus.
Definition: DataSet.cpp:1227
void setIndividualMonolocusGenotypeInGroup(size_t group_position, size_t individual_position, size_t locus_position, const MonolocusGenotype &monogen)
Set a MonolocusGenotype of an Individual from a group.
Definition: DataSet.cpp:921
AnalyzedSequences * analyzedSequences_
Definition: DataSet.h:77
size_t getNumberOfLoci() const
Get the number of loci.
Definition: DataSet.cpp:1202
void setIndividualSexInGroup(size_t group_position, size_t individual_position, const unsigned short sex)
Set the sex of an Individual in a Group.
Definition: DataSet.cpp:528
void splitGroup(size_t group_id, std::vector< size_t > individuals_selection)
Split a group in two.
Definition: DataSet.cpp:369
size_t getIndividualNumberOfSequencesInGroup(size_t group_position, size_t individual_position) const
Get the number of Sequences in an Individual of a Group.
Definition: DataSet.cpp:841
bool hasAlleleicData() const
Tell if there is allelic data.
Definition: DataSet.cpp:1356
void setIndividualDateInGroup(size_t group_position, size_t individual_position, const Date &date)
Set the Date of an Individual in a Group.
Definition: DataSet.cpp:560
~DataSet()
Destroy a DataSet.
Definition: DataSet.cpp:98
const Group & getGroupAtPosition(size_t group_position) const
Get a group by position.
Definition: DataSet.cpp:281
void setGroupName(size_t group_id, const std::string &group_name) const
Set the name of a Group.
Definition: DataSet.cpp:254
const LocusInfo & getLocusInfoAtPosition(size_t locus_position) const
Get a LocusInfo by its position.
Definition: DataSet.cpp:1142
size_t getNumberOfLocalities() const
Get the number of Localities.
Definition: DataSet.cpp:190
bool hasSequenceData() const
Tell if at least one individual has at least one sequence.
Definition: DataSet.cpp:1349
void setLocusInfo(size_t locus_position, const LocusInfo &locus)
Set a LocusInfo.
Definition: DataSet.cpp:1110
void addIndividualToGroup(size_t group_position, const Individual &individual)
Add an Individual to a Group.
Definition: DataSet.cpp:405
size_t getGroupPosition(size_t group_id) const
Get the position of a Group.
Definition: DataSet.cpp:269
The PolymorphismMultiGContainer class.
void setIndividualLocalityInGroupByName(size_t group_position, size_t individual_position, const std::string &locality_name)
Set the Locality of an Individual in a Group.
Definition: DataSet.cpp:632
size_t getIndividualSequencePositionInGroup(size_t group_position, size_t individual_position, const std::string &sequence_name) const
Get the position of a Sequence in an Individual of a Group.
Definition: DataSet.cpp:817
const Alphabet * getAlphabet() const
Get the alphabet if there is sequence data.
Definition: DataSet.cpp:1046
void deleteLocalityAtPosition(size_t locality_position)
Delete a Locality from the DataSet.
Definition: DataSet.cpp:166
size_t getIndividualPositionInGroup(size_t group_position, const std::string &individual_id) const
Get the position of an Individual in a Group.
Definition: DataSet.cpp:448
const Individual * getIndividualByIdFromGroup(size_t group_position, const std::string &individual_id) const
Get an Individual from a Group.
Definition: DataSet.cpp:480
const Locality< double > & getLocalityAtPosition(size_t locality_position) const
Get a Locality by locality_position.
Definition: DataSet.cpp:143
std::string getGroupName(size_t group_id) const
Get the name of a Group. If the name is an empty string it just returns the group_id.
Definition: DataSet.cpp:241
The AnalyzedLoci class.
Definition: AnalyzedLoci.h:64
void setAlphabet(const Alphabet *alpha)
Set the alphabet of the AnalyzedSequences.
Definition: DataSet.cpp:1028
size_t getNumberOfGroups() const
Get the number of Groups.
Definition: DataSet.cpp:300
void setAnalyzedLoci(const AnalyzedLoci &analyzedLoci)
Set the AnalyzedLoci to the DataSet.
Definition: DataSet.cpp:1066
const Point2D< double > * getIndividualCoordInGroup(size_t group_position, size_t individual_position) const
Get the coordinate of an Individual in a Group.
Definition: DataSet.cpp:612
std::vector< std::string > getIndividualSequencesNamesInGroup(size_t group_position, size_t individual_position) const
Get the Sequences' names from an Individual of a Group.
Definition: DataSet.cpp:797
const Individual * getIndividualAtPositionFromGroup(size_t group_position, size_t individual_position) const
Get an Individual from a Group.
Definition: DataSet.cpp:464
void addLocality(Locality< double > &locality)
Add a locality to the DataSet.
Definition: DataSet.cpp:119
The Group class.
Definition: Group.h:70
The MonolocusGenotype virtual class.
void deleteLocalityByName(const std::string &name)
Delete a Locality from the DataSet.
Definition: DataSet.cpp:176
const Sequence & getIndividualSequenceAtPositionInGroup(size_t group_position, size_t individual_position, size_t sequence_position) const
Get a Sequence from an Individual of a Group.
Definition: DataSet.cpp:725
The AlleleInfo interface.
Definition: AlleleInfo.h:58
const MonolocusGenotype * getIndividualMonolocusGenotypeInGroup(size_t group_position, size_t individual_position, size_t locus_position) const
Get a MonolocusGenotype from an Individual of a Group.
Definition: DataSet.cpp:1002
void addEmptyIndividualToGroup(size_t group_position, const std::string &individual_id)
Add an empty Individual to a Group.
Definition: DataSet.cpp:423
The PolymorphismSequenceContainer class.
The LocusInfo class.
Definition: LocusInfo.h:63
std::vector< Group * > groups_
Definition: DataSet.h:79
size_t getNumberOfIndividualsInGroup(size_t group_position) const
Get the number of Individuals in a Group.
Definition: DataSet.cpp:439
void initAnalyzedLoci(size_t number_of_loci)
Initialize the AnalyzedLoci for a given number of loci.
Definition: DataSet.cpp:1084
void setIndividualGenotypeInGroup(size_t group_position, size_t individual_position, const MultilocusGenotype &genotype)
Set the MultilocusGenotype of an Individual in a Group.
Definition: DataSet.cpp:861
PolymorphismSequenceContainer * getPolymorphismSequenceContainer(const std::map< size_t, std::vector< size_t > > &selection, size_t sequence_position) const
Get a PolymorphismSequenceContainer from a selection of groups and individuals.
Definition: DataSet.cpp:1307
std::vector< Locality< double > * > localities_
Definition: DataSet.h:78
void addEmptyGroup(size_t group_id)
Add an empty Group to the DataSet.
Definition: DataSet.cpp:217
AnalyzedLoci * analyzedLoci_
Definition: DataSet.h:76
void setIndividualCoordInGroup(size_t group_position, size_t individual_position, const Point2D< double > &coord)
Set the coordinates of an Individual in a Group.
Definition: DataSet.cpp:596
DataSet()
Build a new empty DataSet.
Definition: DataSet.cpp:48
void addIndividualSequenceInGroup(size_t group_position, size_t individual_position, size_t sequence_position, const Sequence &sequence)
Add a Sequence to an Individual in a Group.
Definition: DataSet.cpp:672
void initIndividualGenotypeInGroup(size_t group_position, size_t individual_position)
Initialize the genotype of an Individual in a Group.
Definition: DataSet.cpp:877
void setIndividualMonolocusGenotypeByAlleleKeyInGroup(size_t group_position, size_t individual_position, size_t locus_position, const std::vector< size_t > allele_keys)
Set a MonolocusGenotype of an Individual from a group.
Definition: DataSet.cpp:945
The DataSet class.
Definition: DataSet.h:73