bpp-phyl  2.2.0
BipartitionTools.cpp
Go to the documentation of this file.
1 //
2 // File: BipartitionTools.cpp
3 // Created by: Nicolas Galtier & Julien Dutheil
4 // Created on: Tue Apr 13 15:09 2007
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 16, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for phylogenetic data analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
40 #include "BipartitionList.h"
41 #include "BipartitionTools.h"
42 #include "TreeTemplate.h"
43 
44 #include <Bpp/Exceptions.h>
45 #include <Bpp/Text/TextTools.h>
46 #include <Bpp/Io/FileTools.h>
47 
48 // From SeqLib
49 #include <Bpp/Seq/Alphabet/DNA.h>
50 #include <Bpp/Seq/Alphabet/AlphabetTools.h>
51 
52 using namespace bpp;
53 
54 // From STL
55 #include <iostream>
56 #include <algorithm>
57 #include <limits.h> // defines CHAR_BIT
58 
59 using namespace std;
60 
61 /******************************************************************************/
62 
63 int BipartitionTools::LWORD = static_cast<int>(CHAR_BIT * sizeof(int));
64 
65 /******************************************************************************/
66 
67 /* functions dealing with int* seen as arrays of bits */
68 /* (provided by Manolo Gouy) */
69 
70 void BipartitionTools::bit1(int* plist, int num)
71 {
72  // num--;
73  plist += (num / LWORD);
74  *plist |= (1 << (num % LWORD));
75 }
76 
77 /******************************************************************************/
78 
79 void BipartitionTools::bit0(int* plist, int num)
80 {
81  // num--;
82  plist += (num / LWORD);
83  *plist &= ~(1 << (num % LWORD));
84 }
85 
86 /******************************************************************************/
87 
88 void BipartitionTools::bitAnd(int* listet, int* list1, int* list2, size_t len)
89 {
90  for (size_t i = 0; i < len; i++)
91  {
92  listet[i] = list1[i] & list2[i];
93  }
94 }
95 
96 /******************************************************************************/
97 
98 void BipartitionTools::bitOr(int* listou, int* list1, int* list2, size_t len)
99 {
100  for (size_t i = 0; i < len; i++)
101  {
102  listou[i] = list1[i] | list2[i];
103  }
104 }
105 
106 /******************************************************************************/
107 
108 void BipartitionTools::bitNot(int* listnon, int* list, size_t len)
109 {
110  for (size_t i = 0; i < len; i++)
111  {
112  listnon[i] = ~list[i];
113  }
114 }
115 
116 /******************************************************************************/
117 bool BipartitionTools::testBit(int* plist, int num)
118 {
119  // num--;
120  plist += (num / LWORD);
121  return (*plist) & (1 << (num % LWORD));
122 }
123 
124 /******************************************************************************/
125 
127  const BipartitionList& bipartL1, size_t i1,
128  const BipartitionList& bipartL2, size_t i2,
129  bool checkElements) throw (Exception)
130 {
131  vector<int*> bitBipL1, bitBipL2, twoBitBipL;
132  vector<string> elements;
133 
134  if (i1 >= bipartL1.getNumberOfBipartitions())
135  throw Exception("Bipartition index exceeds BipartitionList size");
136  if (i2 >= bipartL2.getNumberOfBipartitions())
137  throw Exception("Bipartition index exceeds BipartitionList size");
138 
139  if (checkElements && !VectorTools::haveSameElements(bipartL1.getElementNames(), bipartL2.getElementNames()))
140  throw Exception("Distinct bipartition element sets");
141 
142  /* get sorted bit bipartition lists */
143  /* (if input is sorted: easy; otherwise: first copy, then sort) */
144 
145  if (bipartL1.isSorted())
146  {
147  elements = bipartL1.getElementNames();
148  bitBipL1 = bipartL1.getBitBipartitionList();
149  }
150  else
151  {
152  BipartitionList provBipartL(bipartL1.getElementNames(), bipartL1.getBitBipartitionList());
153  provBipartL.sortElements();
154  elements = provBipartL.getElementNames();
155  bitBipL1 = provBipartL.getBitBipartitionList();
156  }
157 
158  if (bipartL2.isSorted())
159  {
160  bitBipL2 = bipartL2.getBitBipartitionList();
161  }
162  else
163  {
164  BipartitionList provBipartL(bipartL2.getElementNames(), bipartL2.getBitBipartitionList());
165  provBipartL.sortElements();
166  bitBipL2 = provBipartL.getBitBipartitionList();
167  }
168 
169  /* create a new BipartitionList with just the two focal bipartitions */
170 
171  twoBitBipL.push_back(bitBipL1[i1]);
172  twoBitBipL.push_back(bitBipL2[i2]);
173  BipartitionList* twoBipL = new BipartitionList(elements, twoBitBipL);
174  return twoBipL;
175 }
176 
177 /******************************************************************************/
178 
180  const BipartitionList& bipartL1, size_t i1,
181  const BipartitionList& bipartL2, size_t i2,
182  bool checkElements)
183 {
184  BipartitionList* twoBipL = buildBipartitionPair(bipartL1, i1, bipartL2, i2, checkElements);
185  bool test = twoBipL->areIdentical(0, 1);
186  delete twoBipL;
187  return test;
188 }
189 
190 /******************************************************************************/
191 
193  const BipartitionList& bipartL1, size_t i1,
194  const BipartitionList& bipartL2, size_t i2,
195  bool checkElements)
196 {
197  BipartitionList* twoBipL = buildBipartitionPair(bipartL1, i1, bipartL2, i2, checkElements);
198  bool test = twoBipL->areCompatible(0, 1);
199  delete twoBipL;
200  return test;
201 }
202 
203 /******************************************************************************/
204 
206  const vector<BipartitionList*>& vecBipartL,
207  bool checkElements) throw (Exception)
208 {
209  vector<string> elements;
210  vector<int*> mergedBitBipL;
211  int* provBitBip;
212  BipartitionList* mergedBipL;
213 
214  if (vecBipartL.size() == 0)
215  throw Exception("Empty vector passed");
216 
217  if (checkElements)
218  {
219  for (size_t i = 1; i < vecBipartL.size(); ++i)
220  {
221  if (!VectorTools::haveSameElements(vecBipartL[0]->getElementNames(), vecBipartL[0]->getElementNames()))
222  throw Exception("BipartitionTools::mergeBipartitionLists. Distinct bipartition element sets");
223  }
224  }
225 
226  size_t lword = static_cast<size_t>(BipartitionTools::LWORD);
227  size_t nbword = (vecBipartL[0]->getElementNames().size() + lword - 1) / lword;
228  size_t nbint = nbword * lword / (CHAR_BIT * sizeof(int));
229 
230  elements = vecBipartL[0]->getElementNames();
231  if (!vecBipartL[0]->isSorted())
232  std::sort(elements.begin(), elements.end());
233 
234  for (size_t i = 0; i < vecBipartL.size(); i++)
235  {
236  vector<int*> bitBipL;
237  if (vecBipartL[i]->isSorted())
238  {
239  bitBipL = vecBipartL[i]->getBitBipartitionList();
240  }
241  else
242  {
243  // We don't need the extra recopy here, do we?
244  // BipartitionList provBipartL(BipartitionList(vecBipartL[i]->getElementNames(), vecBipartL[i]->getBitBipartitionList()));
245  BipartitionList provBipartL(vecBipartL[i]->getElementNames(), vecBipartL[i]->getBitBipartitionList());
246  provBipartL.sortElements();
247  bitBipL = provBipartL.getBitBipartitionList();
248  }
249  for (size_t j = 0; j < bitBipL.size(); j++)
250  {
251  provBitBip = new int[nbint];
252  for (size_t k = 0; k < nbint; k++)
253  {
254  provBitBip[k] = bitBipL[j][k];
255  }
256  mergedBitBipL.push_back(provBitBip);
257  }
258  }
259 
260  mergedBipL = new BipartitionList(elements, mergedBitBipL);
261  return mergedBipL;
262 }
263 
264 /******************************************************************************/
265 
266 VectorSiteContainer* BipartitionTools::MRPEncode(
267  const vector<BipartitionList*>& vecBipartL) throw (Exception)
268 {
269  vector<string> all_elements;
270  map<string, bool> bip;
271  vector<string> bip_elements;
272  const DNA* alpha = &AlphabetTools::DNA_ALPHABET;
273  vector<string> sequences;
274 
275  if (vecBipartL.size() == 0)
276  throw Exception("Empty vector passed");
277 
278  vector< vector<string> > vecElementLists;
279  for (size_t i = 0; i < vecBipartL.size(); i++)
280  {
281  vecElementLists.push_back(vecBipartL[i]->getElementNames());
282  }
283 
284  all_elements = VectorTools::vectorUnion(vecElementLists);
285 
286  sequences.resize(all_elements.size());
287 
288  for (size_t i = 0; i < vecBipartL.size(); i++)
289  {
290  for (size_t j = 0; j < vecBipartL[i]->getNumberOfBipartitions(); j++)
291  {
292  bip = vecBipartL[i]->getBipartition(j);
293  bip_elements = MapTools::getKeys(bip);
294 
295  for (size_t k = 0; k < all_elements.size(); k++)
296  {
297  if (VectorTools::contains(bip_elements, all_elements[k]))
298  {
299  if (bip[all_elements[k]])
300  sequences[k].push_back('C');
301  else
302  sequences[k].push_back('A');
303  }
304  else
305  sequences[k].push_back('N');
306  }
307  }
308  }
309 
310  vector<const Sequence*> vec_sequences;
311  for (size_t i = 0; i < all_elements.size(); i++)
312  {
313  const Sequence* seq = new BasicSequence(all_elements[i], sequences[i], alpha);
314  vec_sequences.push_back(seq);
315  }
316 
317  VectorSequenceContainer vec_seq_cont(vec_sequences, alpha);
318  for (size_t i = 0; i < all_elements.size(); i++)
319  {
320  delete vec_sequences[i];
321  }
322 
323  VectorSiteContainer* vec_site_cont = new VectorSiteContainer(vec_seq_cont);
324 
325  return vec_site_cont;
326 }
327 
328 /******************************************************************************/
329 
331  const vector<BipartitionList*>& vecBipartL) throw (Exception)
332 {
333  vector<string> all_elements;
334  map<string, bool> bip;
335  vector<string> bip_elements;
336  const DNA* alpha = &AlphabetTools::DNA_ALPHABET;
337  vector<string> sequences;
338 
339  if (vecBipartL.size() == 0)
340  throw Exception("Empty vector passed");
341 
342  vector< vector<string> > vecElementLists;
343  for (size_t i = 0; i < vecBipartL.size(); i++)
344  {
345  vecElementLists.push_back(vecBipartL[i]->getElementNames());
346  }
347 
348  all_elements = VectorTools::vectorUnion(vecElementLists);
349 
350  sequences.resize(all_elements.size());
351 
352  for (size_t i = 0; i < vecBipartL.size(); i++)
353  {
354  for (size_t j = 0; j < vecBipartL[i]->getNumberOfBipartitions(); j++)
355  {
356  bip = vecBipartL[i]->getBipartition(j);
357  bip_elements = MapTools::getKeys(bip);
358  //Check for multilabel trees: if a taxa found on both sides, do not consider the entire bipartition
359  vector< string > zeroes;
360  vector< string > ones;
361  for (size_t k = 0; k < all_elements.size(); k++)
362  {
363  if (VectorTools::contains(bip_elements, all_elements[k]))
364  {
365  if (bip[all_elements[k]])
366  ones.push_back(all_elements[k]);
367  else
368  zeroes.push_back(all_elements[k]);
369  }
370  }
371  vector<string> inter = VectorTools::vectorIntersection(ones, zeroes);
372  if (inter.size() != 0) { //some taxa found on both sides of the bipartition
373  for (size_t k = 0; k < all_elements.size(); k++)
374  {
375  sequences[k].push_back('N');
376  }
377  }
378  else
379  {
380  for (size_t k = 0; k < all_elements.size(); k++)
381  {
382  if (VectorTools::contains(bip_elements, all_elements[k]))
383  {
384  if (bip[all_elements[k]])
385  sequences[k].push_back('C');
386  else
387  sequences[k].push_back('A');
388  }
389  else
390  sequences[k].push_back('N');
391  }
392  }
393  }
394  }
395 
396  vector<const Sequence*> vec_sequences;
397  for (size_t i = 0; i < all_elements.size(); i++)
398  {
399  const Sequence* seq = new BasicSequence(all_elements[i], sequences[i], alpha);
400  vec_sequences.push_back(seq);
401  }
402 
403  VectorSequenceContainer vec_seq_cont(vec_sequences, alpha);
404  for (size_t i = 0; i < all_elements.size(); i++)
405  {
406  delete vec_sequences[i];
407  }
408 
409  VectorSiteContainer* vec_site_cont = new VectorSiteContainer(vec_seq_cont);
410 
411  return vec_site_cont;
412 }
413 
414 /******************************************************************************/
415 
416 
static bool areIdentical(const BipartitionList &bipart1, size_t i1, const BipartitionList &bipart2, size_t i2, bool checkElements=true)
Tells whether two bipartitions from distinct lists are identical.
static bool areCompatible(const BipartitionList &bipart1, size_t i1, const BipartitionList &bipart2, size_t i2, bool checkElements=true)
Tells whether two bipartitions from distinct lists are compatible.
bool areIdentical(size_t k1, size_t k2) const
static void bitAnd(int *listet, int *list1, int *list2, size_t len)
bit-wise logical AND between two arrays of bits
static void bit0(int *list, int num)
Sets bit number num of bit array plist to zero.
static void bit1(int *list, int num)
Sets bit number num of bit array list to one.
bool areCompatible(size_t k1, size_t k2) const
Tells whether 2 bipartitions from the list are compatible.
STL namespace.
static BipartitionList * mergeBipartitionLists(const std::vector< BipartitionList *> &vecBipartL, bool checkElements=true)
Makes one BipartitionList out of several.
static int LWORD
Unit length (in bits) of arrays of bits. Must be a multiple of CHAR_BIT*sizeof(int). Default value is 64.
static void bitNot(int *listnon, int *list, size_t len)
bit-wise logical NOT
static bool testBit(int *list, int num)
Tells whether bit number num in bit array list is one.
static VectorSiteContainer * MRPEncode(const std::vector< BipartitionList *> &vecBipartL)
Create a sequence data set corresponding to the Matrix Representation of the input BipartitionList ob...
This class deals with the bipartitions defined by trees.
static VectorSiteContainer * MRPEncodeMultilabel(const std::vector< BipartitionList *> &vecBipartL)
Create a sequence data set corresponding to the Matrix Representation of the input BipartitionList ob...
static BipartitionList * buildBipartitionPair(const BipartitionList &bipartL1, size_t i1, const BipartitionList &bipartL2, size_t i2, bool checkElements=true)
Construct a BipartitionList containing two bipartitions taken from distinct input lists...
const std::vector< int * > & getBitBipartitionList() const
static void bitOr(int *listou, int *list1, int *list2, size_t len)
bit-wise logical OR between two arrays of bits