Bifrost
ColorSet.hpp
Go to the documentation of this file.
1 #ifndef BIFROST_COLORSET_HPP
2 #define BIFROST_COLORSET_HPP
3 
4 #include "roaring.hh"
5 
6 #include "CompactedDBG.hpp"
7 #include "TinyBitmap.hpp"
8 
14 template<typename Unitig_data_t> class ColoredCDBG;
15 template<typename Unitig_data_t> class DataAccessor;
16 template<typename Unitig_data_t> class DataStorage;
17 
21 class UnitigColors {
22 
23  //Wrap the Roaring bitmap in an 8-byte-aligned struct: setBits stores a Bitmap address together with a type flag
    //in its 3 low-order bits (flagMask), and getPtrBitmap() recovers the address with (setBits & pointerMask),
    //so a Bitmap address must always have those 3 low-order bits equal to zero.
24  struct alignas(8) Bitmap { Roaring r; };
25 
26  template<typename U> friend class ColoredCDBG;
27  template<typename U> friend class DataAccessor;
28  template<typename U> friend class DataStorage;
29 
30  public:
31 
32  typedef pair<UnitigColors, size_t> SharedUnitigColors;
33 
37  class UnitigColors_const_iterator : public std::iterator<std::forward_iterator_tag, pair<size_t, size_t>> {
    // Forward iterator over the pairs (k-mer position, color ID) of a UnitigColors.
    // The element currently visited is held as one integer, ck_id, which operator*(), getKmerPosition()
    // and getColorID() decode using um_sz as the divisor.
    // The constructors, destructor, operator=, operator==, operator!=, operator++ and nextColor() named in
    // the member index at the end of this page are not shown in this listing (gaps in the line numbers).
38 
39  friend class UnitigColors;
40 
41  public:
42 
48 
55 
60 
67 
    // Indirection operator: returns the pair (ck_id % um_sz, ck_id / um_sz),
    // i.e. (k-mer position, color ID) -- the same values as the two getters below.
73  inline pair<size_t, size_t> operator*() const {
74 
75  return make_pair(ck_id % um_sz, ck_id / um_sz);
76  }
77 
    // K-mer position of the k-mer visited by the iterator (remainder of ck_id by um_sz).
82  inline size_t getKmerPosition() const { return ck_id % um_sz; }
83 
    // Color of the k-mer visited by the iterator (quotient of ck_id by um_sz).
88  inline size_t getColorID() const { return ck_id / um_sz; }
89 
97 
105 
111 
117 
123 
124  private:
125 
126  const UnitigColors* cs; // NOTE(review): presumably the UnitigColors being iterated (cf. constructor parameter cs_) -- confirm in ColorSet.cpp
127 
128  size_t flag;
129 
130  size_t it_setBits; // compared against cs_sz in isInvalid()
131  size_t cs_sz;
132  size_t um_sz; // divisor used to split ck_id; NOTE(review): presumably the unitig length in k-mers (cf. len_unitig_km) -- confirm
133 
134  size_t start_pos;
135  size_t end_pos;
136 
137  uint64_t ck_id; // combined ID of the current element: color ID * um_sz + k-mer position (see operator*)
138 
139  const Roaring empty_roar;
140 
141  TinyBitmap t_bmp;
142 
144  Roaring::const_iterator it_roar;
145  TinyBitmap::const_iterator it_t_bmp;
146 
     // Private constructor, reachable from UnitigColors through the friend declaration above.
147  UnitigColors_const_iterator(const UnitigColors* cs_, const size_t start_pos_, const size_t end_pos_,
148  const size_t len_unitig_km, const bool beg);
149 
     // Raw combined ID of the current element, without decoding.
150  inline uint64_t get_ID() const { return ck_id; }
151 
     // True when ck_id equals the all-ones 64-bit value or when it_setBits has reached cs_sz.
152  inline bool isInvalid() const {
153 
154  return ((ck_id == 0xffffffffffffffff) || (it_setBits == cs_sz));
155  }
156  };
157 
165 
170 
176  UnitigColors(const UnitigColors& o); // Copy constructor
177 
183  UnitigColors(UnitigColors&& o); // Move constructor
184 
189 
197 
205 
206  //bool operator==(const UnitigColors& o) const;
207  // inline bool operator!=(const UnitigColors& o) const { return !operator==(o);
208 
212  void clear(); // Empty a UnitigColors of its content
213 
223  bool isEqual(const UnitigMapBase& um, const UnitigColors& o, const UnitigMapBase& um_o) const; // Check if two UnitigColors are equal
224 
229  inline bool isEmpty() const { return (size() == 0); } // Check if a UnitigColors is empty (no colors); relies on the private size() overload taking no argument
230 
237  void add(const UnitigMapBase& um, const size_t color_id); // Add a color to all k-mers of a unitig mapping
238 
245  void remove(const UnitigMapBase& um, const size_t color_id); // Remove a color for all k-mers of a unitig mapping
246 
255  bool contains(const UnitigMapBase& um, const size_t color_id) const; // Check if a color is present on all k-mers of a unitig mapping
256 
261  size_t size(const UnitigMapBase& um) const; // Number of pairs (k-mer position, color) of a reference unitig
262 
269  size_t size(const UnitigMapBase& um, const size_t color_id) const; // Number of k-mers of a reference unitig having a given color
270 
276  size_t colorMax(const UnitigMapBase& um) const; // Largest color index of all k-mers of a reference unitig
277 
284  bool write(ostream& stream_out, const bool copy_UnitigColors = true) const; // Write a UnitigColors to a stream
285 
292  bool read(istream& stream_in); // Read a UnitigColors from a stream
293 
298  size_t getSizeInBytes() const; // Size of the UnitigColors in bytes
299 
307 
315 
323 
324  uint64_t hash(const size_t seed = 0) const; // Hash of this UnitigColors for the given seed (0 by default); called by UnitigColorsHash
325 
326  private:
327 
328  UnitigColors(SharedUnitigColors& o); // Build from a shared (UnitigColors, size_t) pair; NOTE(review): presumably references o rather than copying it -- confirm in ColorSet.cpp
329  UnitigColors(const UnitigColors& o, const SharedUnitigColors* old_ref_uc, const SharedUnitigColors* new_ref_uc); // NOTE(review): looks like a copy that re-bases shared references from old_ref_uc to new_ref_uc -- confirm
330 
331  UnitigColors& operator=(SharedUnitigColors& o); // Assignment counterpart of the SharedUnitigColors constructor
332 
333  void add(const size_t color_id); // Overload of add() taking no unitig mapping
334  bool contains(const size_t color_km_id) const; // Overload of contains() taking no unitig mapping; NOTE(review): the parameter name suggests a combined color/k-mer ID -- confirm
335 
336  inline void releaseMemory(){
     // Release whatever setBits currently refers to, then reset this object to the bare
     // localBitVector flag (no payload bits set).
337 
338  const uintptr_t flag = setBits & flagMask; // representation flag kept in the low-order bits of setBits
339 
340  if (flag == ptrUnitigColors) delete[] getPtrUnitigColors(); // UnitigColors array: released with delete[]
341  else if (flag == ptrBitmap) delete getPtrBitmap(); // single Bitmap object: released with delete
342  else if (flag == ptrSharedUnitigColors){
343 
344  SharedUnitigColors* s_uc = getPtrSharedUnitigColors();
345 
     // Decrement the counter of the shared pair (second) and, when it reaches zero, clear the
     // shared UnitigColors (first). The pair itself is not freed here.
     // NOTE(review): presumably the pair is owned elsewhere (e.g. by DataStorage) -- confirm.
346  if (--(s_uc->second) == 0) s_uc->first.clear();
347  }
348  else if (flag == localTinyBitmap){
349 
     // Wrap the stored uint16_t pointer in a temporary TinyBitmap and clear it.
     // NOTE(review): presumably TinyBitmap::clear() deallocates the storage -- confirm in TinyBitmap.hpp.
350  uint16_t* setPtrTinyBmp = getPtrTinyBitmap();
351  TinyBitmap t_bmp(&setPtrTinyBmp);
352 
353  t_bmp.clear();
354  }
     // Flags not matched above have no branch: nothing is released for them.
355 
356  setBits = localBitVector;
357  }
358 
359  inline void shrinkSize(){
     // Ask the underlying container, if any, to shrink itself. Only the ptrUnitigColors,
     // ptrBitmap and localTinyBitmap representations are handled; other flags are left untouched.
360 
361  const uintptr_t flag = setBits & flagMask; // representation flag kept in the low-order bits of setBits
362 
363  if (flag == ptrUnitigColors){
364 
365  UnitigColors* uc = getPtrUnitigColors();
366 
     // Recurse into both UnitigColors of the array.
367  uc[0].shrinkSize();
368  uc[1].shrinkSize();
369  }
370  else if (flag == ptrBitmap) getPtrBitmap()->r.shrinkToFit();
371  else if (flag == localTinyBitmap){
372 
373  uint16_t* setPtrTinyBmp = getPtrTinyBitmap();
374  TinyBitmap t_bmp(&setPtrTinyBmp);
375 
376  t_bmp.shrinkSize();
377 
     // Rebuild setBits from the pointer handed back by detach(), re-applying the localTinyBitmap flag.
     // NOTE(review): presumably needed because shrinkSize() may move the storage -- confirm in TinyBitmap.hpp.
378  setBits = (reinterpret_cast<uintptr_t>(t_bmp.detach()) & pointerMask) | localTinyBitmap;
379  }
380  }
381 
382  UnitigColors makeFullColors(const UnitigMapBase& um) const;
383  UnitigColors getFullColors(const UnitigMapBase& um) const;
384  UnitigColors getNonFullColors(const UnitigMapBase& um, const UnitigColors& full_uc) const;
385 
386  inline UnitigColors* getFullColorsPtr() {
     // Returns the pointer stored in setBits when the representation flag is ptrUnitigColors
     // (the array of 2 UnitigColors described in the flag table at the end of the class, whose
     // first entry is listed there as the "full" colors); returns nullptr for every other flag.
387 
388  return (isUnitigColors() ? getPtrUnitigColors() : nullptr);
389  }
390 
     // Const overload: same test and same pointer, returned as pointer-to-const.
391  inline const UnitigColors* getFullColorsPtr() const {
392 
393  return (isUnitigColors() ? getPtrUnitigColors() : nullptr);
394  }
395 
396  inline bool isBitmap() const { return ((setBits & flagMask) == ptrBitmap); } // true when the flag bits of setBits equal ptrBitmap
397  inline bool isTinyBitmap() const { return ((setBits & flagMask) == localTinyBitmap); } // true when the flag bits equal localTinyBitmap
398  inline bool isUnitigColors() const { return ((setBits & flagMask) == ptrUnitigColors); } // true when the flag bits equal ptrUnitigColors
399  inline bool isSharedUnitigColors() const { return ((setBits & flagMask) == ptrSharedUnitigColors); } // true when the flag bits equal ptrSharedUnitigColors
400 
401  size_t size() const;
402 
403  UnitigColors reverse(const UnitigMapBase& um) const;
404 
405  const_iterator begin(const size_t start_pos, const size_t end_pos, const size_t len_km_sz) const;
406 
407  inline Bitmap* getPtrBitmap() const {
     // All accessors in this group strip the flag bits from setBits (setBits & pointerMask) and
     // reinterpret the remaining bits as a pointer of the requested type. None of them checks the
     // flag, so the result is only meaningful when the matching representation flag is set
     // (see isBitmap(), isTinyBitmap(), isUnitigColors(), isSharedUnitigColors()).
     // The non-"Const" variants return a pointer to non-const data even though they are const members.
408 
409  return reinterpret_cast<Bitmap*>(setBits & pointerMask);
410  }
411 
     // Same address as getPtrBitmap(), as pointer-to-const.
412  inline const Bitmap* getConstPtrBitmap() const {
413 
414  return reinterpret_cast<const Bitmap*>(setBits & pointerMask);
415  }
416 
     // Address stored in setBits viewed as uint16_t*, the pointer type handed to TinyBitmap.
417  inline uint16_t* getPtrTinyBitmap() const {
418 
419  return reinterpret_cast<uint16_t*>(setBits & pointerMask);
420  }
421 
     // Address stored in setBits viewed as a UnitigColors pointer (indexed as uc[0], uc[1] in shrinkSize()).
422  inline UnitigColors* getPtrUnitigColors() const {
423 
424  return reinterpret_cast<UnitigColors*>(setBits & pointerMask);
425  }
426 
     // Same address as getPtrUnitigColors(), as pointer-to-const.
427  inline const UnitigColors* getConstPtrUnitigColors() const {
428 
429  return reinterpret_cast<const UnitigColors*>(setBits & pointerMask);
430  }
431 
     // Address stored in setBits viewed as a pointer to a SharedUnitigColors pair.
432  inline SharedUnitigColors* getPtrSharedUnitigColors() const {
433 
434  return reinterpret_cast<SharedUnitigColors*>(setBits & pointerMask);
435  }
436 
     // Same address as getPtrSharedUnitigColors(), as pointer-to-const.
437  inline const SharedUnitigColors* getConstPtrSharedUnitigColors() const {
438 
439  return reinterpret_cast<const SharedUnitigColors*>(setBits & pointerMask);
440  }
441 
442  static const size_t maxBitVectorIDs; // 64 bits - 3 bits for the color set type = 61
443  static const size_t shiftMaskBits; // 3 bits
444 
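445  // setBits holds one of the following representations, selected by the flag in its 3 low-order bits (flagMask):
446  // Flag 0 - A TinyBitmap which can contain up to 65488 uint
447  // Flag 1 - A bit vector of 61 bits (64 bits - 3 flag bits) storing presence/absence of up to 61 integers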
448  // Flag 2 - A single integer
449  // Flag 3 - A pointer to a CRoaring compressed bitmap which can contain up to 2^32 uint
450  // Flag 4 - A pointer to an array of 2 UnitigColors:
451  // 1 - Contains "full" colors -> color is present on ALL k-mers of the unitig
452  // 2 - Contains colors for k-mers if NOT full colors
453  // Flag 5 - A pointer to a pair (UnitigColors, size_t) shared by multiple UnitigColors
454 
455  static const uintptr_t localTinyBitmap; // Flag 0
456  static const uintptr_t localBitVector; // Flag 1
457  static const uintptr_t localSingleInt; // Flag 2
458  static const uintptr_t ptrBitmap; // Flag 3
459  static const uintptr_t ptrUnitigColors; // Flag 4
460  static const uintptr_t ptrSharedUnitigColors; // Flag 5
461 
462  static const uintptr_t flagMask; // 0x7 (= 2^shiftMaskBits - 1)
463  static const uintptr_t pointerMask; // 0xfffffffffffffff8 (= 2^64 - 1 - flagMask)
464 
465  uintptr_t setBits;
466 };
467 
469 
470  size_t operator()(const UnitigColors& uc) const {
     // Hash functor call operator: forwards to UnitigColors::hash() with its default seed (0)
     // and returns the 64-bit result converted to size_t.
471 
472  return uc.hash();
473  }
474 };
475 
476 #endif
UnitigColors::isEqual
bool isEqual(const UnitigMapBase &um, const UnitigColors &o, const UnitigMapBase &um_o) const
Check if two UnitigColors are equal.
UnitigColors::UnitigColors_const_iterator::operator==
bool operator==(const UnitigColors_const_iterator &o) const
Equality operator.
UnitigColors::end
const_iterator end() const
Create a constant iterator to the "past-the-last" pair (k-mer position, color) of the UnitigColors.
UnitigColors::UnitigColors_const_iterator::operator++
UnitigColors_const_iterator operator++(int)
Postfix increment operator: it iterates over the next k-mer of the unitig having the current color or...
UnitigColors::UnitigColors_const_iterator::operator++
UnitigColors_const_iterator & operator++()
Prefix increment operator: it iterates over the next k-mer of the unitig having the current color or ...
UnitigColors::UnitigColors_const_iterator::getKmerPosition
size_t getKmerPosition() const
Get the k-mer position of the k-mer visited by the iterator.
Definition: ColorSet.hpp:82
UnitigColors::read
bool read(istream &stream_in)
Read a UnitigColors from a stream.
UnitigColors::colorMax
size_t colorMax(const UnitigMapBase &um) const
Get the largest color index of all k-mers of a reference unitig.
UnitigColors::UnitigColors_const_iterator::operator*
pair< size_t, size_t > operator*() const
Indirection operator.
Definition: ColorSet.hpp:73
UnitigColors::write
bool write(ostream &stream_out, const bool copy_UnitigColors=true) const
Write a UnitigColors to a stream.
UnitigColors::optimizeFullColors
bool optimizeFullColors(const UnitigMapBase &um)
If possible, decrease the memory usage of the UnitigColors by optimizing the memory for "full colors"...
CompactedDBG.hpp
Interface for the Compacted de Bruijn graph API.
UnitigColors::remove
void remove(const UnitigMapBase &um, const size_t color_id)
Remove a color in the current UnitigColors for all k-mers of a unitig mapping.
UnitigColors::UnitigColors_const_iterator::nextColor
UnitigColors_const_iterator & nextColor()
Color increment operator: it iterates over the first k-mer position of the next color.
UnitigColors::size
size_t size(const UnitigMapBase &um, const size_t color_id) const
Get the number of k-mers of a reference unitig having a given color.
UnitigColors::add
void add(const UnitigMapBase &um, const size_t color_id)
Add a color in the current UnitigColors to all k-mers of a unitig mapping.
UnitigColors
Represent the k-mer color sets of a unitig.
Definition: ColorSet.hpp:21
UnitigColors::UnitigColors
UnitigColors()
Constructor (set up an empty container of k-mer color sets).
UnitigColors::~UnitigColors
~UnitigColors()
Destructor.
UnitigColors::UnitigColors_const_iterator
See UnitigColors::const_iterator.
Definition: ColorSet.hpp:37
UnitigColors::contains
bool contains(const UnitigMapBase &um, const size_t color_id) const
Check if a color is present on all k-mers of a unitig mapping.
UnitigColors::operator=
UnitigColors & operator=(UnitigColors &&o)
Move assignment operator.
UnitigColors::size
size_t size(const UnitigMapBase &um) const
Get the number of pairs (k-mer position, color) of a reference unitig.
UnitigColors::UnitigColors_const_iterator::~UnitigColors_const_iterator
~UnitigColors_const_iterator()
Destructor.
UnitigColors::UnitigColors_const_iterator::operator!=
bool operator!=(const UnitigColors_const_iterator &o) const
Inequality operator.
UnitigColors::UnitigColors_const_iterator::operator=
UnitigColors_const_iterator & operator=(const UnitigColors_const_iterator &o)
Copy assignment operator.
UnitigColors::getSizeInBytes
size_t getSizeInBytes() const
Size of the UnitigColors in bytes.
UnitigColors::isEmpty
bool isEmpty() const
Check if a UnitigColors is empty (no colors).
Definition: ColorSet.hpp:229
UnitigColors::begin
const_iterator begin(const UnitigMapBase &um) const
Create a constant iterator on all pairs (k-mer position, color) of the UnitigColors.
UnitigColors::UnitigColors_const_iterator::UnitigColors_const_iterator
UnitigColors_const_iterator()
Constructor of an empty iterator.
DataAccessor
Interface to access the colors and the data associated with a unitig of a ColoredCDBG.
Definition: DataAccessor.hpp:21
UnitigColors::UnitigColors_const_iterator::getColorID
size_t getColorID() const
Get the color of the k-mer visited by the iterator.
Definition: ColorSet.hpp:88
UnitigMapBase
Structure containing the basic information of a unitig mapping.
Definition: UnitigMap.hpp:33
UnitigColors::UnitigColors
UnitigColors(const UnitigColors &o)
Copy constructor.
UnitigColorsHash
Definition: ColorSet.hpp:468
UnitigColors::UnitigColors_const_iterator::UnitigColors_const_iterator
UnitigColors_const_iterator(const UnitigColors_const_iterator &o)
Copy constructor.
DataStorage
Definition: ColorSet.hpp:16
UnitigColors::UnitigColors
UnitigColors(UnitigColors &&o)
Move constructor.
UnitigColors::clear
void clear()
Empty a UnitigColors of its content.
UnitigColors::const_iterator
UnitigColors_const_iterator const_iterator
Iterator for the colors of a unitig.
Definition: ColorSet.hpp:164
UnitigColors::operator=
UnitigColors & operator=(const UnitigColors &o)
Copy assignment operator.
ColoredCDBG
Represent a Colored and Compacted de Bruijn graph.
Definition: ColoredCDBG.hpp:151