Bifrost
Kmer.hpp
Go to the documentation of this file.
1 #ifndef BIFROST_KMER_HPP
2 #define BIFROST_KMER_HPP
3 
4 #ifndef MAX_KMER_SIZE
5 #define MAX_KMER_SIZE 32
6 #endif
7 
8 #ifndef MAX_GMER_SIZE
9 #define MAX_GMER_SIZE MAX_KMER_SIZE
10 #endif
11 
12 #include <stdint.h>
13 #include <stdio.h>
14 
15 #include <bitset>
16 #include <cassert>
17 #include <cstring>
18 #include <iostream>
19 #include <string>
20 
21 #include "Common.hpp"
22 
32 class CompressedSequence;
33 
42 class Kmer {
43 
44  friend class CompressedSequence;
45 
46  public:
47 
50  Kmer();
51 
56  Kmer(const Kmer& o);
57 
62  explicit Kmer(const char* s);
63 
68  Kmer& operator=(const Kmer& o);
69 
72  BFG_INLINE void set_deleted() {
73 
74  for (size_t i = 0; i < MAX_K/32; ++i) longs[i] = 0xffffffffffffffffULL;
75  }
76 
79  BFG_INLINE void set_empty() {
80 
81  for (size_t i = 0; i < MAX_K/32; ++i) longs[i] = 0xfffffffffffffffeULL;
82  }
83 
87  BFG_INLINE bool isDeleted() const {
88 
89  return (longs[(MAX_K/32)-1] == 0xffffffffffffffffULL);
90  }
91 
95  BFG_INLINE bool isEmpty() const {
96 
97  return (longs[(MAX_K/32)-1] == 0xfffffffffffffffeULL);
98  }
99 
100  bool operator<(const Kmer& o) const;
101 
107  bool operator==(const Kmer& o) const;
108 
114  bool operator!=(const Kmer& o) const;
115 
120  BFG_INLINE uint64_t hash(const uint64_t seed = 0) const {
121 
122  return wyhash(bytes, MAX_K/4, seed, _wyp);
123  }
124 
128  Kmer twin() const;
129 
133  Kmer rep() const;
134 
141  Kmer forwardBase(const char b) const;
142 
149  Kmer backwardBase(const char b) const;
150 
155  void selfForwardBase(const char b);
156 
161  char getChar(const size_t offset) const;
162 
169  bool setChar(const size_t offset, const char b);
170 
174  void toString(char *s) const;
175 
179  std::string toString() const;
180 
187  bool write(std::ostream& stream_out) const;
188 
195  bool read(std::istream& stream_in);
196 
201  static void set_k(const unsigned int _k);
202 
203  static unsigned int k;
204 
205  private:
206 
207  static const unsigned int MAX_K = MAX_KMER_SIZE;
208 
209  union {
210 
211  uint8_t bytes[MAX_K/4];
212  uint64_t longs[MAX_K/32];
213  };
214 
215  void set_kmer(const char *s);
216 
217  Kmer getLink(const size_t index) const;
218 
219  std::string getBinary() const;
220 };
221 
222 
223 struct KmerHash {
224 
225  inline size_t operator()(const Kmer& km) const {
226 
227  return km.hash();
228  }
229 };
230 
232 class Minimizer {
233 
234  friend class CompressedSequence;
235 
236  public:
237 
238  Minimizer();
239  Minimizer(const Minimizer& o);
240  explicit Minimizer(const char *s);
241 
242  Minimizer& operator=(const Minimizer& o);
243 
244  bool operator<(const Minimizer& o) const;
245  bool operator==(const Minimizer& o) const;
246  bool operator!=(const Minimizer& o) const;
247 
248  void set_minimizer(const char *s);
249 
250  BFG_INLINE uint64_t hash(const uint64_t seed = 0) const {
251 
252  return wyhash(bytes, MAX_G/4, seed, _wyp);
253  }
254 
255  BFG_INLINE void set_deleted() {
256 
257  for (size_t i = 0; i < MAX_G/32; ++i) longs[i] = 0xffffffffffffffffULL;
258  }
259 
260  BFG_INLINE void set_empty() {
261 
262  for (size_t i = 0; i < MAX_G/32; ++i) longs[i] = 0xfffffffffffffffeULL;
263  }
264 
265  BFG_INLINE bool isDeleted() const {
266 
267  return (longs[(MAX_G/32)-1] == 0xffffffffffffffffULL);
268  }
269 
270  BFG_INLINE bool isEmpty() const {
271 
272  return (longs[(MAX_G/32)-1] == 0xfffffffffffffffeULL);
273  }
274 
275  Minimizer twin() const;
276  Minimizer rep() const;
277 
278  Minimizer getLink(const size_t index) const;
279 
280  Minimizer forwardBase(const char b) const;
281  Minimizer backwardBase(const char b) const;
282 
283  std::string getBinary() const;
284 
285  void toString(char *s) const;
286  std::string toString() const;
287 
288  bool write(std::ostream& stream_out) const;
289  bool read(std::istream& stream_in);
290 
291  // static functions
292  static void set_g(unsigned int _g);
293 
294  static unsigned int g;
295 
296  private:
297 
298  static const unsigned int MAX_G = MAX_GMER_SIZE;
299 
300  // data fields
301  union {
302 
303  uint8_t bytes[MAX_G/4];
304  uint64_t longs[MAX_G/32];
305  };
306 
307  // By default MAX_K == 64 so the union uses 16 bytes
308  // However sizeof(Kmer) == 24
309  // Are the 8 extra bytes alignment?
310 };
311 
312 
313 struct MinimizerHash {
314 
315  inline size_t operator()(const Minimizer& minz) const {
316 
317  return minz.hash();
318  }
319 };
321 
322 #endif // BIFROST_KMER_HPP
Kmer::toString
std::string toString() const
Get the string of a k-mer.
Kmer::setChar
bool setChar(const size_t offset, const char b)
Set a character at a given position in a k-mer.
Kmer::hash
uint64_t hash(const uint64_t seed=0) const
Get the hash of a k-mer.
Definition: Kmer.hpp:120
Kmer::operator=
Kmer & operator=(const Kmer &o)
Copy assignment operator (copy a k-mer).
Kmer
Interface to store and manipulate k-mers.
Definition: Kmer.hpp:42
Kmer::set_k
static void set_k(const unsigned int _k)
Set the length of k-mers.
Kmer::isEmpty
bool isEmpty() const
Check whether a k-mer is "empty".
Definition: Kmer.hpp:95
Kmer::isDeleted
bool isDeleted() const
Check whether a k-mer is "deleted".
Definition: Kmer.hpp:87
Kmer::selfForwardBase
void selfForwardBase(const char b)
Shift the current k-mer of one base on the left with one new character on the right.
Kmer::toString
void toString(char *s) const
Get the string of a k-mer.
KmerHash
Definition: Kmer.hpp:223
Kmer::operator==
bool operator==(const Kmer &o) const
Equality comparison operator.
Kmer::set_empty
void set_empty()
Set a k-mer as "empty".
Definition: Kmer.hpp:79
Kmer::read
bool read(std::istream &stream_in)
Read a k-mer (binary) from a stream.
Kmer::write
bool write(std::ostream &stream_out) const
Write a k-mer (binary) to a stream.
Kmer::rep
Kmer rep() const
Get the canonical k-mer (lexicographically smallest between a k-mer and its reverse-complement).
Kmer::set_deleted
void set_deleted()
Set a k-mer as "deleted".
Definition: Kmer.hpp:72
Kmer::Kmer
Kmer(const Kmer &o)
Copy constructor (copy a k-mer).
Kmer::operator!=
bool operator!=(const Kmer &o) const
Inequality comparison operator.
Kmer::backwardBase
Kmer backwardBase(const char b) const
Get a new k-mer which is the shift of the current k-mer of one base on the right with one new character on the left.
Kmer::getChar
char getChar(const size_t offset) const
Get the character at a given position in a k-mer.
Kmer::forwardBase
Kmer forwardBase(const char b) const
Get a new k-mer which is the shift of the current k-mer of one base on the left with one new character on the right.
Kmer::Kmer
Kmer()
Constructor (initialize a k-mer with 'A' k times).
Kmer::Kmer
Kmer(const char *s)
Constructor.
Kmer::twin
Kmer twin() const
Get the reverse-complement of a k-mer.