Bifrost
Kmer.hpp
Go to the documentation of this file.
1 #ifndef BIFROST_KMER_HPP
2 #define BIFROST_KMER_HPP
3 
// Compile-time capacity used by Kmer (Kmer::MAX_K is initialised from it).
// Defaults to 32 unless the build defines MAX_KMER_SIZE beforehand.
// Kmer sizes its storage as MAX_K/4 bytes overlaid with MAX_K/32 64-bit words,
// so the value is expected to be a positive multiple of 32.
4 #ifndef MAX_KMER_SIZE
5 #define MAX_KMER_SIZE 32
6 #endif
7 
// Compile-time capacity used by Minimizer (Minimizer::MAX_G is initialised from it).
// Defaults to MAX_KMER_SIZE unless the build defines MAX_GMER_SIZE beforehand;
// the same multiple-of-32 expectation applies (storage is MAX_G/4 bytes, MAX_G/32 words).
8 #ifndef MAX_GMER_SIZE
9 #define MAX_GMER_SIZE MAX_KMER_SIZE
10 #endif
11 
12 #include <stdint.h>
13 #include <stdio.h>
14 
15 #include <bitset>
16 #include <cassert>
17 #include <cstring>
18 #include <iostream>
19 #include <string>
20 
21 #include "Common.hpp"
22 
// Forward declaration only: Kmer names CompressedSequence as a friend, so the
// type must be declared before the Kmer class body. Its definition is not in this file.
32 class CompressedSequence;
33 
/** @brief Interface to store and manipulate k-mers.
*
* Storage is a fixed-size buffer of MAX_K/4 bytes that is also addressable as
* MAX_K/32 64-bit words (anonymous union below). Two reserved bit patterns mark
* a k-mer as "deleted" (all words 0xffffffffffffffff) or "empty" (all words
* 0xfffffffffffffffe). Only the inline members are defined here; every other
* member is a declaration whose definition is not part of this listing.
*/
42 class Kmer {
43 
    // CompressedSequence is granted access to the private storage and helpers.
44  friend class CompressedSequence;
45 
46  public:
47 
    /** @brief Constructor (initialize a k-mer with 'A' k times). */
50  Kmer();
51 
    /** @brief Copy constructor (copy a k-mer).
    * @param o k-mer to copy. */
56  Kmer(const Kmer& o);
57 
    /** @brief Constructor.
    * @param s C string to build the k-mer from (presumably at least k characters
    * of DNA — TODO confirm against the definition). */
62  explicit Kmer(const char* s);
63 
    /** @brief Copy assignment operator (copy a k-mer).
    * @param o k-mer to copy.
    * @return Reference to a Kmer (presumably *this; definition not shown). */
68  Kmer& operator=(const Kmer& o);
69 
    /** @brief Set a k-mer as "deleted".
    * Overwrites every 64-bit word of the storage with 0xffffffffffffffff. */
72  BFG_INLINE void set_deleted() {
73 
74  for (size_t i = 0; i < MAX_K/32; ++i) longs[i] = 0xffffffffffffffffULL;
75  }
76 
    /** @brief Set a k-mer as "empty".
    * Overwrites every 64-bit word of the storage with 0xfffffffffffffffe. */
79  BFG_INLINE void set_empty() {
80 
81  for (size_t i = 0; i < MAX_K/32; ++i) longs[i] = 0xfffffffffffffffeULL;
82  }
83 
    /** @brief Check whether a k-mer is "deleted".
    * @return true if the last 64-bit word equals 0xffffffffffffffff.
    * Only the last word is inspected, although set_deleted() writes all of them. */
87  BFG_INLINE bool isDeleted() const {
88 
89  return (longs[(MAX_K/32)-1] == 0xffffffffffffffffULL);
90  }
91 
    /** @brief Check whether a k-mer is "empty".
    * @return true if the last 64-bit word equals 0xfffffffffffffffe.
    * Only the last word is inspected, although set_empty() writes all of them. */
95  BFG_INLINE bool isEmpty() const {
96 
97  return (longs[(MAX_K/32)-1] == 0xfffffffffffffffeULL);
98  }
99 
    /** @brief Less-than comparison operator (the ordering criterion is defined
    * out of line and is not visible in this listing). */
100  bool operator<(const Kmer& o) const;
101 
    /** @brief Equality comparison operator.
    * @param o k-mer to compare with. */
107  bool operator==(const Kmer& o) const;
108 
    /** @brief Inequality comparison operator.
    * @param o k-mer to compare with. */
114  bool operator!=(const Kmer& o) const;
115 
    /** @brief Get the hash of a k-mer.
    * @param seed Seed forwarded to the hash function (default 0).
    * @return wyhash over the whole MAX_K/4-byte storage buffer (not just the
    * first k bases), using seed and _wyp; wyhash and _wyp are not declared in
    * this file (presumably provided through Common.hpp). */
120  BFG_INLINE uint64_t hash(const uint64_t seed = 0) const {
121 
122  return wyhash(bytes, MAX_K/4, seed, _wyp);
123  }
124 
    /** @brief Get the reverse-complement of a k-mer. */
128  Kmer twin() const;
129 
    /** @brief Get the canonical k-mer (lexicographically smallest between a
    * k-mer and its reverse-complement). */
133  Kmer rep() const;
134 
    /** @brief Get a new k-mer which is the current k-mer shifted by one base
    * on the left with one new character.
    * @param b The new character (presumably added on the right — TODO confirm). */
141  Kmer forwardBase(const char b) const;
142 
    /** @brief Get a new k-mer which is the current k-mer shifted by one base
    * on the right with one new character.
    * @param b The new character (presumably added on the left — TODO confirm). */
149  Kmer backwardBase(const char b) const;
150 
    /** @brief Shift the current k-mer by one base on the left with one new
    * character on the right (modifies this k-mer in place: non-const, void).
    * @param b The new character. */
155  void selfForwardBase(const char b);
156 
    /** @brief Get the character at a given position in a k-mer.
    * @param offset Position in the k-mer. */
161  char getChar(const size_t offset) const;
162 
    /** @brief Set a character at a given position in a k-mer.
    * @param offset Position in the k-mer.
    * @param b Character to set.
    * @return bool; presumably whether the character was set — TODO confirm. */
169  bool setChar(const size_t offset, const char b);
170 
    /** @brief Get the string of a k-mer.
    * @param s Output buffer supplied by the caller (required size and
    * termination are not visible here — TODO confirm). */
174  void toString(char *s) const;
175 
    /** @brief Get the string of a k-mer.
    * @return The k-mer as a std::string. */
179  std::string toString() const;
180 
    /** @brief Write a k-mer (binary) to a stream.
    * @param stream_out Output stream.
    * @return bool; presumably success of the write — TODO confirm. */
187  bool write(std::ostream& stream_out) const;
188 
    /** @brief Read a k-mer (binary) from a stream.
    * @param stream_in Input stream.
    * @return bool; presumably success of the read — TODO confirm. */
195  bool read(std::istream& stream_in);
196 
    /** @brief Set the length of k-mers.
    * @param _k Length of the k-mers. Static, so it applies to the class as a whole. */
201  static void set_k(const unsigned int _k);
202 
    // Class-wide static shared by all Kmer objects; presumably the k-mer
    // length configured through set_k() — TODO confirm.
203  static unsigned int k;
204 
205  private:
206 
    // Compile-time capacity, taken from the MAX_KMER_SIZE macro.
207  static const unsigned int MAX_K = MAX_KMER_SIZE;
208 
    // Raw storage, viewable as bytes or as 64-bit words. Both array sizes use
    // integer division, so they cover the same memory only when MAX_K is a
    // multiple of 32, and longs[] is non-empty only when MAX_K >= 32.
    // MAX_K/4 bytes for MAX_K positions is 2 bits per position.
209  union {
210 
211  uint8_t bytes[MAX_K/4];
212  uint64_t longs[MAX_K/32];
213  };
214 
    // Private helpers; declarations only, semantics not visible in this listing.
    // Presumably: set_kmer loads the storage from a string, getLink returns a
    // neighbouring k-mer selected by index, getBinary renders the bits — TODO confirm.
215  void set_kmer(const char *s);
216 
217  Kmer getLink(const size_t index) const;
218 
219  std::string getBinary() const;
220 };
221 
222 
// Hash functor for Kmer. Forwards to Kmer::hash() with its default seed (0)
// and returns the 64-bit result converted to size_t.
223 struct KmerHash {
224 
225  inline size_t operator()(const Kmer& km) const {
226 
227  return km.hash();
228  }
229 };
230 
// Fixed-capacity sequence type whose interface parallels Kmer, but sized by
// MAX_G (from MAX_GMER_SIZE) and parameterised by the static length g.
// Storage is MAX_G/4 bytes overlaid with MAX_G/32 64-bit words. As in Kmer,
// all-ones words mark "deleted" and 0xfffffffffffffffe words mark "empty".
// Only the inline members are defined here; the rest are declarations whose
// definitions are not part of this listing.
232 class Minimizer {
233 
234  public:
235 
    // Construction and copy. The const char* constructor is explicit;
    // presumably it builds the minimizer from a DNA string — TODO confirm.
236  Minimizer();
237  Minimizer(const Minimizer& o);
238  explicit Minimizer(const char *s);
239 
240  Minimizer& operator=(const Minimizer& o);
241 
    // Comparison operators (criteria defined out of line, not visible here).
242  bool operator<(const Minimizer& o) const;
243  bool operator==(const Minimizer& o) const;
244  bool operator!=(const Minimizer& o) const;
245 
    // Public here, unlike Kmer::set_kmer which is private.
    // Presumably (re)loads the storage from the string s — TODO confirm.
246  void set_minimizer(const char *s);
247 
    // Returns wyhash over the whole MAX_G/4-byte storage buffer, using seed
    // (default 0) and _wyp; wyhash and _wyp are not declared in this file
    // (presumably provided through Common.hpp).
248  BFG_INLINE uint64_t hash(const uint64_t seed = 0) const {
249 
250  return wyhash(bytes, MAX_G/4, seed, _wyp);
251  }
252 
    // Marks the minimizer "deleted": every 64-bit word becomes 0xffffffffffffffff.
253  BFG_INLINE void set_deleted() {
254 
255  for (size_t i = 0; i < MAX_G/32; ++i) longs[i] = 0xffffffffffffffffULL;
256  }
257 
    // Marks the minimizer "empty": every 64-bit word becomes 0xfffffffffffffffe.
258  BFG_INLINE void set_empty() {
259 
260  for (size_t i = 0; i < MAX_G/32; ++i) longs[i] = 0xfffffffffffffffeULL;
261  }
262 
    // True if the last 64-bit word is all ones (only the last word is inspected).
263  BFG_INLINE bool isDeleted() const {
264 
265  return (longs[(MAX_G/32)-1] == 0xffffffffffffffffULL);
266  }
267 
    // True if the last 64-bit word is 0xfffffffffffffffe (only the last word is inspected).
268  BFG_INLINE bool isEmpty() const {
269 
270  return (longs[(MAX_G/32)-1] == 0xfffffffffffffffeULL);
271  }
272 
    // Same names and signatures as the Kmer members twin()/rep()/getLink()/
    // forwardBase()/backwardBase(); presumably the same semantics applied to
    // a sequence of length g — TODO confirm against the definitions.
273  Minimizer twin() const;
274  Minimizer rep() const;
275 
276  Minimizer getLink(const size_t index) const;
277 
278  Minimizer forwardBase(const char b) const;
279  Minimizer backwardBase(const char b) const;
280 
    // Presumably a textual rendering of the raw bits — TODO confirm.
281  std::string getBinary() const;
282 
    // String conversion: into a caller-supplied buffer, or returned as std::string.
283  void toString(char *s) const;
284  std::string toString() const;
285 
286  // Static members: set_g() presumably sets the class-wide minimizer length g — TODO confirm.
287  static void set_g(unsigned int _g);
288 
289  static unsigned int g;
290 
291  private:
292 
    // Compile-time capacity, taken from the MAX_GMER_SIZE macro.
293  static const unsigned int MAX_G = MAX_GMER_SIZE;
294 
295  // Data fields: raw storage viewable as bytes or as 64-bit words (same memory only if MAX_G is a positive multiple of 32).
296  union {
297 
298  uint8_t bytes[MAX_G/4];
299  uint64_t longs[MAX_G/32];
300  };
301 
302  // NOTE(review): an earlier note here assumed MAX_K == 64 by default (16-byte union) and sizeof(Kmer) == 24.
303  // This header defaults MAX_KMER_SIZE (and therefore MAX_GMER_SIZE) to 32 unless the build overrides it,
304  // which gives an 8-byte union; the open question about extra padding bytes is unverified — check with sizeof.
305 };
306 
307 
// Hash functor for Minimizer. Forwards to Minimizer::hash() with its default
// seed (0) and returns the 64-bit result converted to size_t.
308 struct MinimizerHash {
309 
310  inline size_t operator()(const Minimizer& minz) const {
311 
312  return minz.hash();
313  }
314 };
316 
317 #endif // BIFROST_KMER_HPP
Kmer::toString
std::string toString() const
Get the string of a k-mer.
Kmer::setChar
bool setChar(const size_t offset, const char b)
Set a character at a given position in a k-mer.
Kmer::hash
uint64_t hash(const uint64_t seed=0) const
Get the hash of a k-mer.
Definition: Kmer.hpp:120
Kmer::operator=
Kmer & operator=(const Kmer &o)
Copy assignment operator (copy a k-mer).
Kmer
Interface to store and manipulate k-mers.
Definition: Kmer.hpp:42
Kmer::set_k
static void set_k(const unsigned int _k)
Set the length of k-mers.
Kmer::isEmpty
bool isEmpty() const
Check whether a k-mer is "empty".
Definition: Kmer.hpp:95
Kmer::isDeleted
bool isDeleted() const
Check whether a k-mer is "deleted".
Definition: Kmer.hpp:87
Kmer::selfForwardBase
void selfForwardBase(const char b)
Shift the current k-mer by one base to the left, adding one new character on the right.
Kmer::toString
void toString(char *s) const
Get the string of a k-mer.
KmerHash
Definition: Kmer.hpp:223
Kmer::operator==
bool operator==(const Kmer &o) const
Equality comparison operator.
Kmer::set_empty
void set_empty()
Set a k-mer as "empty".
Definition: Kmer.hpp:79
Kmer::read
bool read(std::istream &stream_in)
Read a k-mer (binary) from a stream.
Kmer::write
bool write(std::ostream &stream_out) const
Write a k-mer (binary) to a stream.
Kmer::rep
Kmer rep() const
Get the canonical k-mer (lexicographically smallest between a k-mer and its reverse-complement).
Kmer::set_deleted
void set_deleted()
Set a k-mer as "deleted".
Definition: Kmer.hpp:72
Kmer::Kmer
Kmer(const Kmer &o)
Copy constructor (copy a k-mer).
Kmer::operator!=
bool operator!=(const Kmer &o) const
Inequality comparison operator.
Kmer::backwardBase
Kmer backwardBase(const char b) const
Get a new k-mer which is the current k-mer shifted by one base to the right, with one new character on the left.
Kmer::getChar
char getChar(const size_t offset) const
Get the character at a given position in a k-mer.
Kmer::forwardBase
Kmer forwardBase(const char b) const
Get a new k-mer which is the current k-mer shifted by one base to the left, with one new character on the right.
Kmer::Kmer
Kmer()
Constructor (initialize a k-mer with 'A' k times).
Kmer::Kmer
Kmer(const char *s)
Constructor.
Kmer::twin
Kmer twin() const
Get the reverse-complement of a k-mer.