Bifrost
Kmer.hpp
Go to the documentation of this file.
1 #ifndef BIFROST_KMER_HPP
2 #define BIFROST_KMER_HPP
3 
4 #ifndef MAX_KMER_SIZE
5 #define MAX_KMER_SIZE 32
6 #endif
7 
8 #include <stdio.h>
9 #include <stdint.h>
10 #include <cassert>
11 #include <cstring>
12 #include <string>
13 #include <iostream>
14 
15 #include <bitset>
16 #include <string>
17 #include <iostream>
18 
19 #include "Common.hpp"
20 
30 class CompressedSequence;
31 
// Interface to store and manipulate k-mers.
// The k-mer is held in a fixed-size buffer of MAX_K/4 bytes (presumably a packed
// encoding of the bases -- the encoding itself is not visible in this header).
// Most members are only declared here; their definitions live elsewhere.
40 class Kmer {
41 
    // CompressedSequence is granted access to the private storage below.
42  friend class CompressedSequence;
43 
44  public:
45 
    // Constructor (initialize a k-mer with 'A' k times).
48  Kmer();
49 
    // Copy constructor (copy a k-mer).
54  Kmer(const Kmer& o);
55 
    // Constructor from a C string s (presumably the k-mer's bases -- TODO confirm
    // against the definition). Explicit, so a const char* never converts silently.
60  explicit Kmer(const char* s);
61 
    // Copy assignment operator (copy a k-mer).
66  Kmer& operator=(const Kmer& o);
67 
    // Set a k-mer as "empty": every 64-bit word of the storage is set to all ones.
70  inline void set_empty() { for (size_t i = 0; i < MAX_K/32; ++i) longs[i] = 0xffffffffffffffff; }
71 
    // Set a k-mer as "deleted": same all-ones pattern as set_empty(), then the
    // lowest bit of the first byte is flipped so the two markers differ.
74  inline void set_deleted() {
75 
76  for (size_t i = 0; i < MAX_K/32; ++i) longs[i] = 0xffffffffffffffff;
77  bytes[0] ^= 1;
78  }
79 
    // Ordering comparison between two k-mers (defined out of line; the ordering
    // criterion is not visible here).
80  bool operator<(const Kmer& o) const;
81 
    // Equality comparison operator.
87  bool operator==(const Kmer& o) const;
88 
    // Inequality comparison operator.
94  bool operator!=(const Kmer& o) const;
95 
    // Get the hash of a k-mer: XXH64 over the whole MAX_K/4-byte buffer.
    // seed is forwarded to XXH64 and defaults to 0.
    // BFG_INLINE and XXH64 are not declared in this listing (presumably provided
    // through Common.hpp).
100  BFG_INLINE uint64_t hash(const uint64_t seed = 0) const {
101 
102  return (uint64_t)XXH64((const void *)bytes, MAX_K/4, seed);
103  //return wyhash(bytes, MAX_K/4, seed);
104  }
105 
    // Get the reverse-complement of a k-mer.
109  Kmer twin() const;
110 
    // Get the canonical k-mer (lexicographically smallest between a k-mer and
    // its reverse-complement).
114  Kmer rep() const;
115 
    // Get a new k-mer which is the shift of the current k-mer of one base on the
    // left with one new character b (presumably added on the right, as
    // selfForwardBase() does -- the page's summary is truncated).
122  Kmer forwardBase(const char b) const;
123 
    // Get a new k-mer which is the shift of the current k-mer of one base on the
    // right with one new character b (presumably added on the left -- the page's
    // summary is truncated).
130  Kmer backwardBase(const char b) const;
131 
    // Shift the current k-mer of one base on the left with one new character b
    // on the right (in place).
136  void selfForwardBase(const char b);
137 
    // Get the character at a given position (offset) in a k-mer.
142  char getChar(const size_t offset) const;
143 
    // Set a character b at a given position (offset) in a k-mer.
    // The meaning of the bool result is not shown here (presumably success).
150  bool setChar(const size_t offset, const char b);
151 
    // Get the string of a k-mer, written into the caller-provided buffer s
    // (required buffer size is not stated in this header -- check the definition).
155  void toString(char *s) const;
156 
    // Get the string of a k-mer.
160  std::string toString() const;
161 
    // Write a k-mer (binary) to a stream. Returned bool presumably reports success.
168  bool write(std::ostream& stream_out) const;
169 
    // Read a k-mer (binary) from a stream. Returned bool presumably reports success.
176  bool read(std::istream& stream_in);
177 
    // Set the length of k-mers (static: applies to the class, not one object).
182  static void set_k(const unsigned int _k);
183 
    // Class-wide k-mer length (declared here, defined elsewhere).
184  static unsigned int k;
185 
186  private:
187 
    // Compile-time capacity, taken from the MAX_KMER_SIZE macro.
188  static const unsigned int MAX_K = MAX_KMER_SIZE;
189 
    // Storage: the same memory viewed either as MAX_K/4 bytes or as MAX_K/32
    // 64-bit words (the word view is what set_empty()/set_deleted() fill).
190  union {
191 
192  uint8_t bytes[MAX_K/4];
193  uint64_t longs[MAX_K/32];
194  };
195 
    // Private helpers, declared only; their behaviour is not visible here.
196  void set_kmer(const char *s);
197 
198  Kmer getLink(const size_t index) const;
199 
200  std::string getBinary() const;
201 };
202 
203 
// Function object that hashes a Kmer by returning km.hash() (default seed 0)
// converted to size_t. It has the call signature of a hasher, so it is
// presumably meant to be passed to hash-based containers keyed by Kmer.
204 struct KmerHash {
205 
206  size_t operator()(const Kmer& km) const {
207 
208  return km.hash();
209  }
210 };
211 
212 
213 
214 
// Fixed-size sequence type whose interface parallels Kmer, with its own
// class-wide length g and capacity MAX_G (presumably used to store minimizers,
// as the name suggests). Only hash() is defined inline; every other member is
// declared here and defined elsewhere.
216 class Minimizer {
217 
218  public:
219 
    // Default, copy and C-string constructors (the last one is explicit).
220  Minimizer();
221  Minimizer(const Minimizer& o);
222  explicit Minimizer(const char *s);
223 
224  Minimizer& operator=(const Minimizer& o);
225 
    // Same names as Kmer::set_empty()/set_deleted(); presumably mark special
    // "empty"/"deleted" values -- definitions are not in this header.
226  void set_empty();
227  void set_deleted();
228 
229  bool operator<(const Minimizer& o) const;
230  bool operator==(const Minimizer& o) const;
231  bool operator!=(const Minimizer& o) const;
232 
    // Public here, unlike Kmer::set_kmer which is private.
233  void set_minimizer(const char *s);
234 
    // Hash of the whole MAX_G/4-byte buffer via XXH64; seed is forwarded and
    // defaults to 0.
235  BFG_INLINE uint64_t hash(const uint64_t seed = 0) const {
236 
237  return (uint64_t)XXH64((const void *)bytes, MAX_G/4, seed);
238  //return wyhash(bytes, MAX_G/4, seed);
239  }
240 
    // Same names as the Kmer operations (twin/rep/getLink/forwardBase/
    // backwardBase); presumably the same semantics applied to length g.
241  Minimizer twin() const;
242  Minimizer rep() const;
243 
244  Minimizer getLink(const size_t index) const;
245 
246  Minimizer forwardBase(const char b) const;
247  Minimizer backwardBase(const char b) const;
248 
249  std::string getBinary() const;
250 
    // String conversion: into a caller-provided buffer, or as a std::string.
251  void toString(char *s) const;
252  std::string toString() const;
253 
254  // static functions
255  static void set_g(unsigned int _g);
256 
    // Capacity (from the MAX_KMER_SIZE macro) and class-wide length; both are
    // public in this class.
257  static const unsigned int MAX_G = MAX_KMER_SIZE;
258  static unsigned int g;
259 
260  private:
261 
262  // data fields
    // Same memory viewed as MAX_G/4 bytes or as MAX_G/32 64-bit words.
263  union {
264 
265  uint8_t bytes[MAX_G/4];
266  uint64_t longs[MAX_G/32];
267  };
268 
269  //static unsigned int g_bytes;
270  //static unsigned int g_longs;
271  //static unsigned int g_modmask; // int?
272 
273  // Legacy note (looks stale): "By default MAX_K == 64 so the union uses 16 bytes.
274  // However sizeof(Kmer) == 24. Are the 8 extra bytes alignment?"
275  // NOTE(review): this header defaults MAX_KMER_SIZE to 32, so MAX_G/4 == 8 bytes here; the note also names MAX_K/Kmer, not MAX_G/Minimizer -- TODO confirm and update.
276 
277  // private functions
278  //void shiftForward(int shift);
279 
280  //void shiftBackward(int shift);
281 };
282 
283 
// Function object that hashes a Minimizer by returning minz.hash() (default
// seed 0) converted to size_t. It has the call signature of a hasher, so it is
// presumably meant to be passed to hash-based containers keyed by Minimizer.
284 struct MinimizerHash {
285 
286  size_t operator()(const Minimizer& minz) const {
287 
288  return minz.hash();
289  }
290 };
292 
293 #endif // BIFROST_KMER_HPP
Kmer::toString
std::string toString() const
Get the string of a k-mer.
Kmer::setChar
bool setChar(const size_t offset, const char b)
Set a character at a given position in a k-mer.
Kmer::hash
uint64_t hash(const uint64_t seed=0) const
Get the hash of a k-mer.
Definition: Kmer.hpp:100
Kmer::operator=
Kmer & operator=(const Kmer &o)
Copy assignment operator (copy a k-mer).
Kmer
Interface to store and manipulate k-mers.
Definition: Kmer.hpp:40
Kmer::set_k
static void set_k(const unsigned int _k)
Set the length of k-mers.
Kmer::selfForwardBase
void selfForwardBase(const char b)
Shift the current k-mer by one base to the left, adding one new character on the right.
Kmer::toString
void toString(char *s) const
Get the string of a k-mer.
KmerHash
Definition: Kmer.hpp:204
Kmer::operator==
bool operator==(const Kmer &o) const
Equality comparison operator.
Kmer::set_empty
void set_empty()
Set a k-mer as "empty".
Definition: Kmer.hpp:70
Kmer::read
bool read(std::istream &stream_in)
Read a k-mer (binary) from a stream.
Kmer::write
bool write(std::ostream &stream_out) const
Write a k-mer (binary) to a stream.
Kmer::rep
Kmer rep() const
Get the canonical k-mer (lexicographically smallest between a k-mer and its reverse-complement).
Kmer::set_deleted
void set_deleted()
Set a k-mer as "deleted".
Definition: Kmer.hpp:74
Kmer::Kmer
Kmer(const Kmer &o)
Copy constructor (copy a k-mer).
Kmer::operator!=
bool operator!=(const Kmer &o) const
Inequality comparison operator.
Kmer::backwardBase
Kmer backwardBase(const char b) const
Get a new k-mer which is the current k-mer shifted by one base to the right, with one new character on the left.
Kmer::getChar
char getChar(const size_t offset) const
Get the character at a given position in a k-mer.
Kmer::forwardBase
Kmer forwardBase(const char b) const
Get a new k-mer which is the current k-mer shifted by one base to the left, with one new character on the right.
Kmer::Kmer
Kmer()
Constructor (initialize a k-mer with 'A' k times).
Kmer::Kmer
Kmer(const char *s)
Constructor.
Kmer::twin
Kmer twin() const
Get the reverse-complement of a k-mer.