Bifrost
UnitigMap.hpp
Go to the documentation of this file.
1 #ifndef BIFROST_UNITIGMAP_HPP
2 #define BIFROST_UNITIGMAP_HPP
3 
4 #include <string>
5 #include "Common.hpp"
6 #include "Kmer.hpp"
7 
13 template<typename U> class Unitig;
14 template<typename U, typename G> class CompactedDBG;
15 template<typename U, typename G, bool is_const> class BackwardCDBG;
16 template<typename U, typename G, bool is_const> class ForwardCDBG;
17 template<typename U, typename G, bool is_const> class neighborIterator;
18 
/** @struct UnitigMapBase
* @brief Structure containing the basic information of a unitig mapping.
*/
struct UnitigMapBase {

    /** UnitigMapBase constructor (isEmpty = true).
    * @param length is a mapping length.
    * NOTE(review): presumably stored in len; the constructor is defined outside this header, confirm.
    */
    UnitigMapBase(const size_t length = 1);

    /** UnitigMapBase constructor (isEmpty = false).
    * NOTE(review): the parameters presumably initialize dist, len, size and strand (in that order);
    * the constructor is defined outside this header, confirm.
    * @param start is a start position of the mapping on the reference unitig.
    * @param length is a mapping length.
    * @param unitig_sz is a reference unitig length.
    * @param strand is the strand of the mapping.
    */
    UnitigMapBase(const size_t start, const size_t length, const size_t unitig_sz, const bool strand);

    /** Equality operator: check if two UnitigMapBase are the same.
    * @param o is the UnitigMapBase to compare with.
    * @return a boolean indicating if the two UnitigMapBase are the same.
    */
    bool operator==(const UnitigMapBase& o) const;

    /** Inequality operator: check if two UnitigMapBase are different.
    * @param o is the UnitigMapBase to compare with.
    * @return a boolean indicating if the two UnitigMapBase are different.
    */
    bool operator!=(const UnitigMapBase& o) const;

    size_t dist; // Start position of the mapping (0-based distance) from the start of the reference unitig
    size_t len; // Length of the mapping on the reference unitig, in k-mers
    size_t size; // Length of the reference unitig

    bool strand; // True if the mapped k-mer or sequence matches the forward strand of the reference unitig
    bool isEmpty; // True if there is no mapping
};
67 
91 template<typename Unitig_data_t = void, typename Graph_data_t = void, bool is_const = false>
92 class UnitigMap : public UnitigMapBase {
93 
94  typedef Unitig_data_t U;
95  typedef Graph_data_t G;
96 
97  template<typename U, typename G> friend class CompactedDBG;
98  template<typename U, typename G, bool C> friend class BackwardCDBG;
99  template<typename U, typename G, bool C> friend class ForwardCDBG;
100  template<typename U, typename G, bool C> friend class unitigIterator;
101  template<typename U, typename G, bool C> friend class UnitigMap;
102 
103  typedef typename std::conditional<is_const, const CompactedDBG<U, G>*, CompactedDBG<U, G>*>::type CompactedDBG_ptr_t;
104  typedef typename std::conditional<is_const, const U*, U*>::type Unitig_data_ptr_t;
105 
106  public:
107 
110 
116  UnitigMap(size_t length = 1, CompactedDBG_ptr_t cdbg_ = nullptr);
117 
126  UnitigMap(const size_t start, const size_t length, const size_t unitig_sz, const bool strand);
127 
131  bool operator==(const UnitigMap& o) const;
132 
136  bool operator!=(const UnitigMap& o) const;
137 
141  bool isSameReferenceUnitig(const UnitigMap& o) const;
142 
147  string mappedSequenceToString() const;
148 
153  string referenceUnitigToString() const;
154 
167  size_t lcp(const char* s, const size_t pos_s = 0, const size_t pos_um_seq = 0, const bool um_reversed = false) const;
168 
174 
180 
187  Kmer getUnitigKmer(const size_t pos) const;
188 
194 
200 
207  Kmer getMappedKmer(const size_t pos) const;
208 
213  UnitigMap<U, G, is_const> getKmerMapping(const size_t pos) const;
214 
222  Unitig_data_ptr_t getData() const;
223 
230 
237 
243  inline CompactedDBG_ptr_t getGraph() const { return cdbg; }
244 
245  operator UnitigMap<U, G, true>() const {
246 
247  UnitigMap<U, G, true> um(pos_unitig, dist, len, size, isShort, isAbundant, strand, cdbg);
248 
249  um.isEmpty = isEmpty;
250 
251  return um;
252  }
253 
254  void setFullCoverage() const;
255  void increaseCoverage() const;
256  void decreaseCoverage() const;
257 
258  bool isCoverageFull() const;
259  size_t getCoverage(const size_t pos) const;
260 
261  private:
262 
263  UnitigMap(size_t p_unitig, size_t i, size_t l, size_t sz, bool short_, bool abundance, bool strd, CompactedDBG_ptr_t cdbg_);
264 
265  neighborIterator<U, G, is_const> bw_begin() const;
266  neighborIterator<U, G, is_const> bw_end() const;
267 
268  neighborIterator<U, G, is_const> fw_begin() const;
269  neighborIterator<U, G, is_const> fw_end() const;
270 
271  template<bool is_void> typename std::enable_if<!is_void, Unitig<U>>::type splitData_(const bool last_split) const;
272  template<bool is_void> typename std::enable_if<is_void, Unitig<U>>::type splitData_(const bool last_split) const;
273 
274  Unitig<U> splitData(const bool last_split) const;
275 
276  template<bool is_void> typename std::enable_if<!is_void, Unitig_data_ptr_t>::type getData_() const;
277  template<bool is_void> typename std::enable_if<is_void, Unitig_data_ptr_t>::type getData_() const;
278 
279  void partialCopy(const UnitigMap<U, G, is_const>& um);
280 
281  size_t pos_unitig; // unitig pos. in v_unitigs or km_unitigs or h_kmers
282 
283  bool isShort; // true if the unitig has length k
284  bool isAbundant; // true if the unitig has length k and has an abundant minimizer
285 
286  CompactedDBG_ptr_t cdbg;
287 };
288 
289 template<typename Unitig_data_t = void, typename Graph_data_t = void, bool is_const = false>
291 
292  typedef Unitig_data_t U;
293  typedef Graph_data_t G;
294 
295  size_t operator()(const UnitigMap<U, G, is_const>& um) const {
296 
297  struct UnitigMapTMP {
298 
299  size_t pos_unitig; // unitig pos. in v_unitigs or km_unitigs or h_kmers
300  size_t dist;
301  size_t len;
302  size_t size;
303 
304  bool strand;
305  bool isEmpty;
306 
307  bool isShort; // true if the unitig has length k
308  bool isAbundant; // true if the unitig has length k and has an abundant minimizer
309 
310  const void* cdbg;
311 
312  UnitigMapTMP(const UnitigMap<U, G, is_const>& um) : pos_unitig(um.pos_unitig), dist(um.dist), len(um.len), size(um.size),
313  strand(um.strand), isEmpty(um.isEmpty), isShort(um.isShort),
314  isAbundant(um.isAbundant), cdbg(static_cast<const void*>(um.cdbg)) {};
315  };
316 
317  UnitigMapTMP tmp(um);
318 
319  //return static_cast<size_t>(XXH64(static_cast<const void*>(&tmp), sizeof(UnitigMapTMP), 0));
320  return static_cast<size_t>(wyhash(&tmp, sizeof(UnitigMapTMP), 0, _wyp));
321  }
322 };
323 
324 #include "UnitigMap.tcc"
325 
326 #endif
UnitigMap::getUnitigKmer
Kmer getUnitigKmer(const size_t pos) const
Get the k-mer starting at position pos in the reference unitig used for the mapping.
UnitigMap::getSuccessors
UnitigMap_FW getSuccessors() const
Create a UnitigMap_FW object that can create iterators (through UnitigMap_FW::begin() and UnitigMap_F...
UnitigMap::referenceUnitigToString
string referenceUnitigToString() const
Create a string containing the sequence of the reference unitig used in the mapping.
UnitigMapBase::isEmpty
bool isEmpty
True if there is no mapping.
Definition: UnitigMap.hpp:65
UnitigMap::mappedSequenceToString
string mappedSequenceToString() const
Create a string containing the sequence corresponding to the mapping.
Kmer
Interface to store and manipulate k-mers.
Definition: Kmer.hpp:42
UnitigMapBase::size
size_t size
Length of the reference unitig.
Definition: UnitigMap.hpp:62
UnitigMapHash
Definition: UnitigMap.hpp:290
UnitigMapBase::strand
bool strand
True if the mapped k-mer or sequence matches the forward strand, false if it matches its reverse-comp...
Definition: UnitigMap.hpp:64
UnitigMapBase::len
size_t len
Length of the mapping on the reference unitig, in k-mers.
Definition: UnitigMap.hpp:61
UnitigMapBase::dist
size_t dist
Start position of the mapping (0-based distance) from the start of the reference unitig.
Definition: UnitigMap.hpp:60
UnitigMap::isSameReferenceUnitig
bool isSameReferenceUnitig(const UnitigMap &o) const
Check if two UnitigMap objects have the same reference unitig.
UnitigMap
Contain all the information for the mapping of a k-mer or a sequence to a unitig of a Compacted de Br...
Definition: UnitigMap.hpp:92
UnitigMap::getPredecessors
UnitigMap_BW getPredecessors() const
Create a UnitigMap_BW object that can create iterators (through UnitigMap_BW::begin() and UnitigMap_B...
UnitigMap::getMappedTail
Kmer getMappedTail() const
Get the tail k-mer of the mapped sequence.
UnitigMap::getUnitigTail
Kmer getUnitigTail() const
Get the tail k-mer of the reference unitig used for the mapping.
BackwardCDBG
Wrapper for class neighborIterator to iterate over the predecessors of a reference unitig used in a U...
Definition: NeighborIterator.hpp:116
ForwardCDBG
Wrapper for class neighborIterator to iterate over the successors of a reference unitig used in a U...
Definition: NeighborIterator.hpp:162
UnitigMapBase::operator!=
bool operator!=(const UnitigMapBase &o) const
Inequality operator: check if two UnitigMapBase are different.
UnitigMapBase::operator==
bool operator==(const UnitigMapBase &o) const
Equality operator: check if two UnitigMapBase are the same.
UnitigMap::operator!=
bool operator!=(const UnitigMap &o) const
Inequality operator: check if two UnitigMap are different.
UnitigMap::getUnitigHead
Kmer getUnitigHead() const
Get the head k-mer of the reference unitig used for the mapping.
UnitigMapBase::UnitigMapBase
UnitigMapBase(const size_t start, const size_t length, const size_t unitig_sz, const bool strand)
UnitigMapBase constructor (isEmpty = false).
neighborIterator
Iterator for the neighbors (predecessors or successors) of a reference unitig used in a UnitigMap obj...
Definition: NeighborIterator.hpp:34
unitigIterator
Iterator for the unitigs of a Compacted de Bruijn graph.
Definition: UnitigIterator.hpp:36
UnitigMap::getMappedKmer
Kmer getMappedKmer(const size_t pos) const
Get the k-mer starting at position pos in the mapped sequence.
UnitigMap::lcp
size_t lcp(const char *s, const size_t pos_s=0, const size_t pos_um_seq=0, const bool um_reversed=false) const
Compute the length of the longest common prefix between a given sequence and the reference unitig use...
UnitigMap::getGraph
CompactedDBG_ptr_t getGraph() const
Get a pointer to the CompactedDBG containing the reference unitig used in the mapping.
Definition: UnitigMap.hpp:243
Unitig
Represent a unitig which is a vertex of the Compacted de Bruijn graph.
Definition: Unitig.hpp:22
UnitigMap::UnitigMap
UnitigMap(size_t length=1, CompactedDBG_ptr_t cdbg_=nullptr)
UnitigMap constructor.
Kmer.hpp
Interface for the class Kmer:
UnitigMapBase::UnitigMapBase
UnitigMapBase(const size_t length=1)
UnitigMapBase constructor (isEmpty = true).
UnitigMapBase
Structure containing the basic information of a unitig mapping.
Definition: UnitigMap.hpp:33
UnitigMap::getData
Unitig_data_ptr_t getData() const
Get a pointer to the data associated with the reference unitig used in the mapping.
UnitigMap::getMappedHead
Kmer getMappedHead() const
Get the head k-mer of the mapped sequence.
CompactedDBG
Represent a Compacted de Bruijn graph.
Definition: CompactedDBG.hpp:313
UnitigMap::getKmerMapping
UnitigMap< U, G, is_const > getKmerMapping(const size_t pos) const
Create a new UnitigMap object which is the mapping of a k-mer on a reference unitig.
UnitigMap::UnitigMap
UnitigMap(const size_t start, const size_t length, const size_t unitig_sz, const bool strand)
UnitigMap constructor.
UnitigMap::operator==
bool operator==(const UnitigMap &o) const
Equality operator: check if two UnitigMap are the same.