Bifrost
CompactedDBG.hpp
Go to the documentation of this file.
1 #ifndef BIFROST_COMPACTED_DBG_HPP
2 #define BIFROST_COMPACTED_DBG_HPP
3 
4 #include <cmath>
5 #include <cstdlib>
6 #include <cstring>
7 #include <ctime>
8 #include <cstdio>
9 #include <climits>
10 #include <functional>
11 #include <getopt.h>
12 #include <iostream>
13 #include <sstream>
14 #include <stdint.h>
15 #include <string>
16 #include <unordered_map>
17 #include <unordered_set>
18 #include <vector>
19 
20 #include <thread>
21 #include <atomic>
22 #include <mutex>
23 
24 #include "BlockedBloomFilter.hpp"
25 #include "Common.hpp"
26 #include "File_Parser.hpp"
27 #include "FASTX_Parser.hpp"
28 #include "GFA_Parser.hpp"
29 #include "Kmer.hpp"
30 #include "KmerCovIndex.hpp"
31 #include "KmerHashTable.hpp"
32 #include "KmerIterator.hpp"
33 #include "KmerStream.hpp"
34 #include "Lock.hpp"
35 #include "minHashIterator.hpp"
36 #include "MinimizerIndex.hpp"
37 #include "RepHash.hpp"
38 #include "TinyVector.hpp"
39 #include "Unitig.hpp"
40 #include "UnitigIterator.hpp"
41 #include "UnitigMap.hpp"
42 
43 #include "roaring.hh"
44 
45 #define MASK_CONTIG_ID (0xffffffff00000000)
46 #define MASK_CONTIG_TYPE (0x80000000)
47 #define MASK_CONTIG_POS (0x7fffffff)
48 #define RESERVED_ID (0xffffffff)
49 
50 #define DEFAULT_K 31
51 
52 #define DEFAULT_G_DEC1 8
53 #define DEFAULT_G_DEC2 4
54 
60 using namespace std;
61 
129 
130  bool verbose;
131 
132  size_t nb_threads;
134 
137 
140 
141  vector<string> filename_seq_in;
142  vector<string> filename_ref_in;
143 
144  // The following members are NOT used by CompactedDBG<U, G>::build.
145  // They are provided so that you can pass them as parameters to other
146  // functions such as CompactedDBG<U, G>::simplify, CompactedDBG<U, G>::read or
147  // CompactedDBG<U, G>::write.
148 
149  int k, g;
150 
151  bool build;
152  bool update;
153  bool query;
154 
155  bool clipTips;
158 
159  bool outputGFA;
160  bool inexact_search;
161 
162  double ratio_kmers;
163 
165 
167 
168  vector<string> filename_query_in;
169 
// Default constructor. Sets: verbose off, 1 thread, read chunks of 64, 14 Bloom filter bits
// per k-mer for both filters, k = DEFAULT_K, g = -1, every boolean option false except
// outputGFA (true), and ratio_kmers = 0.8. Members not named here (the filename strings
// and vectors) are default-constructed.
// The mem-initializers are written in member declaration order: C++ always initializes
// members in declaration order regardless of how the list is written, so keeping the two
// in sync makes the list read the way it executes and avoids -Wreorder warnings.
170  CDBG_Build_opt() : verbose(false), nb_threads(1), read_chunksize(64), nb_bits_unique_kmers_bf(14),
171  nb_bits_non_unique_kmers_bf(14), k(DEFAULT_K), g(-1), build(false), update(false), query(false),
172  clipTips(false), deleteIsolated(false), useMercyKmers(false), outputGFA(true),
173  inexact_search(false), ratio_kmers(0.8) {}
174 };
175 
181 template<typename U = void, typename G = void> using const_UnitigMap = UnitigMap<U, G, true>;
182 
// Base class that user-defined unitig data types derive from: CompactedDBG statically
// asserts that its unitig data type is either void or a class extending
// CDBG_Data_t<Unitig_data_t, Graph_data_t>.
// NOTE: this listing omits the documented member lines (original lines 222, 237, 249 and
// 275 — clear(), concat(), merge() and the signature of serialize()); only extract() and
// the tail of serialize() are visible below.
212 template<typename Unitig_data_t, typename Graph_data_t = void> //Curiously Recurring Template Pattern (CRTP)
213 class CDBG_Data_t {
214 
215  public:
216 
223 
238 
250 
// Extract data corresponding to a sub-unitig of a unitig A.
// This default implementation is empty (does nothing); both parameters are ignored.
265  void extract(const UnitigMap<Unitig_data_t, Graph_data_t>& um_src, bool last_extraction){}
266 
276 
// Tail of serialize() (serializes the data to a GFA-formatted string; its signature line
// is not shown in this listing). The default implementation returns an empty string.
277  return string();
278  }
279 };
280 
305 template<typename Unitig_data_t = void, typename Graph_data_t = void>
307 
308  static_assert(is_void<Unitig_data_t>::value || is_base_of<CDBG_Data_t<Unitig_data_t, Graph_data_t>, Unitig_data_t>::value,
309  "Type of data associated with vertices of class CompactedDBG must be void (no data) or a class extending class CDBG_Data_t");
310 
311  typedef Unitig_data_t U;
312  typedef Graph_data_t G;
313 
314  public:
315 
316  template<typename U, typename G, bool is_const> friend class UnitigMap;
317  template<typename U, typename G, bool is_const> friend class unitigIterator;
318  template<typename U, typename G, bool is_const> friend class neighborIterator;
319 
320  template<typename X, typename Y> friend class CompactedDBG;
321 
328  CompactedDBG(const int kmer_length = DEFAULT_K);
329 
334  CompactedDBG(const int kmer_length, const int minimizer_length);
335 
341  CompactedDBG(const CompactedDBG<U, G>& o); // Copy constructor
342 
348  CompactedDBG(CompactedDBG<U, G>&& o); // Move constructor
349 
352  virtual ~CompactedDBG();
353 
361 
369 
382 
387  bool operator==(const CompactedDBG<U, G>& o) const;
388 
393  inline bool operator!=(const CompactedDBG<U, G>& o) const;
394 
397  void clear();
398 
403  bool build(CDBG_Build_opt& opt);
404 
411  bool simplify(const bool delete_short_isolated_unitigs = true, const bool clip_short_tips = true, const bool verbose = false);
412 
420  bool write(const string& output_filename, const size_t nb_threads = 1, const bool GFA_output = true, const bool verbose = false) const;
421 
431  bool read(const string& input_filename, const size_t nb_threads = 1, const bool verbose = false);
432 
440  UnitigMap<U, G> find(const Kmer& km, const bool extremities_only = false);
441 
449  const_UnitigMap<U, G> find(const Kmer& km, const bool extremities_only = false) const;
450 
459  UnitigMap<U, G> findUnitig(const char* s, const size_t pos, const size_t len);
460 
469  const_UnitigMap<U, G> findUnitig(const char* s, const size_t pos, const size_t len) const;
470 
483  vector<pair<size_t, UnitigMap<U, G>>> searchSequence( const string& seq, const bool exact, const bool insertion, const bool deletion,
484  const bool substitution, const bool or_exclusive_match = false);
485 
498  vector<pair<size_t, const_UnitigMap<U, G>>> searchSequence( const string& seq, const bool exact, const bool insertion, const bool deletion,
499  const bool substitution, const bool or_exclusive_match = false) const;
500 
508  bool add(const string& seq, const bool verbose = false);
509 
515  bool remove(const const_UnitigMap<U, G>& um, const bool verbose = false);
516 
529  bool merge(const CompactedDBG& o, const size_t nb_threads = 1, const bool verbose = false);
530 
541  bool merge(const vector<CompactedDBG>& v, const size_t nb_threads = 1, const bool verbose = false);
542 
547 
552 
557 
562 
566  size_t length() const;
567 
571  size_t nbKmers() const;
572 
// Return true if the graph is flagged as invalid (value of the `invalid` member).
576  inline bool isInvalid() const { return invalid; }
577 
// Return the length of k-mers of the graph (k_).
581  inline int getK() const { return k_; }
582 
// Return the length of minimizers of the graph (g_).
586  inline int getG() const { return g_; }
587 
// Return the number of unitigs in the graph, computed as the sum of the sizes of the
// three containers v_unitigs, km_unitigs and h_kmers_ccov.
591  inline size_t size() const { return v_unitigs.size() + km_unitigs.size() + h_kmers_ccov.size(); }
592 
// Return a pointer to the graph data, as provided by data.getData().
596  inline G* getData() { return data.getData(); }
597 
// Const overload: return a constant pointer to the graph data.
601  inline const G* getData() const { return data.getData(); }
602 
// Declaration only; the definition is not in this header listing (presumably in
// Search.tcc, included at the end of the file — TODO confirm).
// Presumably searches the sequences of the files in query_filenames against the graph and
// writes results to output named after out_filename_prefix — verify against the definition.
// NOTE(review): `verbose` is declared `const size_t` with a `false` default here, whereas
// every other `verbose` parameter in this class is `const bool`. It looks like it was meant
// to be bool; confirm against the out-of-class definition before changing, since the two
// signatures must match.
603  bool search(const vector<string>& query_filenames, const string& out_filename_prefix,
604  const double ratio_kmers, const bool inexact_search, const size_t nb_threads,
605  const size_t verbose = false) const;
606 
607  protected:
608 
609  bool annotateSplitUnitigs(const CompactedDBG<U, G>& o, const size_t nb_threads = 1, const bool verbose = false);
610 
611  pair<size_t, size_t> splitAllUnitigs();
612  pair<size_t, size_t> getSplitInfoAllUnitigs() const;
613 
// Thin dispatcher: forwards v_joins and nb_threads unchanged to the joinUnitigs_ overload
// selected at compile time by is_void<U>::value (i.e. whether the unitig data type U is
// void), and returns that overload's size_t result.
// v_joins may be nullptr (the default); nb_threads defaults to 1.
614  inline size_t joinUnitigs(vector<Kmer>* v_joins = nullptr, const size_t nb_threads = 1) {
615 
616  return joinUnitigs_<is_void<U>::value>(v_joins, nb_threads);
617  }
618 
619  bool mergeData(const CompactedDBG<U, G>& o, const size_t nb_threads = 1, const bool verbose = false);
620  bool mergeData(CompactedDBG<U, G>&& o, const size_t nb_threads = 1, const bool verbose = false);
621 
622  private:
623 
624  CompactedDBG<U, G>& toDataGraph(CompactedDBG<void, void>&& o, const size_t nb_threads = 1);
625 
626  bool filter(const CDBG_Build_opt& opt, const size_t nb_unique_kmers, const size_t nb_non_unique_kmers);
627  bool construct(const CDBG_Build_opt& opt, const size_t nb_unique_minimizers, const size_t nb_non_unique_minimizers);
628 
629  bool addUnitigSequenceBBF(const Kmer km, const string& seq, const size_t pos_match_km, const size_t len_match_km, LockGraph& lck_g);
630 
631  size_t findUnitigSequenceBBF(Kmer km, string& s, bool& isIsolated, vector<Kmer>& l_ignored_km_tip);
632  bool bwStepBBF(const Kmer km, Kmer& front, char& c, bool& has_no_neighbor, vector<Kmer>& l_ignored_km_tip, const bool check_fp_cand = true) const;
633  bool fwStepBBF(const Kmer km, Kmer& end, char& c, bool& has_no_neighbor, vector<Kmer>& l_ignored_km_tip, const bool check_fp_cand = true) const;
634 
// Look up, in hmap_min_unitigs, the minimizer located at the iterator's current position
// (built from it_min_h.s + pos, then rep() — presumably its canonical representative).
// Returns 0 if that minimizer is present in hmap_min_unitigs; otherwise returns
// pos - it_min_h.p.
// NOTE(review): the result is converted to size_t, so pos - it_min_h.p would wrap if it
// were negative — presumably pos >= it_min_h.p always holds; confirm.
635  inline size_t find(const preAllocMinHashIterator<RepHash>& it_min_h) const {
636 
637  const int pos = it_min_h.getPosition();
638  return (hmap_min_unitigs.find(Minimizer(it_min_h.s + pos).rep()) != hmap_min_unitigs.end() ? 0 : pos - it_min_h.p);
639  }
640 
641  UnitigMap<U, G> find(const char* s, const size_t pos_km, const minHashIterator<RepHash>& it_min, const bool extremities_only = false);
642  const_UnitigMap<U, G> find(const char* s, const size_t pos_km, const minHashIterator<RepHash>& it_min, const bool extremities_only = false) const;
643 
644  UnitigMap<U, G> find(const Kmer& km, const preAllocMinHashIterator<RepHash>& it_min_h);
645 
646  vector<const_UnitigMap<U, G>> findPredecessors(const Kmer& km, const bool extremities_only = false) const;
647  vector<const_UnitigMap<U, G>> findSuccessors(const Kmer& km, const size_t limit = 4, const bool extremities_only = false) const;
648 
649  vector<UnitigMap<U, G>> findPredecessors(const Kmer& km, const bool extremities_only = false);
650  vector<UnitigMap<U, G>> findSuccessors(const Kmer& km, const size_t limit = 4, const bool extremities_only = false);
651 
652  UnitigMap<U, G> findUnitig(const Kmer& km, const char* s, const size_t pos);
653  UnitigMap<U, G> findUnitig(const Kmer& km, const char* s, const size_t pos, const preAllocMinHashIterator<RepHash>& it_min_h);
654 
655  UnitigMap<U, G> findUnitig(const char* s, const size_t pos, const size_t len, const minHashIterator<RepHash>& it_min);
656  const_UnitigMap<U, G> findUnitig(const char* s, const size_t pos, const size_t len, const minHashIterator<RepHash>& it_min) const;
657 
658  bool addUnitig(const string& str_unitig, const size_t id_unitig);
659  bool addUnitig(const string& str_unitig, const size_t id_unitig, const size_t id_unitig_r, const size_t is_short_r);
660  bool addUnitig(const string& str_unitig, const size_t id_unitig, SpinLock& lck_unitig, SpinLock& lck_kmer/*, const bool enable_abundant = true*/);
661  void swapUnitigs(const bool isShort, const size_t id_a, const size_t id_b);
662 
663  bool mergeUnitig(const string& seq, const bool verbose = false);
664  bool annotateSplitUnitig(const string& seq, const bool verbose = false);
665  bool annotateSplitUnitig(const string& seq, LockGraph& lck_g, const bool verbose = false);
666 
// mergeData_ comes as a pair of overloads selected through std::enable_if on the
// is_void template argument.
// Overload enabled when is_void is false: obtains the data pointer of mapping `a`
// (a.getData()) and calls its merge(a, b), i.e. merges the data of `b` into the data of `a`.
667  template<bool is_void>
668  inline typename std::enable_if<!is_void, void>::type mergeData_(const UnitigMap<U, G>& a, const const_UnitigMap<U, G>& b){
669 
670  a.getData()->merge(a, b);
671  }
672 
// Overload enabled when is_void is true: there is nothing to merge, so this is a no-op.
673  template<bool is_void>
674  inline typename std::enable_if<is_void, void>::type mergeData_(const UnitigMap<U, G>& a, const const_UnitigMap<U, G>& b) {}
675 
676  template<bool is_void>
677  typename std::enable_if<!is_void, void>::type deleteUnitig_(const bool isShort, const bool isAbundant,
678  const size_t id_unitig, const bool delete_data = true);
679 
680  template<bool is_void>
681  typename std::enable_if<is_void, void>::type deleteUnitig_( const bool isShort, const bool isAbundant,
682  const size_t id_unitig, const bool delete_data = true);
683 
684  void deleteUnitig_(const bool isShort, const bool isAbundant, const size_t id_unitig, const string& str);
685 
686  template<bool is_void>
687  typename std::enable_if<!is_void, bool>::type extractUnitig_(size_t& pos_v_unitigs, size_t& nxt_pos_insert_v_unitigs,
688  size_t& v_unitigs_sz, size_t& v_kmers_sz, const vector<pair<int,int>>& sp);
689  template<bool is_void>
690  typename std::enable_if<is_void, bool>::type extractUnitig_(size_t& pos_v_unitigs, size_t& nxt_pos_insert_v_unitigs,
691  size_t& v_unitigs_sz, size_t& v_kmers_sz, const vector<pair<int,int>>& sp);
692 
693  pair<size_t, size_t> extractAllUnitigs();
694 
695  template<bool is_void>
696  typename std::enable_if<!is_void, size_t>::type joinUnitigs_(vector<Kmer>* v_joins = nullptr, const size_t nb_threads = 1);
697 
698  template<bool is_void>
699  typename std::enable_if<is_void, size_t>::type joinUnitigs_(vector<Kmer>* v_joins = nullptr, const size_t nb_threads = 1);
700 
701  void moveToAbundant();
702  void setFullCoverage(const size_t cov) const;
703 
704  void createJoinHT(vector<Kmer>* v_joins, KmerHashTable<Kmer>& joins, const size_t nb_threads) const;
705  void createJoinHT(vector<Kmer>* v_joins, KmerHashTable<char>& joins, const size_t nb_threads) const;
706 
707  bool checkJoin(const Kmer& a, const const_UnitigMap<U, G>& cm_a, Kmer& b) const;
708  void check_fp_tips(KmerHashTable<bool>& ignored_km_tips);
709  size_t removeUnitigs(bool rmIsolated, bool clipTips, vector<Kmer>& v);
710 
711  size_t joinTips(string filename_MBBF_uniq_kmers, const size_t nb_threads = 1, const bool verbose = false);
712  vector<Kmer> extractMercyKmers(BlockedBloomFilter& bf_uniq_km, const size_t nb_threads = 1, const bool verbose = false);
713 
714  void writeGFA(const string& graphfilename, const size_t nb_threads = 1) const;
715  void writeFASTA(const string& graphfilename) const;
716 
717  void readGFA(const string& graphfilename, const size_t nb_threads = 1);
718  void readFASTA(const string& graphfilename, const size_t nb_threads = 1);
719 
720  template<bool is_void>
721  typename std::enable_if<!is_void, void>::type writeGFA_sequence_(GFA_Parser& graph, KmerHashTable<size_t>& idmap) const;
722  template<bool is_void>
723  typename std::enable_if<is_void, void>::type writeGFA_sequence_(GFA_Parser& graph, KmerHashTable<size_t>& idmap) const;
724 
725  void mapRead(const const_UnitigMap<U, G>& um);
726  void mapRead(const const_UnitigMap<U, G>& um, LockGraph& lck_g);
727 
728  void unmapRead(const const_UnitigMap<U, G>& um);
729  void unmapRead(const const_UnitigMap<U, G>& um, LockGraph& lck_g);
730 
731  void setKmerGmerLength(const int kmer_length, const int minimizer_length = -1);
732  void print() const;
733 
734  vector<pair<size_t, UnitigMap<U, G>>> searchSequence( const string& seq, const bool exact, const bool insertion, const bool deletion,
735  const bool substitution, const double ratio_kmers, const bool or_exclusive_match);
736 
737  vector<pair<size_t, const_UnitigMap<U, G>>> searchSequence( const string& seq, const bool exact, const bool insertion, const bool deletion,
738  const bool substitution, const double ratio_kmers, const bool or_exclusive_match) const;
739 
740  int k_;
741  int g_;
742 
743  bool invalid;
744 
745  static const int tiny_vector_sz = 2;
746  static const int min_abundance_lim = 15;
747  static const int max_abundance_lim = 15;
748 
749  typedef KmerHashTable<CompressedCoverage_t<U>> h_kmers_ccov_t;
750 
751  vector<Unitig<U>*> v_unitigs;
752 
753  KmerCovIndex<U> km_unitigs;
754  MinimizerIndex hmap_min_unitigs;
755 
756  h_kmers_ccov_t h_kmers_ccov;
757 
758  BlockedBloomFilter bf;
759 
760  wrapperData<G> data;
761 };
762 
763 #include "CompactedDBG.tcc"
764 #include "Search.tcc"
765 
766 #endif
CompactedDBG::size
size_t size() const
Return the number of unitigs in the graph.
Definition: CompactedDBG.hpp:591
CompactedDBG::find
const_UnitigMap< U, G > find(const Kmer &km, const bool extremities_only=false) const
Find the unitig containing the queried k-mer in the Compacted de Bruijn graph.
CompactedDBG::operator=
CompactedDBG< U, G > & operator=(CompactedDBG< U, G > &&o)
Move assignment operator (move a compacted de Bruijn graph).
CompactedDBG::simplify
bool simplify(const bool delete_short_isolated_unitigs=true, const bool clip_short_tips=true, const bool verbose=false)
Simplify the Compacted de Bruijn graph: clip short (< 2k length) tips and/or delete short (< 2k lengt...
CDBG_Build_opt::useMercyKmers
bool useMercyKmers
Keep in the graph low coverage k-mers (cov=1) connecting tips of the graph.
Definition: CompactedDBG.hpp:157
CompactedDBG::write
bool write(const string &output_filename, const size_t nb_threads=1, const bool GFA_output=true, const bool verbose=false) const
Write the Compacted de Bruijn graph to disk (GFA1 format).
CDBG_Data_t::extract
void extract(const UnitigMap< Unitig_data_t, Graph_data_t > &um_src, bool last_extraction)
Extract data corresponding to a sub-unitig of a unitig A.
Definition: CompactedDBG.hpp:265
CompactedDBG::operator+=
CompactedDBG< U, G > & operator+=(const CompactedDBG< U, G > &o)
Addition assignment operator (merge a compacted de Bruijn graph).
CompactedDBG::operator=
CompactedDBG< U, G > & operator=(const CompactedDBG< U, G > &o)
Copy assignment operator (copy a compacted de Bruijn graph).
CDBG_Build_opt::outFilenameBBF
string outFilenameBBF
String containing the name of a Bloom filter file that will be generated by CompactedDBG<U,...
Definition: CompactedDBG.hpp:139
CDBG_Data_t::serialize
string serialize(const const_UnitigMap< Unitig_data_t, Graph_data_t > &um_src) const
Serialize the data to a GFA-formatted string.
Definition: CompactedDBG.hpp:275
CompactedDBG::add
bool add(const string &seq, const bool verbose=false)
Add a sequence to the Compacted de Bruijn graph.
CompactedDBG::isInvalid
bool isInvalid() const
Return a boolean indicating if the graph is invalid (wrong input parameters/files,...
Definition: CompactedDBG.hpp:576
CompactedDBG::findUnitig
UnitigMap< U, G > findUnitig(const char *s, const size_t pos, const size_t len)
Find the unitig containing the k-mer starting at a given position in a query sequence and extends the...
CompactedDBG::getData
const G * getData() const
Return a constant pointer to the graph data.
Definition: CompactedDBG.hpp:601
CompactedDBG::operator==
bool operator==(const CompactedDBG< U, G > &o) const
Equality operator.
Kmer
Interface to store and manipulate k-mers.
Definition: Kmer.hpp:40
CompactedDBG::const_iterator
unitigIterator< U, G, true > const_iterator
A constant iterator for the unitigs of the graph.
Definition: CompactedDBG.hpp:323
CompactedDBG::searchSequence
vector< pair< size_t, UnitigMap< U, G > > > searchSequence(const string &seq, const bool exact, const bool insertion, const bool deletion, const bool substitution, const bool or_exclusive_match=false)
Performs exact and/or inexact search of the k-mers of a sequence query in the Compacted de Bruijn gra...
CompactedDBG::operator!=
bool operator!=(const CompactedDBG< U, G > &o) const
Inequality operator.
CompactedDBG::remove
bool remove(const const_UnitigMap< U, G > &um, const bool verbose=false)
Remove a unitig from the Compacted de Bruijn graph.
CDBG_Build_opt::prefixFilenameOut
string prefixFilenameOut
Prefix for the name of the file to which the graph must be written.
Definition: CompactedDBG.hpp:164
CompactedDBG::getK
int getK() const
Return the length of k-mers of the graph.
Definition: CompactedDBG.hpp:581
CompactedDBG::read
bool read(const string &input_filename, const size_t nb_threads=1, const bool verbose=false)
Read a Compacted de Bruijn graph from disk (GFA1 or FASTA format).
CompactedDBG::getG
int getG() const
Return the length of minimizers of the graph.
Definition: CompactedDBG.hpp:586
CDBG_Build_opt::nb_bits_unique_kmers_bf
size_t nb_bits_unique_kmers_bf
Number of Bloom filter bits per k-mer occurring at least once in the FASTA/FASTQ/GFA files of CDBG_Bu...
Definition: CompactedDBG.hpp:135
CompactedDBG::CompactedDBG
CompactedDBG(const CompactedDBG< U, G > &o)
Copy constructor (copy a compacted de Bruijn graph).
CompactedDBG::CompactedDBG
CompactedDBG(CompactedDBG< U, G > &&o)
Move constructor (move a compacted de Bruijn graph).
UnitigMap.hpp
UnitigMap type interface.
CompactedDBG::build
bool build(CDBG_Build_opt &opt)
Build the Compacted de Bruijn graph.
CompactedDBG::CompactedDBG
CompactedDBG(const int kmer_length, const int minimizer_length)
Constructor (set up an empty compacted dBG).
CDBG_Build_opt::build
bool build
Boolean indicating if the graph must be built.
Definition: CompactedDBG.hpp:151
CDBG_Build_opt::filename_seq_in
vector< string > filename_seq_in
Vector of strings, each string is the name of a FASTA/FASTQ/GFA file to use for the graph constructio...
Definition: CompactedDBG.hpp:141
CDBG_Data_t
If data are to be associated with the unitigs of the compacted de Bruijn graph, those data must be wr...
Definition: CompactedDBG.hpp:213
UnitigMap
Contain all the information for the mapping of a k-mer or a sequence to a unitig of a Compacted de Br...
Definition: UnitigMap.hpp:92
CDBG_Build_opt::verbose
bool verbose
Print information messages during execution if true.
Definition: CompactedDBG.hpp:130
CDBG_Build_opt::filename_graph_in
string filename_graph_in
String containing the name of a GFA file to read using CompactedDBG<U, G>::read.
Definition: CompactedDBG.hpp:166
CompactedDBG::begin
iterator begin()
Create an iterator to the first unitig of the Compacted de Bruijn graph (unitigs are NOT sorted lexic...
CompactedDBG::end
const_iterator end() const
Create a constant iterator to the "past-the-last" unitig of the Compacted de Bruijn graph (unitigs ar...
CompactedDBG::find
UnitigMap< U, G > find(const Kmer &km, const bool extremities_only=false)
Find the unitig containing the queried k-mer in the Compacted de Bruijn graph.
CDBG_Build_opt::clipTips
bool clipTips
Clip short tips (length < 2k) of the graph (not used by CompactedDBG<U, G>::build).
Definition: CompactedDBG.hpp:155
Unitig.hpp
The Unitig interface.
CompactedDBG::findUnitig
const_UnitigMap< U, G > findUnitig(const char *s, const size_t pos, const size_t len) const
Find the unitig containing the k-mer starting at a given position in a query sequence and extends the...
CDBG_Build_opt::nb_bits_non_unique_kmers_bf
size_t nb_bits_non_unique_kmers_bf
Number of Bloom filter bits per k-mer occurring at least twice in the FASTA/FASTQ/GFA files of CDBG_B...
Definition: CompactedDBG.hpp:136
CompactedDBG::begin
const_iterator begin() const
Create a constant iterator to the first unitig of the Compacted de Bruijn graph (unitigs are NOT sor...
CompactedDBG::searchSequence
vector< pair< size_t, const_UnitigMap< U, G > > > searchSequence(const string &seq, const bool exact, const bool insertion, const bool deletion, const bool substitution, const bool or_exclusive_match=false) const
Performs exact and/or inexact search of the k-mers of a sequence query in the Compacted de Bruijn gra...
CompactedDBG::CompactedDBG
CompactedDBG(const int kmer_length=31)
Constructor (set up an empty compacted dBG).
CDBG_Build_opt::update
bool update
Boolean indicating if the graph must be updated.
Definition: CompactedDBG.hpp:152
CDBG_Data_t::clear
void clear(const UnitigMap< Unitig_data_t, Graph_data_t > &um_dest)
Clear the data associated with a unitig.
Definition: CompactedDBG.hpp:222
neighborIterator
Iterator for the neighbors (predecessors or successors) of a reference unitig used in a UnitigMap obj...
Definition: NeighborIterator.hpp:34
CDBG_Build_opt::filename_ref_in
vector< string > filename_ref_in
Vector of strings, each string is the name of a FASTA/FASTQ/GFA file to use for the graph constructio...
Definition: CompactedDBG.hpp:142
CDBG_Build_opt::nb_threads
size_t nb_threads
Number of threads to use for building the graph.
Definition: CompactedDBG.hpp:132
unitigIterator
Iterator for the unitigs of a Compacted de Bruijn graph.
Definition: UnitigIterator.hpp:36
CDBG_Data_t::concat
void concat(const UnitigMap< Unitig_data_t, Graph_data_t > &um_dest, const UnitigMap< Unitig_data_t, Graph_data_t > &um_src)
Join data of two unitigs which are going to be concatenated.
Definition: CompactedDBG.hpp:237
CompactedDBG::merge
bool merge(const vector< CompactedDBG > &v, const size_t nb_threads=1, const bool verbose=false)
Merge multiple compacted de Bruijn graphs.
CDBG_Build_opt
Most members of this structure are parameters for CompactedDBG<U, G>::build(), except for:
Definition: CompactedDBG.hpp:128
CompactedDBG::getData
G * getData()
Return a pointer to the graph data.
Definition: CompactedDBG.hpp:596
CompactedDBG::clear
void clear()
Clear the graph: empty the graph and reset its parameters.
CompactedDBG::end
iterator end()
Create an iterator to the "past-the-last" unitig of the Compacted de Bruijn graph (unitigs are NOT so...
Kmer.hpp
Interface for the class Kmer:
CDBG_Build_opt::read_chunksize
size_t read_chunksize
Number of reads a thread can read and process at a time.
Definition: CompactedDBG.hpp:133
UnitigMap::getData
Unitig_data_ptr_t getData() const
Get a pointer to the data associated with the reference unitig used in the mapping.
UnitigIterator.hpp
The unitigIterator type interface.
CompactedDBG::~CompactedDBG
virtual ~CompactedDBG()
Destructor.
CDBG_Build_opt::deleteIsolated
bool deleteIsolated
Remove short isolated unitigs (length < 2k) of the graph (not used by CompactedDBG<U,...
Definition: CompactedDBG.hpp:156
CompactedDBG::length
size_t length() const
Return the sum of the lengths of the unitigs.
DataStorage
Definition: ColorSet.hpp:16
CDBG_Data_t::merge
void merge(const UnitigMap< Unitig_data_t, Graph_data_t > &um_dest, const const_UnitigMap< Unitig_data_t, Graph_data_t > &um_src)
Merge the data of a sub-unitig B to the data of a sub-unitig A.
Definition: CompactedDBG.hpp:249
CompactedDBG::iterator
unitigIterator< U, G, false > iterator
An iterator for the unitigs of the graph.
Definition: CompactedDBG.hpp:322
CompactedDBG
Represent a Compacted de Bruijn graph.
Definition: CompactedDBG.hpp:306
CDBG_Build_opt::outputGFA
bool outputGFA
Boolean indicating if the graph is written to a GFA file (true) or if the unitigs are written to a FA...
Definition: CompactedDBG.hpp:159
CompactedDBG::merge
bool merge(const CompactedDBG &o, const size_t nb_threads=1, const bool verbose=false)
Merge a compacted de Bruijn graph.
CompactedDBG::nbKmers
size_t nbKmers() const
Return the number of k-mers in the graph.
CDBG_Build_opt::inFilenameBBF
string inFilenameBBF
String containing the name of a Bloom filter file that is generated by CompactedDBG<U,...
Definition: CompactedDBG.hpp:138
CDBG_Build_opt::k
int k
Length of k-mers (not used by CompactedDBG<U, G>::build).
Definition: CompactedDBG.hpp:149