Bifrost
CompactedDBG.hpp
Go to the documentation of this file.
1 #ifndef BIFROST_COMPACTED_DBG_HPP
2 #define BIFROST_COMPACTED_DBG_HPP
3 
4 #include <cmath>
5 #include <cstdlib>
6 #include <cstring>
7 #include <ctime>
8 #include <cstdio>
9 #include <climits>
10 #include <functional>
11 #include <getopt.h>
12 #include <iostream>
13 #include <map>
14 #include <set>
15 #include <sstream>
16 #include <stdint.h>
17 #include <string>
18 #include <unordered_map>
19 #include <unordered_set>
20 #include <vector>
21 
22 #include <thread>
23 #include <atomic>
24 #include <mutex>
25 
26 #include "BlockedBloomFilter.hpp"
27 #include "Common.hpp"
28 #include "File_Parser.hpp"
29 #include "FASTX_Parser.hpp"
30 #include "GFA_Parser.hpp"
31 #include "Kmer.hpp"
32 #include "KmerCovIndex.hpp"
33 #include "KmerHashTable.hpp"
34 #include "KmerIterator.hpp"
35 #include "KmerStream.hpp"
36 #include "Lock.hpp"
37 #include "minHashIterator.hpp"
38 #include "MinimizerIndex.hpp"
39 #include "RepHash.hpp"
40 #include "TinyVector.hpp"
41 #include "Unitig.hpp"
42 #include "UnitigIterator.hpp"
43 #include "UnitigMap.hpp"
44 
45 #include "roaring.hh"
46 
47 #define MASK_CONTIG_ID (0xffffffff00000000)
48 #define MASK_CONTIG_TYPE (0x80000000)
49 #define MASK_CONTIG_POS (0x7fffffff)
50 #define RESERVED_ID (0xffffffff)
51 
52 #define DEFAULT_K 31
53 
54 #define DEFAULT_G_DEC1 8
55 #define DEFAULT_G_DEC2 4
56 
62 using namespace std;
63 
129 
130  bool verbose;
131 
132  size_t nb_threads;
133 
136 
139 
140  vector<string> filename_seq_in;
141  vector<string> filename_ref_in;
142 
143  // The following members are NOT used by CompactedDBG<U, G>::build
144  // but you can set them to use them as parameters for other functions
145  // such as CompactedDBG<U, G>::simplify, CompactedDBG<U, G>::read or
146  // CompactedDBG<U, G>::write.
147 
148  int k, g;
149 
150  bool build;
151  bool update;
152  bool query;
153 
154  bool clipTips;
157 
158  bool outputGFA;
159  bool inexact_search;
160 
161  double ratio_kmers;
162 
164 
166 
167  vector<string> filename_query_in;
168 
// Default constructor: 1 thread, k = DEFAULT_K, g = -1, 14 Bloom filter bits per k-mer for
// both the unique and the non-unique filters, ratio_kmers = 0.8; every boolean flag starts
// false except outputGFA, which starts true.
// NOTE(review): the initializer list is not written in member declaration order (e.g. verbose
// is declared first but listed last). C++ initializes members in declaration order regardless;
// since every initializer here is a constant, the result is the same, but compilers may warn.
169  CDBG_Build_opt() : nb_threads(1), k(DEFAULT_K), g(-1), nb_bits_unique_kmers_bf(14),
170  nb_bits_non_unique_kmers_bf(14), ratio_kmers(0.8),
171  build(false), update(false), query(false), clipTips(false), deleteIsolated(false),
172  inexact_search(false), useMercyKmers(false), outputGFA(true), verbose(false) {}
173 };
174 
180 template<typename U = void, typename G = void> using const_UnitigMap = UnitigMap<U, G, true>;
181 
211 template<typename Unitig_data_t, typename Graph_data_t = void> //Curiously Recurring Template Pattern (CRTP)
212 class CDBG_Data_t {
213 
214  public:
215 
222 
237 
249 
// Hook for extracting data corresponding to a sub-unitig of the unitig mapped by um_src.
// The default implementation has an empty body and ignores both arguments
// (um_src and last_extraction).
264  void extract(const UnitigMap<Unitig_data_t, Graph_data_t>& um_src, bool last_extraction){}
265 
275 
276  return string();
277  }
278 };
279 
304 template<typename Unitig_data_t = void, typename Graph_data_t = void>
306 
307  static_assert(is_void<Unitig_data_t>::value || is_base_of<CDBG_Data_t<Unitig_data_t, Graph_data_t>, Unitig_data_t>::value,
308  "Type of data associated with vertices of class CompactedDBG must be void (no data) or a class extending class CDBG_Data_t");
309 
310  typedef Unitig_data_t U;
311  typedef Graph_data_t G;
312 
313  public:
314 
315  template<typename U, typename G, bool is_const> friend class UnitigMap;
316  template<typename U, typename G, bool is_const> friend class unitigIterator;
317  template<typename U, typename G, bool is_const> friend class neighborIterator;
318 
319  template<typename X, typename Y> friend class CompactedDBG;
320 
328  CompactedDBG(const int kmer_length = DEFAULT_K, const int minimizer_length = -1);
329 
335  CompactedDBG(const CompactedDBG<U, G>& o); // Copy constructor
336 
342  CompactedDBG(CompactedDBG<U, G>&& o); // Move constructor
343 
346  virtual ~CompactedDBG();
347 
355 
363 
376 
381  bool operator==(const CompactedDBG<U, G>& o) const;
382 
387  inline bool operator!=(const CompactedDBG<U, G>& o) const;
388 
391  void clear();
392 
397  bool build(CDBG_Build_opt& opt);
398 
405  bool simplify(const bool delete_short_isolated_unitigs = true, const bool clip_short_tips = true, const bool verbose = false);
406 
414  bool write(const string& output_filename, const size_t nb_threads = 1, const bool GFA_output = true, const bool verbose = false) const;
415 
425  bool read(const string& input_filename, const size_t nb_threads = 1, const bool verbose = false);
426 
434  UnitigMap<U, G> find(const Kmer& km, const bool extremities_only = false);
435 
443  const_UnitigMap<U, G> find(const Kmer& km, const bool extremities_only = false) const;
444 
453  UnitigMap<U, G> findUnitig(const char* s, const size_t pos, const size_t len);
454 
463  const_UnitigMap<U, G> findUnitig(const char* s, const size_t pos, const size_t len) const;
464 
477  vector<pair<size_t, UnitigMap<U, G>>> searchSequence( const string& s, const bool exact, const bool insertion, const bool deletion,
478  const bool substitution, const bool or_exclusive_match = false);
479 
492  vector<pair<size_t, const_UnitigMap<U, G>>> searchSequence( const string& s, const bool exact, const bool insertion, const bool deletion,
493  const bool substitution, const bool or_exclusive_match = false) const;
494 
502  bool add(const string& seq, const bool verbose = false);
503 
509  bool remove(const const_UnitigMap<U, G>& um, const bool verbose = false);
510 
523  bool merge(const CompactedDBG& o, const size_t nb_threads = 1, const bool verbose = false);
524 
535  bool merge(const vector<CompactedDBG>& v, const size_t nb_threads = 1, const bool verbose = false);
536 
541 
546 
551 
556 
560  size_t length() const;
561 
565  size_t nbKmers() const;
566 
// Returns the current value of the `invalid` member flag (true means the graph is invalid).
570  inline bool isInvalid() const { return invalid; }
571 
// Returns the k-mer length stored in k_.
575  inline int getK() const { return k_; }
576 
// Returns the minimizer length stored in g_.
580  inline int getG() const { return g_; }
581 
// Returns the number of unitigs in the graph, computed as the sum of the sizes of the
// three containers holding them: v_unitigs, km_unitigs and h_kmers_ccov.
585  inline size_t size() const { return v_unitigs.size() + km_unitigs.size() + h_kmers_ccov.size(); }
586 
// Returns a mutable pointer to the graph data by forwarding to the `data` wrapper's getData().
590  inline G* getData() { return data.getData(); }
591 
// Const overload: returns a read-only pointer to the graph data via the `data` wrapper's getData().
595  inline const G* getData() const { return data.getData(); }
596 
// Declaration only; the definition is not in this header listing (presumably in Search.tcc,
// which is included at the end of the file — confirm).
// Takes a list of query file names, an output file name prefix, a k-mer ratio, an
// inexact-search switch and a thread count.
// NOTE(review): `verbose` is typed size_t here but defaulted with `false` (converts to 0),
// whereas the other methods of this class declare `verbose` as bool. Changing the type
// requires updating the out-of-line definition at the same time.
597  bool search(const vector<string>& query_filenames, const string& out_filename_prefix,
598  const double ratio_kmers, const bool inexact_search, const size_t nb_threads,
599  const size_t verbose = false) const;
600 
601  protected:
602 
603  bool annotateSplitUnitigs(const CompactedDBG<U, G>& o, const size_t nb_threads = 1, const bool verbose = false);
604 
605  pair<size_t, size_t> splitAllUnitigs();
606  pair<size_t, size_t> getSplitInfoAllUnitigs() const;
607 
// Thin dispatcher: forwards both arguments to the joinUnitigs_ overload selected at compile
// time by is_void<U>::value (i.e. whether unitigs carry user data) and returns its result.
608  inline size_t joinUnitigs(vector<Kmer>* v_joins = nullptr, const size_t nb_threads = 1) {
609 
610  return joinUnitigs_<is_void<U>::value>(v_joins, nb_threads);
611  }
612 
613  bool mergeData(const CompactedDBG<U, G>& o, const size_t nb_threads = 1, const bool verbose = false);
614  bool mergeData(CompactedDBG<U, G>&& o, const size_t nb_threads = 1, const bool verbose = false);
615 
616  private:
617 
618  CompactedDBG<U, G>& toDataGraph(CompactedDBG<void, void>&& o, const size_t nb_threads = 1);
619 
620  bool filter(const CDBG_Build_opt& opt, const size_t nb_unique_kmers, const size_t nb_non_unique_kmers);
621  bool construct(const CDBG_Build_opt& opt, const size_t nb_unique_minimizers, const size_t nb_non_unique_minimizers);
622 
623  bool addUnitigSequenceBBF(const Kmer km, const string& seq, const size_t pos_match_km, const size_t len_match_km, LockGraph& lck_g);
624 
625  size_t findUnitigSequenceBBF(Kmer km, string& s, bool& isIsolated, vector<Kmer>& l_ignored_km_tip);
626  bool bwStepBBF(const Kmer km, Kmer& front, char& c, bool& has_no_neighbor, vector<Kmer>& l_ignored_km_tip, const bool check_fp_cand = true) const;
627  bool fwStepBBF(const Kmer km, Kmer& end, char& c, bool& has_no_neighbor, vector<Kmer>& l_ignored_km_tip, const bool check_fp_cand = true) const;
628 
// Private lookup helper for a pre-allocated minimizer iterator.
// Builds a Minimizer from the text at it_min_h.s + position, takes its rep()
// (presumably the canonical representative — confirm in Minimizer) and probes hmap_min_unitigs.
// Returns 0 when that minimizer is present in the index; otherwise returns the minimizer
// position relative to it_min_h.p (pos - it_min_h.p).
// NOTE(review): pos is an int while the return type is size_t, so the result is converted
// to size_t on return.
629  inline size_t find(const preAllocMinHashIterator<RepHash>& it_min_h) const {
630 
631  const int pos = it_min_h.getPosition();
632  return (hmap_min_unitigs.find(Minimizer(it_min_h.s + pos).rep()) != hmap_min_unitigs.end() ? 0 : pos - it_min_h.p);
633  }
634 
635  UnitigMap<U, G> find(const char* s, const size_t pos_km, const minHashIterator<RepHash>& it_min, const bool extremities_only = false);
636  const_UnitigMap<U, G> find(const char* s, const size_t pos_km, const minHashIterator<RepHash>& it_min, const bool extremities_only = false) const;
637 
638  UnitigMap<U, G> find(const Kmer& km, const preAllocMinHashIterator<RepHash>& it_min_h);
639 
640  //vector<const_UnitigMap<U, G>> find(const Minimizer& minz) const;
641 
642  vector<const_UnitigMap<U, G>> findPredecessors(const Kmer& km, const bool extremities_only = false) const;
643  vector<const_UnitigMap<U, G>> findSuccessors(const Kmer& km, const size_t limit = 4, const bool extremities_only = false) const;
644 
645  vector<UnitigMap<U, G>> findPredecessors(const Kmer& km, const bool extremities_only = false);
646  vector<UnitigMap<U, G>> findSuccessors(const Kmer& km, const size_t limit = 4, const bool extremities_only = false);
647 
648  UnitigMap<U, G> findUnitig(const Kmer& km, const char* s, const size_t pos);
649  UnitigMap<U, G> findUnitig(const Kmer& km, const char* s, const size_t pos, const preAllocMinHashIterator<RepHash>& it_min_h);
650 
651  UnitigMap<U, G> findUnitig(const char* s, const size_t pos, const size_t len, const minHashIterator<RepHash>& it_min);
652  const_UnitigMap<U, G> findUnitig(const char* s, const size_t pos, const size_t len, const minHashIterator<RepHash>& it_min) const;
653 
654  bool addUnitig(const string& str_unitig, const size_t id_unitig);
655  bool addUnitig(const string& str_unitig, const size_t id_unitig, const size_t id_unitig_r, const size_t is_short_r);
656  bool addUnitig(const string& str_unitig, const size_t id_unitig, SpinLock& lck_unitig, SpinLock& lck_kmer/*, const bool enable_abundant = true*/);
657  void swapUnitigs(const bool isShort, const size_t id_a, const size_t id_b);
658 
659  bool mergeUnitig(const string& seq, const bool verbose = false);
660  bool annotateSplitUnitig(const string& seq, const bool verbose = false);
661  bool annotateSplitUnitig(const string& seq, LockGraph& lck_g, const bool verbose = false);
662 
// Overload enabled only when the template argument is false (enable_if<!is_void>):
// asks the data object attached to `a` to merge with `b` by calling a.getData()->merge(a, b).
663  template<bool is_void>
664  inline typename std::enable_if<!is_void, void>::type mergeData_(const UnitigMap<U, G>& a, const const_UnitigMap<U, G>& b){
665 
666  a.getData()->merge(a, b);
667  }
668 
// Overload enabled only when the template argument is true (enable_if<is_void>):
// empty body, so both arguments are ignored and nothing is merged.
669  template<bool is_void>
670  inline typename std::enable_if<is_void, void>::type mergeData_(const UnitigMap<U, G>& a, const const_UnitigMap<U, G>& b) {}
671 
672  template<bool is_void>
673  typename std::enable_if<!is_void, void>::type deleteUnitig_(const bool isShort, const bool isAbundant,
674  const size_t id_unitig, const bool delete_data = true);
675 
676  template<bool is_void>
677  typename std::enable_if<is_void, void>::type deleteUnitig_( const bool isShort, const bool isAbundant,
678  const size_t id_unitig, const bool delete_data = true);
679 
680  void deleteUnitig_(const bool isShort, const bool isAbundant, const size_t id_unitig, const string& str);
681 
682  template<bool is_void>
683  typename std::enable_if<!is_void, bool>::type extractUnitig_(size_t& pos_v_unitigs, size_t& nxt_pos_insert_v_unitigs,
684  size_t& v_unitigs_sz, size_t& v_kmers_sz, const vector<pair<int,int>>& sp);
685  template<bool is_void>
686  typename std::enable_if<is_void, bool>::type extractUnitig_(size_t& pos_v_unitigs, size_t& nxt_pos_insert_v_unitigs,
687  size_t& v_unitigs_sz, size_t& v_kmers_sz, const vector<pair<int,int>>& sp);
688 
689  pair<size_t, size_t> extractAllUnitigs();
690 
691  template<bool is_void>
692  typename std::enable_if<!is_void, size_t>::type joinUnitigs_(vector<Kmer>* v_joins = nullptr, const size_t nb_threads = 1);
693 
694  template<bool is_void>
695  typename std::enable_if<is_void, size_t>::type joinUnitigs_(vector<Kmer>* v_joins = nullptr, const size_t nb_threads = 1);
696 
697  void moveToAbundant();
698  void setFullCoverage(const size_t cov) const;
699 
700  void createJoinHT(vector<Kmer>* v_joins, KmerHashTable<Kmer>& joins, const size_t nb_threads) const;
701  void createJoinHT(vector<Kmer>* v_joins, KmerHashTable<char>& joins, const size_t nb_threads) const;
702 
703  bool checkJoin(const Kmer& a, const const_UnitigMap<U, G>& cm_a, Kmer& b) const;
704  void check_fp_tips(KmerHashTable<bool>& ignored_km_tips);
705  size_t removeUnitigs(bool rmIsolated, bool clipTips, vector<Kmer>& v);
706 
707  size_t joinTips(string filename_MBBF_uniq_kmers, const size_t nb_threads = 1, const bool verbose = false);
708  vector<Kmer> extractMercyKmers(BlockedBloomFilter& bf_uniq_km, const size_t nb_threads = 1, const bool verbose = false);
709 
710  void writeGFA(const string& graphfilename, const size_t nb_threads = 1) const;
711  void writeFASTA(const string& graphfilename) const;
712 
713  void readGFA(const string& graphfilename, const size_t nb_threads = 1);
714  void readFASTA(const string& graphfilename, const size_t nb_threads = 1);
715 
716  template<bool is_void>
717  typename std::enable_if<!is_void, void>::type writeGFA_sequence_(GFA_Parser& graph, KmerHashTable<size_t>& idmap) const;
718  template<bool is_void>
719  typename std::enable_if<is_void, void>::type writeGFA_sequence_(GFA_Parser& graph, KmerHashTable<size_t>& idmap) const;
720 
721  void mapRead(const const_UnitigMap<U, G>& um);
722  void mapRead(const const_UnitigMap<U, G>& um, LockGraph& lck_g);
723 
724  void unmapRead(const const_UnitigMap<U, G>& um);
725  void unmapRead(const const_UnitigMap<U, G>& um, LockGraph& lck_g);
726 
727  void setKmerGmerLength(const int kmer_length, const int minimizer_length = -1);
728  void print() const;
729 
730  vector<pair<size_t, UnitigMap<U, G>>> searchSequence( const string& seq, const bool exact, const bool insertion, const bool deletion,
731  const bool substitution, const double ratio_kmers, const bool or_exclusive_match);
732 
733  vector<pair<size_t, const_UnitigMap<U, G>>> searchSequence( const string& seq, const bool exact, const bool insertion, const bool deletion,
734  const bool substitution, const double ratio_kmers, const bool or_exclusive_match) const;
735 
736  int k_;
737  int g_;
738 
739  bool invalid;
740 
741  static const int tiny_vector_sz = 2;
742  static const int min_abundance_lim = 15;
743  static const int max_abundance_lim = 15;
744 
745  typedef KmerHashTable<CompressedCoverage_t<U>> h_kmers_ccov_t;
746 
747  vector<Unitig<U>*> v_unitigs;
748 
749  KmerCovIndex<U> km_unitigs;
750  MinimizerIndex hmap_min_unitigs;
751 
752  h_kmers_ccov_t h_kmers_ccov;
753 
754  BlockedBloomFilter bf;
755 
756  wrapperData<G> data;
757 };
758 
759 #include "CompactedDBG.tcc"
760 #include "Search.tcc"
761 
762 #endif
CompactedDBG::size
size_t size() const
Return the number of unitigs in the graph.
Definition: CompactedDBG.hpp:585
CompactedDBG::find
const_UnitigMap< U, G > find(const Kmer &km, const bool extremities_only=false) const
Find the unitig containing the queried k-mer in the Compacted de Bruijn graph.
CompactedDBG::operator=
CompactedDBG< U, G > & operator=(CompactedDBG< U, G > &&o)
Move assignment operator (move a compacted de Bruijn graph).
CompactedDBG::simplify
bool simplify(const bool delete_short_isolated_unitigs=true, const bool clip_short_tips=true, const bool verbose=false)
Simplify the Compacted de Bruijn graph: clip short (< 2k length) tips and/or delete short (< 2k lengt...
CDBG_Build_opt::useMercyKmers
bool useMercyKmers
Keep in the graph low coverage k-mers (cov=1) connecting tips of the graph.
Definition: CompactedDBG.hpp:156
CompactedDBG::write
bool write(const string &output_filename, const size_t nb_threads=1, const bool GFA_output=true, const bool verbose=false) const
Write the Compacted de Bruijn graph to disk (GFA1 or FASTA format).
CDBG_Data_t::extract
void extract(const UnitigMap< Unitig_data_t, Graph_data_t > &um_src, bool last_extraction)
Extract data corresponding to a sub-unitig of a unitig A.
Definition: CompactedDBG.hpp:264
CompactedDBG::operator+=
CompactedDBG< U, G > & operator+=(const CompactedDBG< U, G > &o)
Addition assignment operator (merge a compacted de Bruijn graph).
CompactedDBG::operator=
CompactedDBG< U, G > & operator=(const CompactedDBG< U, G > &o)
Copy assignment operator (copy a compacted de Bruijn graph).
CDBG_Build_opt::outFilenameBBF
string outFilenameBBF
String containing the name of a Bloom filter file that will be generated by CompactedDBG<U,...
Definition: CompactedDBG.hpp:138
CDBG_Data_t::serialize
string serialize(const const_UnitigMap< Unitig_data_t, Graph_data_t > &um_src) const
Serialize the data to a GFA-formatted string.
Definition: CompactedDBG.hpp:274
CompactedDBG::add
bool add(const string &seq, const bool verbose=false)
Add a sequence to the Compacted de Bruijn graph.
CompactedDBG::isInvalid
bool isInvalid() const
Return a boolean indicating if the graph is invalid (wrong input parameters/files,...
Definition: CompactedDBG.hpp:570
CompactedDBG::findUnitig
UnitigMap< U, G > findUnitig(const char *s, const size_t pos, const size_t len)
Find the unitig containing the k-mer starting at a given position in a query sequence and extends the...
CompactedDBG::getData
const G * getData() const
Return a constant pointer to the graph data.
Definition: CompactedDBG.hpp:595
CompactedDBG::operator==
bool operator==(const CompactedDBG< U, G > &o) const
Equality operator.
Kmer
Interface to store and manipulate k-mers.
Definition: Kmer.hpp:42
CompactedDBG::const_iterator
unitigIterator< U, G, true > const_iterator
A constant iterator for the unitigs of the graph.
Definition: CompactedDBG.hpp:322
CompactedDBG::operator!=
bool operator!=(const CompactedDBG< U, G > &o) const
Inequality operator.
CompactedDBG::remove
bool remove(const const_UnitigMap< U, G > &um, const bool verbose=false)
Remove a unitig from the Compacted de Bruijn graph.
CDBG_Build_opt::prefixFilenameOut
string prefixFilenameOut
Prefix for the name of the file to which the graph must be written.
Definition: CompactedDBG.hpp:163
CompactedDBG::getK
int getK() const
Return the length of k-mers of the graph.
Definition: CompactedDBG.hpp:575
CompactedDBG::read
bool read(const string &input_filename, const size_t nb_threads=1, const bool verbose=false)
Read a Compacted de Bruijn graph from disk (GFA1 or FASTA format).
CompactedDBG::getG
int getG() const
Return the length of minimizers of the graph.
Definition: CompactedDBG.hpp:580
CDBG_Build_opt::nb_bits_unique_kmers_bf
size_t nb_bits_unique_kmers_bf
Number of Bloom filter bits per k-mer occurring at least once in the FASTA/FASTQ/GFA files of CDBG_Bu...
Definition: CompactedDBG.hpp:134
CompactedDBG::CompactedDBG
CompactedDBG(const CompactedDBG< U, G > &o)
Copy constructor (copy a compacted de Bruijn graph).
CompactedDBG::CompactedDBG
CompactedDBG(CompactedDBG< U, G > &&o)
Move constructor (move a compacted de Bruijn graph).
CompactedDBG::CompactedDBG
CompactedDBG(const int kmer_length=31, const int minimizer_length=-1)
Constructor (set up an empty compacted dBG).
UnitigMap.hpp
UnitigMap type interface.
CompactedDBG::build
bool build(CDBG_Build_opt &opt)
Build the Compacted de Bruijn graph.
CDBG_Build_opt::build
bool build
Boolean indicating if the graph must be built.
Definition: CompactedDBG.hpp:150
CDBG_Build_opt::filename_seq_in
vector< string > filename_seq_in
Vector of strings, each string is the name of a FASTA/FASTQ/GFA file to use for the graph constructio...
Definition: CompactedDBG.hpp:140
CDBG_Data_t
If data are to be associated with the unitigs of the compacted de Bruijn graph, those data must be wr...
Definition: CompactedDBG.hpp:212
UnitigMap
Contain all the information for the mapping of a k-mer or a sequence to a unitig of a Compacted de Br...
Definition: UnitigMap.hpp:92
CDBG_Build_opt::verbose
bool verbose
Print information messages during execution if true.
Definition: CompactedDBG.hpp:130
CDBG_Build_opt::filename_graph_in
string filename_graph_in
String containing the name of a GFA file to read using CompactedDBG<U, G>::read.
Definition: CompactedDBG.hpp:165
CompactedDBG::begin
iterator begin()
Create an iterator to the first unitig of the Compacted de Bruijn graph (unitigs are NOT sorted lexic...
CompactedDBG::end
const_iterator end() const
Create a constant iterator to the "past-the-last" unitig of the Compacted de Bruijn graph (unitigs ar...
CompactedDBG::find
UnitigMap< U, G > find(const Kmer &km, const bool extremities_only=false)
Find the unitig containing the queried k-mer in the Compacted de Bruijn graph.
CDBG_Build_opt::clipTips
bool clipTips
Clip short tips (length < 2k) of the graph (not used by CompactedDBG<U, G>::build).
Definition: CompactedDBG.hpp:154
Unitig.hpp
The Unitig interface.
CompactedDBG::findUnitig
const_UnitigMap< U, G > findUnitig(const char *s, const size_t pos, const size_t len) const
Find the unitig containing the k-mer starting at a given position in a query sequence and extends the...
CDBG_Build_opt::nb_bits_non_unique_kmers_bf
size_t nb_bits_non_unique_kmers_bf
Number of Bloom filter bits per k-mer occurring at least twice in the FASTA/FASTQ/GFA files of CDBG_B...
Definition: CompactedDBG.hpp:135
CompactedDBG::begin
const_iterator begin() const
Create a constant iterator to the first unitig of the Compacted de Bruijn graph (unitigs are NOT sor...
CDBG_Build_opt::update
bool update
Boolean indicating if the graph must be updated.
Definition: CompactedDBG.hpp:151
CDBG_Data_t::clear
void clear(const UnitigMap< Unitig_data_t, Graph_data_t > &um_dest)
Clear the data associated with a unitig.
Definition: CompactedDBG.hpp:221
CompactedDBG::searchSequence
vector< pair< size_t, const_UnitigMap< U, G > > > searchSequence(const string &s, const bool exact, const bool insertion, const bool deletion, const bool substitution, const bool or_exclusive_match=false) const
Performs exact and/or inexact search of the k-mers of a sequence query in the Compacted de Bruijn gra...
neighborIterator
Iterator for the neighbors (predecessors or successors) of a reference unitig used in a UnitigMap obj...
Definition: NeighborIterator.hpp:34
CDBG_Build_opt::filename_ref_in
vector< string > filename_ref_in
Vector of strings, each string is the name of a FASTA/FASTQ/GFA file to use for the graph constructio...
Definition: CompactedDBG.hpp:141
CDBG_Build_opt::nb_threads
size_t nb_threads
Number of threads to use for building the graph.
Definition: CompactedDBG.hpp:132
unitigIterator
Iterator for the unitigs of a Compacted de Bruijn graph.
Definition: UnitigIterator.hpp:36
CDBG_Data_t::concat
void concat(const UnitigMap< Unitig_data_t, Graph_data_t > &um_dest, const UnitigMap< Unitig_data_t, Graph_data_t > &um_src)
Join data of two unitigs which are going to be concatenated.
Definition: CompactedDBG.hpp:236
CompactedDBG::merge
bool merge(const vector< CompactedDBG > &v, const size_t nb_threads=1, const bool verbose=false)
Merge multiple compacted de Bruijn graphs.
CDBG_Build_opt
Most members of this structure are parameters for CompactedDBG<U, G>::build(), except for:
Definition: CompactedDBG.hpp:128
CompactedDBG::getData
G * getData()
Return a pointer to the graph data.
Definition: CompactedDBG.hpp:590
CompactedDBG::clear
void clear()
Clear the graph: empty the graph and reset its parameters.
CompactedDBG::end
iterator end()
Create an iterator to the "past-the-last" unitig of the Compacted de Bruijn graph (unitigs are NOT so...
Kmer.hpp
Interface for the class Kmer:
CompactedDBG::searchSequence
vector< pair< size_t, UnitigMap< U, G > > > searchSequence(const string &s, const bool exact, const bool insertion, const bool deletion, const bool substitution, const bool or_exclusive_match=false)
Performs exact and/or inexact search of the k-mers of a sequence query in the Compacted de Bruijn gra...
UnitigMap::getData
Unitig_data_ptr_t getData() const
Get a pointer to the data associated with the reference unitig used in the mapping.
UnitigIterator.hpp
The unitigIterator type interface.
CompactedDBG::~CompactedDBG
virtual ~CompactedDBG()
Destructor.
CDBG_Build_opt::deleteIsolated
bool deleteIsolated
Remove short isolated unitigs (length < 2k) of the graph (not used by CompactedDBG<U,...
Definition: CompactedDBG.hpp:155
CompactedDBG::length
size_t length() const
Return the sum of the unitigs length.
DataStorage
Definition: ColorSet.hpp:16
CDBG_Data_t::merge
void merge(const UnitigMap< Unitig_data_t, Graph_data_t > &um_dest, const const_UnitigMap< Unitig_data_t, Graph_data_t > &um_src)
Merge the data of a sub-unitig B to the data of a sub-unitig A.
Definition: CompactedDBG.hpp:248
CompactedDBG::iterator
unitigIterator< U, G, false > iterator
An iterator for the unitigs of the graph.
Definition: CompactedDBG.hpp:321
CompactedDBG
Represent a Compacted de Bruijn graph.
Definition: CompactedDBG.hpp:305
CDBG_Build_opt::outputGFA
bool outputGFA
Boolean indicating if the graph is written to a GFA file (true) or if the unitigs are written to a FA...
Definition: CompactedDBG.hpp:158
CompactedDBG::merge
bool merge(const CompactedDBG &o, const size_t nb_threads=1, const bool verbose=false)
Merge a compacted de Bruijn graph.
CompactedDBG::nbKmers
size_t nbKmers() const
Return the number of k-mers in the graph.
CDBG_Build_opt::inFilenameBBF
string inFilenameBBF
String containing the name of a Bloom filter file that is generated by CompactedDBG<U,...
Definition: CompactedDBG.hpp:137
CDBG_Build_opt::k
int k
Length of k-mers (not used by CompactedDBG<U, G>::build).
Definition: CompactedDBG.hpp:148