Bifrost
ColoredCDBG.hpp
Go to the documentation of this file.
1 #ifndef BIFROST_COLOREDCDBG_HPP
2 #define BIFROST_COLOREDCDBG_HPP
3 
4 #include <iostream>
5 #include <random>
6 #include <unordered_map>
7 #include <unordered_set>
8 
9 #include "CompactedDBG.hpp"
10 #include "DataManager.hpp"
11 
12 #include "getRSS.h"
13 
31 
33 
35 
36  CCDBG_Build_opt() : outputColors(true) {}
37 };
38 
39 template<typename U = void> using UnitigColorMap = UnitigMap<DataAccessor<U>, DataStorage<U>>;
40 template<typename U = void> using const_UnitigColorMap = const_UnitigMap<DataAccessor<U>, DataStorage<U>>;
41 
// CCDBG_Data_t: base class for user data associated with the unitigs of a
// colored and compacted de Bruijn graph. ColoredCDBG's static_assert requires
// a non-void Unitig_data_t to derive from CCDBG_Data_t<Unitig_data_t>.
// Every member function below is a default: the void ones have empty bodies
// and serialize() returns an empty string.
// NOTE(review): presumably the derived class supplies its own versions of
// these functions and the library calls them through the CRTP type; the call
// sites are not visible in this listing - confirm.
59 template<typename Unitig_data_t> //Curiously Recurring Template Pattern (CRTP)
60 class CCDBG_Data_t {
61 
 // Short alias for the user data type.
62  typedef Unitig_data_t U;
63 
64  public:
65 
 // Clear the data associated with a unitig.
 // um_dest: mapping to the unitig whose data is to be cleared.
 // The default does nothing.
72  void clear(const UnitigColorMap<U>& um_dest){}
73 
 // Join the data of two unitigs which are going to be concatenated.
 // NOTE(review): presumably the joined data ends up on um_dest - confirm
 // which unitig survives the concatenation.
 // The default does nothing.
88  void concat(const UnitigColorMap<U>& um_dest, const UnitigColorMap<U>& um_src){}
89 
 // Merge the data of a sub-unitig B to the data of a sub-unitig A.
 // NOTE(review): presumably um_dest is A and um_src (read-only mapping) is B.
 // The default does nothing.
100  void merge(const UnitigColorMap<U>& um_dest, const const_UnitigColorMap<U>& um_src){}
101 
 // Extract data corresponding to a sub-unitig of a unitig A.
 // uc_dest: pointer to a UnitigColors (the k-mer color sets of a unitig).
 // um_src: mapping to the source (sub-)unitig.
 // last_extraction: NOTE(review): presumably true on the final extraction
 // from the same source unitig - confirm.
 // The default does nothing.
117  void extract(const UnitigColors* uc_dest, const UnitigColorMap<U>& um_src, const bool last_extraction){}
118 
 // Serialize the data to a GFA-formatted string.
 // um_src: read-only mapping to the unitig whose data is serialized.
 // The default returns an empty string.
126  string serialize(const const_UnitigColorMap<U>& um_src) const {
127 
128  return string();
129  }
130 };
131 
// ColoredCDBG: represents a Colored and Compacted de Bruijn graph.
// It publicly derives from CompactedDBG, instantiated with
// DataAccessor<Unitig_data_t> and DataStorage<Unitig_data_t>.
// Unitig_data_t defaults to void (no user data); otherwise it must be a class
// extending CCDBG_Data_t<Unitig_data_t>, as enforced by the static_assert.
150 template<typename Unitig_data_t = void>
151 class ColoredCDBG : public CompactedDBG<DataAccessor<Unitig_data_t>, DataStorage<Unitig_data_t>> {
152 
 // Compile-time check on the user data type (void or derived from CCDBG_Data_t).
153  static_assert(is_void<Unitig_data_t>::value || is_base_of<CCDBG_Data_t<Unitig_data_t>, Unitig_data_t>::value,
154  "Type Unitig_data_t of data associated with vertices of class ColoredCDBG<Unitig_data_t> must "
155  " be void (no data) or a class extending class CCDBG_Data_t");
156 
 // Short alias for the user data type.
157  typedef Unitig_data_t U;
158 
 // Every DataAccessor specialization is a friend of this class.
159  template<typename U> friend class DataAccessor;
160 
161  public:
162 
 // Constructor (set up an empty colored cdBG).
 // kmer_length defaults to DEFAULT_K and minimizer_length to -1.
 // NOTE(review): the meaning of -1 for minimizer_length is not shown here.
167  ColoredCDBG(int kmer_length = DEFAULT_K, int minimizer_length = -1);
168 
 // NOTE(review): the index of this listing also describes a copy constructor,
 // a move constructor, copy/move assignment operators and operator+=; their
 // declarations are not visible in the numbered lines of this listing.
175 
182 
190 
198 
 // Equality operator.
203  bool operator==(const ColoredCDBG& o) const;
204 
 // Inequality operator.
209  inline bool operator!=(const ColoredCDBG& o) const;
210 
224 
 // Clear the graph: remove unitigs, user data and colors + reset its parameters.
227  void clear();
228 
 // Build the Colored and compacted de Bruijn graph (only the unitigs).
 // NOTE(review): the meaning of the bool result is not shown here -
 // presumably true on success.
234  bool buildGraph(const CCDBG_Build_opt& opt);
235 
 // Map the colors to the unitigs.
 // NOTE(review): presumably called after buildGraph() and returns true on
 // success - confirm.
241  bool buildColors(const CCDBG_Build_opt& opt);
242 
 // Write a colored and compacted de Bruijn graph to disk.
 // Defaults: 1 thread, index file written, output not compressed, not verbose.
253  bool write(const string& prefix_output_fn, const size_t nb_threads = 1, const bool write_index_file = true, const bool compress_output = false, const bool verbose = false) const;
254 
 // Read a colored and compacted de Bruijn graph from disk
 // (graph file + colors file).
265  bool read(const string& input_graph_fn, const string& input_colors_fn, const size_t nb_threads = 1, const bool verbose = false);
266 
 // Read a colored and compacted de Bruijn graph from disk using an index file
 // (graph file + index file + colors file).
276  bool read(const string& input_graph_fn, const string& input_index_fn, const string& input_colors_fn, const size_t nb_threads = 1, const bool verbose = false);
277 
 // Merge a colored and compacted de Bruijn graph (o is taken by const reference).
290  bool merge(const ColoredCDBG& o, const size_t nb_threads = 1, const bool verbose = false);
291 
 // Merge and clear a colored and compacted de Bruijn graph
 // (o is taken by rvalue reference).
306  bool merge(ColoredCDBG&& o, const size_t nb_threads = 1, const bool verbose = false);
307 
 // Merge multiple colored and compacted de Bruijn graphs.
318  bool merge(const vector<ColoredCDBG>& v, const size_t nb_threads = 1, const bool verbose = false);
319 
 // Merge and clear multiple colored and compacted de Bruijn graphs.
332  bool merge(vector<ColoredCDBG>&& v, const size_t nb_threads = 1, const bool verbose = false);
333 
 // Get the name of a color, given its identifier color_id.
338  string getColorName (const size_t color_id) const;
339 
 // Get the names of all colors.
344  vector<string> getColorNames() const;
345 
 // Get the number of colors in the graph (forwards to the graph data
 // object returned by getData()).
349  inline size_t getNbColors() const { return this->getData()->getNbColors(); }
350 
 // NOTE(review): no description of search() is visible in this listing.
 // Judging by the parameter names only, it presumably queries the graph with
 // the sequences of query_filenames and writes results to files prefixed by
 // out_filename_prefix - confirm against the definition.
351  bool search(const vector<string>& query_filenames, const string& out_filename_prefix,
352  const double ratio_kmers, const bool inexact_search, const size_t nb_threads,
353  const bool verbose = false) const;
354 
355  private:
356 
 // Private helpers: only declared here. ColoredCDBG.tcc is included at the
 // end of this header and presumably holds the definitions - confirm.
357  void checkColors(const vector<string>& filename_seq_in) const;
358  bool loadColors(const string& input_graph_fn, const string& input_colors_fn, const size_t nb_threads, const bool verbose);
359 
360  void initUnitigColors(const CCDBG_Build_opt& opt, const size_t max_nb_hash = 31);
361  void buildUnitigColors(const size_t nb_threads);
362  //void buildUnitigColors2(const size_t nb_threads);
363 
364  void resizeDataUC(const size_t sz, const size_t nb_threads = 1, const size_t max_nb_hash = 31);
365 
 // NOTE(review): presumably flags a graph left in an unusable state; where it
 // is set and read is not visible in this listing.
366  bool invalid;
367 };
368 
369 #include "ColoredCDBG.tcc"
370 
371 #endif
CCDBG_Build_opt::filename_colors_in
string filename_colors_in
String containing the name of a Bifrost color file to read in ColoredCDBG<U>::read().
Definition: ColoredCDBG.hpp:32
ColoredCDBG::ColoredCDBG
ColoredCDBG(const ColoredCDBG &o)
Copy constructor (copy a colored cdBG).
ColoredCDBG::ColoredCDBG
ColoredCDBG(ColoredCDBG &&o)
Move constructor (move a colored cdBG).
ColoredCDBG::operator=
ColoredCDBG & operator=(const ColoredCDBG &o)
Copy assignment operator (copy a colored cdBG).
CCDBG_Data_t
If data are to be associated with the unitigs of the colored and compacted de Bruijn graph,...
Definition: ColoredCDBG.hpp:60
ColoredCDBG::operator=
ColoredCDBG & operator=(ColoredCDBG &&o)
Move assignment operator (move a colored cdBG).
CCDBG_Build_opt
This structure inherits from CDBG_Build_opt and introduces only a few new members which are color-rel...
Definition: ColoredCDBG.hpp:30
ColoredCDBG::merge
bool merge(const vector< ColoredCDBG > &v, const size_t nb_threads=1, const bool verbose=false)
Merge multiple colored and compacted de Bruijn graphs.
ColoredCDBG::buildColors
bool buildColors(const CCDBG_Build_opt &opt)
Map the colors to the unitigs.
ColoredCDBG::read
bool read(const string &input_graph_fn, const string &input_index_fn, const string &input_colors_fn, const size_t nb_threads=1, const bool verbose=false)
Read a colored and compacted de Bruijn graph from disk using an index file.
CCDBG_Data_t::concat
void concat(const UnitigColorMap< U > &um_dest, const UnitigColorMap< U > &um_src)
Join data of two unitigs which are going to be concatenated.
Definition: ColoredCDBG.hpp:88
UnitigMap
Contain all the information for the mapping of a k-mer or a sequence to a unitig of a Compacted de Br...
Definition: UnitigMap.hpp:92
CompactedDBG.hpp
Interface for the Compacted de Bruijn graph API.
ColoredCDBG::write
bool write(const string &prefix_output_fn, const size_t nb_threads=1, const bool write_index_file=true, const bool compress_output=false, const bool verbose=false) const
Write a colored and compacted de Bruijn graph to disk.
CCDBG_Data_t::extract
void extract(const UnitigColors *uc_dest, const UnitigColorMap< U > &um_src, const bool last_extraction)
Extract data corresponding to a sub-unitig of a unitig A.
Definition: ColoredCDBG.hpp:117
ColoredCDBG::buildGraph
bool buildGraph(const CCDBG_Build_opt &opt)
Build the Colored and compacted de Bruijn graph (only the unitigs).
UnitigColors
Represent the k-mer color sets of a unitig.
Definition: ColorSet.hpp:21
ColoredCDBG::getColorNames
vector< string > getColorNames() const
Get the names of all colors.
CCDBG_Build_opt::outputColors
bool outputColors
Boolean indicating if the graph should be colored or not.
Definition: ColoredCDBG.hpp:34
ColoredCDBG::merge
bool merge(const ColoredCDBG &o, const size_t nb_threads=1, const bool verbose=false)
Merge a colored and compacted de Bruijn graph.
CCDBG_Data_t::merge
void merge(const UnitigColorMap< U > &um_dest, const const_UnitigColorMap< U > &um_src)
Merge the data of a sub-unitig B to the data of a sub-unitig A.
Definition: ColoredCDBG.hpp:100
CCDBG_Data_t::serialize
string serialize(const const_UnitigColorMap< U > &um_src) const
Serialize the data to a GFA-formatted string.
Definition: ColoredCDBG.hpp:126
ColoredCDBG::merge
bool merge(ColoredCDBG &&o, const size_t nb_threads=1, const bool verbose=false)
Merge and clear a colored and compacted de Bruijn graph.
ColoredCDBG::clear
void clear()
Clear the graph: remove unitigs, user data and colors + reset its parameters.
ColoredCDBG::operator!=
bool operator!=(const ColoredCDBG &o) const
Inequality operator.
ColoredCDBG::read
bool read(const string &input_graph_fn, const string &input_colors_fn, const size_t nb_threads=1, const bool verbose=false)
Read a colored and compacted de Bruijn graph from disk.
CCDBG_Data_t::clear
void clear(const UnitigColorMap< U > &um_dest)
Clear the data associated with a unitig.
Definition: ColoredCDBG.hpp:72
CDBG_Build_opt
Most members of this structure are parameters for CompactedDBG<U, G>::build(), except for:
Definition: CompactedDBG.hpp:128
ColoredCDBG::operator==
bool operator==(const ColoredCDBG &o) const
Equality operator.
CompactedDBG< DataAccessor< void >, DataStorage< void > >::getData
G * getData()
Return a pointer to the graph data.
Definition: CompactedDBG.hpp:620
ColoredCDBG::getNbColors
size_t getNbColors() const
Get the number of colors in the graph.
Definition: ColoredCDBG.hpp:349
DataAccessor
Interface to access the colors and the data associated with a unitig of a ColoredCDBG.
Definition: DataAccessor.hpp:21
ColoredCDBG::ColoredCDBG
ColoredCDBG(int kmer_length=31, int minimizer_length=-1)
Constructor (set up an empty colored cdBG).
ColoredCDBG::getColorName
string getColorName(const size_t color_id) const
Get the name of a color.
DataStorage
Definition: ColorSet.hpp:16
ColoredCDBG::operator+=
ColoredCDBG & operator+=(const ColoredCDBG &o)
Addition assignment operator (merge a colored cdBG).
ColoredCDBG::merge
bool merge(vector< ColoredCDBG > &&v, const size_t nb_threads=1, const bool verbose=false)
Merge and clear multiple colored and compacted de Bruijn graphs.
CompactedDBG
Represent a Compacted de Bruijn graph.
Definition: CompactedDBG.hpp:313
ColoredCDBG
Represent a Colored and Compacted de Bruijn graph.
Definition: ColoredCDBG.hpp:151