#!/usr/bin/env bash

# exit when any command fails
set -eo pipefail

input_fasta=false
output_dir=""
temp_dir=""
input_paf=false
resume=false
map_pct_id=90
n_mappings=false
segment_length=5000
block_length=false
sparse_map=false
mash_kmer=false
mash_kmer_thres=false
min_match_length=19
sparse_factor=0
transclose_batch=10000000
n_haps=false
block_ratio_min=0
pad_max_depth=100
max_path_jump=0
max_edge_jump=0
target_poa_length=700,900,1100
poa_params=false
poa_padding=0.001
run_abpoa=false
run_global_poa=false
do_viz=true
do_layout=true
threads=1
poa_threads=0
mapper=wfmash
no_merge_segments=false
do_stats=false
exclude_delim=false
write_maf=false
consensus_spec=false # Disabled for the moment due to PGGB's #133 and #182 issues
consensus_prefix=Consensus_
no_splits=false
multiqc=false
keep_intermediate_files=false
compress=false
normalize=true
vcf_spec=false
show_version=false

if [ $# -eq 0 ]; then
    show_help=true
fi

# read the options
cmd=$0" "$@
TEMP=`getopt -o i:o:D:a:p:n:s:l:K:F:k:x:f:B:H:j:P:O:Me:t:T:vhASY:G:Q:d:I:R:NbrmZzV: --long input-fasta:,output-dir:,temp-dir:,input-paf:,map-pct-id:,n-mappings:,segment-length:,block-length-min:,mash-kmer:,mash-kmer-thres:,min-match-length:,sparse-map:,sparse-factor:,transclose-batch:,n-haps:,path-jump-max:,subpath-min:,edge-jump-max:,threads:,poa-threads:,skip-viz,do-layout,help,no-merge-segments,do-stats,exclude-delim:,poa-length-target:,poa-params:,poa-padding:,run-abpoa,global-poa,write-maf,consensus-spec:,consensus-prefix:,pad-max-depth:,block-id-min:,block-ratio-min:,no-splits,resume,keep-temp-files,multiqc,compress,vcf-spec:,version -n 'pggb' -- "$@"`
eval set -- "$TEMP"

# extract options and their arguments into variables.
while true ; do
    case "$1" in
        -i|--input-fasta) input_fasta=$2 ; shift 2 ;;
        -o|--output-dir) output_dir=$2 ; shift 2 ;;
        -D|--temp-dir) temp_dir=$2 ; shift 2 ;;
        -a|--input-paf) input_paf=$2 ; shift 2 ;;
        -p|--map-pct-id) map_pct_id=$2 ; shift 2 ;;
        -n|--n-mappings) n_mappings=$2 ; shift 2 ;;
        -s|--segment-length) segment_length=$2 ; shift 2 ;;
        -l|--block-length) block_length=$2 ; shift 2 ;;
        -N|--no-splits) no_splits=true ; shift ;;
        -K|--mash-kmer) mash_kmer=$2 ; shift 2 ;;
        -F|--mash-kmer-thres) mash_kmer_thres=$2 ; shift 2 ;;
        -Y|--exclude-delim) exclude_delim=$2 ; shift 2 ;;
        -x|--sparse-map) sparse_map=$2 ; shift 2 ;;
        -k|--min-match-length) min_match_length=$2 ; shift 2 ;;
        -f|--sparse-factor) sparse_factor=$2 ; shift 2 ;;
        -B|--transclose-batch) transclose_batch=$2 ; shift 2 ;;
        -H|--n-haps) n_haps=$2 ; shift 2 ;;
        -d|--pad-max-depth) pad_max_depth=$2 ; shift 2 ;;
        -j|--path-jump-max) max_path_jump=$2 ; shift 2 ;;
        -e|--edge-jump-max) max_edge_jump=$2 ; shift 2 ;;
        -G|--poa-length-target) target_poa_length=$2 ; shift 2 ;;
        -P|--poa-params) poa_params=$2 ; shift 2 ;;
        -O|--poa-padding) poa_padding=$2 ; shift 2 ;;
        -b|--run-abpoa) run_abpoa=true ; shift ;;
        -z|--global-poa) run_global_poa=true ; shift ;;
        -M|--write-maf) write_maf=true ; shift ;;
        #-C|--consensus-spec) consensus_spec=$2 ; shift 2 ;;
        -Q|--consensus-prefix) consensus_prefix=$2 ; shift 2 ;;
        -t|--threads) threads=$2 ; shift 2 ;;
        -T|--poa-threads) poa_threads=$2 ; shift 2 ;;
        -v|--skip-viz) do_viz=false ; do_layout=false; shift ;;
        -S|--do-stats) do_stats=true ; shift ;;
        -m|--multiqc) multiqc=true ; shift ;;
        -r|--resume) resume=true ; shift ;;
        -A|--keep-temp-files) keep_intermediate_files=true ; shift ;;
        -Z|--compress) compress=true ; shift ;;
        -V|--vcf-spec) vcf_spec=$2 ; shift 2 ;;
        -h|--help) show_help=true ; shift ;;
        --version) show_version=true ; shift ;;
        --) shift ; break ;;
        *) echo "$2" "Internal error!" ; exit 1 ;;
    esac
done

if [ $show_version == true ]; then
    SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
    cd "$SCRIPT_DIR"
    GIT_VERSION=$(git describe --always --tags)
    echo "pggb $GIT_VERSION"
    cd - &> /dev/null
    exit
fi

# if a PAF is given as input, we set the segment_length by to an arbitrary number, since we will skip the wfmash alignment process
if [[ $input_paf != false ]]; then
    if [[ $segment_length == false ]];     then
        segment_length=42
    fi
    mapper=EXTERNAL
fi

if [[
       "$input_fasta" == false
    || $n_mappings == false
   ]]; then
    show_help=true
    >&2 echo "ERROR: mandatory arguments -i and -n"
fi

if (( "$n_mappings" < 2 )); then
    show_help=true
    >&2 echo "ERROR: -n must be greater than or equal to 2"
fi

if [[ $poa_threads == 0 ]]; then
    poa_threads=$threads
fi

block_length_cmd=""
if [[ $block_length != false ]]; then
    block_length_cmd="-l $block_length"
fi
# wfmash auto-sets our block length based on its parameters

mash_kmer_cmd=""
if [[ $mash_kmer != false ]]; then
    mash_kmer_cmd="-k $mash_kmer"
fi
# wfmash auto-sets our kmer length based on its parameters

mash_kmer_thres_cmd=""
if [[ $mash_kmer_thres != false ]]; then
    mash_kmer_thres_cmd="-H $mash_kmer_thres"
fi
# wfmash auto-sets our kmer frequency threshold based on its parameters


# our partial order alignment parameters
# poa param suggestions from minimap2
# - asm5, --poa-params 1,19,39,3,81,1, ~0.1 divergence
# - asm10, --poa-params 1,9,16,2,41,1, ~1 divergence
# - asm20, --poa-params 1,4,6,2,26,1, ~5% divergence
# between asm10 and asm20 ~ 1,7,11,2,33,1
poa_params_cmd=""
if [[ $poa_params == false ]]; then
    poa_params_cmd="-P 1,19,39,3,81,1"
else
    if [[ $poa_params == "asm5" ]]; then
        poa_params_cmd="-P 1,19,39,3,81,1"
    elif [[ $poa_params == "asm10" ]]; then
        poa_params_cmd="-P 1,9,16,2,41,1"
    elif [[ $poa_params == "asm15" ]]; then
        poa_params_cmd="-P 1,7,11,2,33,1"
    elif [[ $poa_params == "asm20" ]]; then
        poa_params_cmd="-P 1,4,6,2,26,1"
    else
        poa_params_cmd="-P $poa_params"
    fi
fi

if [[ $n_haps == false ]]; then
    n_haps=$n_mappings
fi

if [ $show_help ]; then
    padding=`printf %${#0}s` # prints as many spaces as the length of $0
    echo "usage: $0 -i <input-fasta> -n <n-mappings> [options]"
    echo "options:"
    echo "   [wfmash]"
    echo "    -i, --input-fasta FILE      input FASTA/FASTQ file"
    echo "    -s, --segment-length N      segment length for mapping [default: 5k]"
    echo "    -l, --block-length N        minimum block length filter for mapping [default: 5*segment-length]"
    echo "    -N, --no-split              disable splitting of input sequences during mapping [enabled by default]"
    echo "    -p, --map-pct-id PCT        percent identity for mapping/alignment [default: 90]"
    echo "    -n, --n-mappings N          number of mappings to retain for each segment"
    echo "    -x, --sparse-map N          keep this fraction of mappings ('auto' for giant component heuristic) [default: 1.0]"
    echo "    -K, --mash-kmer N           kmer size for mapping [default: 19]"
    echo "    -F, --mash-kmer-thres N     ignore the top % most-frequent kmers [default: 0.001]"
    echo "    -Y, --exclude-delim C       skip mappings between sequences with the same name prefix before"
    echo "                                the given delimiter character [default: all-vs-all and !self]"
    echo "   [seqwish]"
    echo "    -k, --min-match-len N       filter exact matches below this length [default: 19]"
    echo "    -f, --sparse-factor N       keep this randomly selected fraction of input matches [default: no sparsification]"
    echo "    -B, --transclose-batch      number of bp to use for transitive closure batch [default: 10000000]"
    echo "   [smoothxg]"
    echo "    -H, --n-haps N              number of haplotypes, if different than that set with -n [default: n-mappings]"
    echo "    -j, --path-jump-max         maximum path jump to include in block [default: 0]"
    echo "    -e, --edge-jump-max N       maximum edge jump before breaking [default: 0 / off]"
    echo "    -G, --poa-length-target N,M target sequence length for POA, one per pass [default: 700,900,1100]"
    echo "    -P, --poa-params PARAMS     score parameters for POA in the form of match,mismatch,gap1,ext1,gap2,ext2"
    echo "                                may also be given as presets: asm5, asm10, asm15, asm20"
    echo "                                [default: 1,19,39,3,81,1 = asm5]"
    echo "    -O, --poa-padding N         pad each end of each sequence in POA with N*(mean_seq_len) bp [default: 0.001]"
    echo "    -b, --run-abpoa             run abPOA [default: SPOA]"
    echo "    -z, --global-poa            run the POA in global mode [default: local mode]"
    echo "    -d, --pad-max-depth N       depth/haplotype at which we don't pad the POA problem [default: 100]"
    echo "    -M, --write-maf             write MAF output representing merged POA blocks [default: off]"
    echo "    -Q, --consensus-prefix P    use this prefix for consensus path names [default: Consensus_]"
    #echo "    -C, --consensus-spec SPEC   consensus graph specification: write consensus graphs to"
    #echo "                                BASENAME.cons_[spec].gfa; where each spec contains at least a min_len parameter"
    #echo "                                (which defines the length of divergences from consensus paths to preserve in the"
    #echo "                                output), optionally a file containing reference paths to preserve in the output,"
    #echo "                                a flag (y/n) indicating whether we should also use the POA consensus paths, a"
    #echo "                                minimum coverage of consensus paths to retain (min_cov), and a maximum allele"
    #echo "                                length (max_len, defaults to 1e6); implies -a; example:"
    #echo "                                cons,100,1000:refs1.txt:n,1000:refs2.txt:y:2.3:1000000,10000"
    #echo "                                [default: off]"
    echo "   [odgi]"
    echo "    -v, --skip-viz              don't render visualizations of the graph in 1D and 2D [default: make them]"
    echo "    -S, --stats                 generate statistics of the seqwish and smoothxg graph [default: off]"
    echo "   [vg]"
    echo "    -V, --vcf-spec SPEC         specify a set of VCFs to produce with SPEC = REF:DELIM[:LEN][,REF:DELIM:[LEN]]*"
    echo "                                the paths matching ^REF are used as a reference, while the sample haplotypes"
    echo "                                are derived from path names, e.g. when DELIM=# and with '-V chm13:#',"
    echo "                                a path named HG002#1#ctg would be assigned to sample HG002 phase 1."
    echo "                                If LEN is specified and greater than 0, the VCFs are decomposed, filtering "
    echo "                                sites whose max allele length is greater than LEN. [default: off]"
    echo "   [multiqc]"
    echo "    -m, --multiqc               generate MultiQC report of graphs' statistics and visualizations,"
    echo "                                automatically runs odgi stats [default: off]"
    echo "   [general]"
    echo "    -o, --output-dir PATH       output directory"
    echo "    -D, --temp-dir PATH         directory for temporary files"
    echo "    -a, --input-paf FILE        input PAF file; the wfmash alignment step is skipped"
    echo "    -r, --resume                do not overwrite existing outputs in the given directory"
    echo "                                [default: start pipeline from scratch]"
    echo "    -t, --threads N             number of compute threads to use in parallel steps"
    echo "    -T, --poa-threads N         number of compute threads to use during POA (set lower if you OOM during smoothing)"
    echo "    -A, --keep-temp-files       keep intermediate graphs"
    echo "    -Z, --compress              compress alignment (.paf), graph (.gfa, .og), and MSA (.maf) outputs with pigz,"
    echo "                                and variant (.vcf) outputs with bgzip"
    echo "    --version                   display the version of pggb"
    echo "    -h, --help                  this text"
    echo
    echo "Use wfmash, seqwish, smoothxg, odgi, gfaffix, and vg to build, project and display a pangenome graph."
    exit
fi

# Alignment
mapper_letter='W'
n_mappings_minus_1=$( echo "$n_mappings - 1" | bc )
paf_spec=$mapper_letter-s$segment_length-l$block_length-p$map_pct_id-n$n_mappings_minus_1-K$mash_kmer-F$mash_kmer_thres-x$sparse_map

if [[ $no_merge_segments == true ]]; then
    merge_cmd=-M
    paf_spec="$paf_spec"-M
fi

if [[ $no_splits == true ]]; then
    split_cmd=-N
    paf_spec="$paf_spec"-N
fi

if [[ $sparse_map == "auto" ]]; then
    # set sparse mapping using giant component heuristic
    # we keep 10x log(n)/n mappings
    # if this is < 1, otherwise we keep all
    n=$n_haps
    sparse_map_frac=$(echo "x=l($n)/$n * 10; if (x < 1) { x } else { 1 }"  | bc -l | cut -c -8)
    sparse_map_cmd="-x $sparse_map_frac"
elif [[ $sparse_map != false ]]; then
    sparse_map_cmd="-x $sparse_map"
fi

prefix_paf="$input_fasta".$(echo $paf_spec | sha256sum | head -c 7)
if [[ $input_paf != false ]]; then
    prefix_paf=$input_paf
fi

if [[ $exclude_delim != false ]]; then
    exclude_cmd="-Y "$exclude_delim
else
    exclude_cmd=-X
fi

# Graph induction
prefix_seqwish="$prefix_paf".$(echo k$min_match_length-f$sparse_factor-B$transclose_batch | sha256sum | head -c 7)

# Normalization
block_id_min=$(echo "scale=4; $map_pct_id / 100.0" | bc)
prefix_smoothed="$prefix_seqwish".$(echo h$n_haps-G$target_poa_length-j$max_path_jump-e$max_edge_jump-d$pad_max_depth-I$block_id_min-R$block_ratio_min-p$poa_params-O$poa_padding | sha256sum | head -c 7).smooth
prefix_smoothed_output="$prefix_smoothed"


fmt="%C\n%Us user %Ss system %P cpu %es total %MKb max memory"
timer=$(which time)


# Directories
if [[ "$output_dir" != "" ]]; then
	if [ ! -e "$output_dir" ]; then
		mkdir "$output_dir"
	fi

	prefix_paf="$output_dir"/$(basename "$prefix_paf")
	prefix_smoothed_output="$output_dir"/$(basename "$prefix_smoothed")
fi

if [[ "$temp_dir" == "" ]]; then
  temp_dir=$output_dir
fi
temp_dir_was_created=false
if [[ "$temp_dir" != "" ]]; then
  if [ ! -e "$temp_dir" ]; then
    mkdir "$temp_dir"
    temp_dir_was_created=true
  fi

  prefix_seqwish="$temp_dir"/$(basename "$prefix_seqwish")
  prefix_smoothed="$temp_dir"/$(basename "$prefix_smoothed")
fi


date=`date "+%m-%d-%Y_%H:%M:%S"`
log_file="$prefix_smoothed_output".$date.log
param_file="$prefix_smoothed_output".$date.params.yml

# write parameters to log_file:
echo -e "Starting pggb on `date`\n" > "$log_file"
echo -e "Command: $cmd\n" >> "$log_file"
echo -e "PARAMETERS\n" >> "$log_file"
cat <<EOT | tee -a "$log_file" "$param_file" >/dev/null
general:
  input-fasta:        $input_fasta
  output-dir:         $output_dir
  temp-dir:           $temp_dir
  resume:             $resume
  compress:           $compress
  threads:            $threads
wfmash:
  mapping-tool:       $mapper
  no-splits:          $no_splits
  segment-length:     $segment_length
  block-length:       $block_length
  no-merge-segments:  $no_merge_segments
  map-pct-id:         $map_pct_id
  n-mappings:         $n_mappings
  mash-kmer:          $mash_kmer
  mash-kmer-thres:    $mash_kmer_thres
  sparse-map:         $sparse_map
  exclude-delim:      $exclude_delim
seqwish:
  min-match-len:      $min_match_length
  sparse-factor:      $sparse_factor
  transclose-batch:   $transclose_batch
smoothxg:
  n-haps:             $n_haps
  block_id_min:       $block_id_min
  path-jump-max:      $max_path_jump
  edge-jump-max:      $max_edge_jump
  poa-length-target:  $target_poa_length
  poa-params:         $poa_params_cmd
  write-maf:          $write_maf
  consensus-prefix:   $consensus_prefix
  consensus-spec:     $consensus_spec
  pad-max-depth:      $pad_max_depth
  block-id-min:       $block_id_min
  block-ratio-min:    $block_ratio_min
  poa_threads:        $poa_threads
  poa_padding:        $poa_padding
  run_abpoa:          $run_abpoa
  run_global_poa:     $run_global_poa
odgi:
  viz:                $do_viz
  layout:             $do_layout
  stats:              $do_stats
gfaffix:
  normalize:          $normalize
vg:
  deconstruct:        $vcf_spec
reporting:
  multiqc:            $multiqc
EOT

echo -e "\nRunning pggb\n" >> "$log_file"
if [[ ! -s $prefix_paf.$mapper.paf || $resume == false ]]; then
  if [[ "$mapper" == "wfmash" ]]; then
          wfmash_temp_dir=""
          if [[ "$temp_dir" != "" ]]; then
            wfmash_temp_dir="-B $temp_dir"
          fi

          ($timer -f "$fmt" wfmash \
              $exclude_cmd \
              -s $segment_length \
              $block_length_cmd \
              $merge_cmd \
              $split_cmd \
              $mash_kmer_cmd \
              $mash_kmer_thres_cmd \
              $sparse_map_cmd \
              -p $map_pct_id \
              -n $n_mappings_minus_1 \
              $wfmash_temp_dir \
              -t $threads \
              "$input_fasta" "$input_fasta" \
              > "$prefix_paf".$mapper.paf) 2> >(tee -a "$log_file")
  fi
fi

# correct prefix_paf and ajdust input here accordingly
seqwish_paf="$prefix_paf".$mapper.paf
if [[ $input_paf != false ]]; then
    seqwish_paf=$input_paf
fi
if [[ ! -s $prefix_seqwish.seqwish.gfa || $resume == false ]]; then
    seqwish_temp_dir=""
    if [[ "$temp_dir" != "" ]]; then
      seqwish_temp_dir="--temp-dir $temp_dir"
    fi

    $timer -f "$fmt" seqwish \
        -t $threads \
        -s "$input_fasta" \
        -p "$seqwish_paf" \
        -k $min_match_length \
        -f $sparse_factor \
        -g "$prefix_seqwish".seqwish.gfa \
        -B $transclose_batch \
        $seqwish_temp_dir \
        -P \
        2> >(tee -a "$log_file")
fi

if [[ $consensus_spec != false ]]; then
    # for merging consensus (currently problematic) we should add "-M -J 1 -G 150" here
    consensus_params="-C ${prefix_smoothed_output}.cons,$consensus_spec"
else
    consensus_params="-V"
fi

if [[ $write_maf != false ]]; then
    maf_params="-m ${prefix_smoothed_output}.maf"
fi

# how many times will we smooth?
smooth_iterations=$(echo $target_poa_length | tr ',' '\n' | wc -l)

keep_temp=""
if [[ $keep_intermediate_files == true ]]; then
    keep_temp="-K"
fi

smoothxg_temp_dir=""
if [[ "$temp_dir" != "" ]]; then
  smoothxg_temp_dir="-b $temp_dir"
fi

smoothxg_xpoa="-S"
if [[ "$run_abpoa" == true ]]; then
  smoothxg_xpoa=""
fi

smoothxg_poa_mode=""
if [[ "$run_global_poa" == true ]]; then
  smoothxg_poa_mode="-Z"
fi

for i in $(seq 1 $smooth_iterations);
do
    input_gfa="$prefix_seqwish".seqwish.gfa
    if [[ $i != 1 ]]; then
        input_gfa="$prefix_smoothed".$(echo $i - 1 | bc).gfa
    fi
    if [[ $i != "$smooth_iterations" ]]; then
        if [[ ! -s $prefix_smoothed.$i.gfa || $resume == false ]]; then
            resume=false # smoothxg is not deterministic, then all subsequent steps need to be rerun for consistency

            poa_length=$(echo $target_poa_length | cut -f $i -d, )
            $timer -f "$fmt" smoothxg \
                   -t $threads \
                   -T $poa_threads \
                   -g "$input_gfa" \
                   -w $(echo "$poa_length * $n_haps" | bc) \
                   $smoothxg_temp_dir \
                   $keep_temp \
                   -X 100 \
                   -I $block_id_min \
                   -R $block_ratio_min \
                   -j $max_path_jump \
                   -e $max_edge_jump \
                   -l $poa_length \
                   $poa_params_cmd \
                   -O $poa_padding \
                   -Y $(echo "$pad_max_depth * $n_haps" | bc) \
                   -d 0 -D 0 \
                   $smoothxg_xpoa $smoothxg_poa_mode \
                   -V \
                   -o "$prefix_smoothed".$i.gfa \
                   2> >(tee -a "$log_file")
        fi
    else
        if [[ ! -s $prefix_smoothed.gfa || ($write_maf != false && ! -s ${prefix_smoothed_output}.maf) || $resume == false ]]; then
            resume=false # smoothxg is not deterministic, then all subsequent steps need to be rerun for consistency

            poa_length=$(echo $target_poa_length | cut -f $i -d, )
            $timer -f "$fmt" smoothxg \
                   -t $threads \
                   -T $poa_threads \
                   -g "$input_gfa" \
                   -w $(echo "$poa_length * $n_haps" | bc) \
                   $smoothxg_temp_dir \
                   $keep_temp \
                   -X 100 \
                   -I $block_id_min \
                   -R $block_ratio_min \
                   -j $max_path_jump \
                   -e $max_edge_jump \
                   -l $poa_length \
                   $poa_params_cmd \
                   -O $poa_padding \
                   -Y $(echo "$pad_max_depth * $n_haps" | bc) \
                   -d 0 -D 0 \
                   $smoothxg_xpoa $smoothxg_poa_mode \
                   $maf_params \
                   -Q $consensus_prefix \
                   $consensus_params \
                   -o "$prefix_smoothed".gfa \
                   2> >(tee -a "$log_file")
        fi
    fi
done

odgi_temp_dir=""
if [[ "$temp_dir" != "" ]]; then
  odgi_temp_dir="--temp-dir $temp_dir"
fi

if [[ $normalize == true ]]; then
    # Remove redundancy
    if [[ ! -s "$prefix_smoothed_output".fix.gfa || ! -s "$prefix_smoothed_output".fix.affixes.tsv.gz || $resume == false ]]; then
      ( $timer -f "$fmt" gfaffix "$prefix_smoothed".gfa -o "$prefix_smoothed".fix.gfa | $timer -f "$fmt" pigz > "$prefix_smoothed_output".fix.affixes.tsv.gz ) 2> >(tee -a "$log_file")
    fi

    # Sort
    if [[ ! -s "$prefix_smoothed_output".final.og || $resume == false ]]; then
      ( $timer -f "$fmt" odgi build -t $threads -P -g "$prefix_smoothed".fix.gfa -o - -O \
          | $timer -f "$fmt" odgi sort -P -p Ygs $odgi_temp_dir -t $threads -i - -o "$prefix_smoothed_output".final.og ) 2> >(tee -a "$log_file")

      resume=false # The PG-SGD sorting (`-pY`) is not deterministic, then all subsequent steps need to be rerun for consistency
    fi

    if [[ ! -s "$prefix_smoothed_output".final.gfa || $resume == false ]]; then
      ( $timer -f "$fmt" odgi view -i "$prefix_smoothed_output".final.og -g > "$prefix_smoothed_output".final.gfa ) 2> >(tee -a "$log_file")
    fi
else
    if [[ ! -s "$prefix_smoothed_output".final.gfa || $resume == false ]]; then
      mv "$prefix_smoothed".gfa "$prefix_smoothed_output".final.gfa
    fi

    if [[ ! -s "$prefix_smoothed_output".final.og || $resume == false ]]; then
      $timer -f "$fmt" odgi build -t $threads -P -g "$prefix_smoothed_output".final.gfa -o "$prefix_smoothed_output".final.og 2> >(tee -a "$log_file")
    fi
fi


if [[ $multiqc == true ]]; then
    do_stats=true
fi

if [[ $do_stats == true ]]; then
    $timer -f "$fmt" odgi build -t $threads -P -g "$prefix_seqwish".seqwish.gfa -o "$prefix_seqwish".seqwish.og 2> >(tee -a "$log_file")
    odgi stats -i "$prefix_seqwish".seqwish.og -m > "$prefix_seqwish".seqwish.og.stats.yaml 2>&1 | tee -a "$log_file"
    odgi stats -i "$prefix_smoothed_output".final.og -m  > "$prefix_smoothed_output".final.og.stats.yaml 2>&1 | tee -a "$log_file"
    if [[ $consensus_spec != false ]]; then
        for consensus_graph in "$prefix_smoothed_output"*.cons*.gfa; do
            odgi build -t $threads -P -g "$consensus_graph" -o "$consensus_graph".og 2> >(tee -a "$log_file")
            odgi stats -i "$consensus_graph".og -m >"$consensus_graph".og.stats.yaml 2>&1 | tee -a "$log_file"
        done
    fi
fi

if [[ $do_viz == true ]]; then
    # big problem: this assumes that there is no "Consensus_" in the input sequences

    if [[ ! -s "$prefix_smoothed_output".final.og.viz_multiqc.png || $resume == false ]]; then
      $timer -f "$fmt" odgi viz -i "$prefix_smoothed_output".final.og \
                      -o "$prefix_smoothed_output".final.og.viz_multiqc.png \
                      -x 1500 -y 500 -a 10 -I $consensus_prefix \
                      2> >(tee -a "$log_file")
    fi

    if [[ ! -s "$prefix_smoothed_output".final.og.viz_pos_multiqc.png || $resume == false ]]; then
      $timer -f "$fmt" odgi viz -i "$prefix_smoothed_output".final.og \
                      -o "$prefix_smoothed_output".final.og.viz_pos_multiqc.png \
                      -x 1500 -y 500 -a 10 -u -d -I $consensus_prefix \
                      2> >(tee -a "$log_file")
    fi

    if [[ ! -s "$prefix_smoothed_output".final.og.viz_depth_multiqc.png || $resume == false ]]; then
      $timer -f "$fmt" odgi viz -i "$prefix_smoothed_output".final.og \
                      -o "$prefix_smoothed_output".final.og.viz_depth_multiqc.png \
                      -x 1500 -y 500 -a 10 -m -I $consensus_prefix \
                      2> >(tee -a "$log_file")
    fi

    if [[ ! -s "$prefix_smoothed_output".final.og.viz_inv_multiqc.png || $resume == false ]]; then
      $timer -f "$fmt" odgi viz -i "$prefix_smoothed_output".final.og \
                      -o "$prefix_smoothed_output".final.og.viz_inv_multiqc.png \
                      -x 1500 -y 500 -a 10 -z -I $consensus_prefix \
                      2> >(tee -a "$log_file")
    fi
fi

if [[ $do_layout == true ]]; then
    # the 2D layout is "smoother" when we chop the nodes of the graph to a fixed maximum length
    #$timer -f "$fmt" odgi chop -i "$prefix_smoothed_output".final.og -c 100 -o ""$prefix_smoothed_output".final.chop.og \
    #    2> >(tee -a "$log_file")

    if [[ ! -s "$prefix_smoothed_output".final.og.lay || $resume == false ]]; then
      # adding `-N g` to this call can help when rendering large, complex graphs that aren't globally linear
      $timer -f "$fmt" odgi layout -i "$prefix_smoothed_output".final.og \
                         -o "$prefix_smoothed_output".final.og.lay \
                         -T "$prefix_smoothed_output".final.og.lay.tsv \
                         -t $threads $odgi_temp_dir -P \
                         2> >(tee -a "$log_file")
    fi

    if [[ ! -s "$prefix_smoothed_output".final.og.lay.draw.png || $resume == false ]]; then
      # this can be configured to draw the graph in different ways, based on the same layout
      # here we draw in default mode
      $timer -f "$fmt" odgi draw -i "$prefix_smoothed_output".final.og \
                       -c "$prefix_smoothed_output".final.og.lay \
                       -p "$prefix_smoothed_output".final.og.lay.draw.png \
                       -H 1000 \
                       2> >(tee -a "$log_file")
    fi

    if [[ ! -s "$prefix_smoothed_output".final.og.lay.draw_multiqc.png || $resume == false ]]; then
      # this attempts to add paths
      $timer -f "$fmt" odgi draw -i "$prefix_smoothed_output".final.og \
                       -c "$prefix_smoothed_output".final.og.lay \
                       -p "$prefix_smoothed_output".final.og.lay.draw_multiqc.png \
                       -C -w 20 \
                       -H 1000 \
                       2> >(tee -a "$log_file")
    fi
fi

if [[ $vcf_spec != false ]]; then
    for s in $( echo "$vcf_spec" | tr ',' ' ' );
    do
        ref=$(echo "$s" | cut -f 1 -d: )
        delim=$(echo "$s" | cut -f 2 -d: )
        pop_length=$(echo "$s" | cut -f 3 -d: )
        if [[ -z $pop_length ]]; then
          pop_length=0
        fi
        vcf="$prefix_smoothed_output".final.$(echo $ref | tr '/|' '_').vcf
        if [[ ! -s $vcf || $resume == false ]]; then
          echo "[vg::deconstruct] making VCF with reference=$ref and delim=$delim"
          ( TEMPDIR=$(pwd) $timer -f "$fmt" vg deconstruct -P "$ref" \
                   -H "$delim" -e -a -t $threads "$prefix_smoothed_output".final.gfa >"$vcf" ) 2> >(tee -a "$log_file")
          bcftools stats "$vcf" > "$vcf".stats
        fi

        if [[ $pop_length -gt 0 ]]; then
          vcf_decomposed="$prefix_smoothed_output".final.$(echo $ref | tr '/|' '_').decomposed.vcf
          if [[ ! -s $vcf_decomposed || $resume == false ]]; then
            echo "[vg::deconstruct] decompose VCF"
            vcf_decomposed_tmp=$vcf_decomposed.tmp.vcf
            bgzip -c -@ 48 "$vcf" > "$vcf".gz
            TEMPDIR=$(pwd) $timer -f "$fmt" vcfbub -l 0 -a $pop_length --input "$vcf".gz | TEMPDIR=$(pwd) $timer -f "$fmt" vcfwave -I 1000 -t $threads > "$vcf_decomposed_tmp"

            #TODO: to remove when vcfwave will be bug-free
            # The TYPE info sometimes is wrong/missing
            # There are variants without the ALT allele
            bcftools annotate -x INFO/TYPE "$vcf_decomposed_tmp"  | awk '$5 != "."' > "$vcf_decomposed"
            rm "$vcf_decomposed_tmp" "$vcf".gz

            bcftools stats "$vcf_decomposed" > "$vcf_decomposed".stats
          fi
        fi
    done
fi

multiqc_out_dir=$(dirname "$input_fasta")

multiqc_config="# Report section config for nice titles and descriptions
custom_data:
  odgi_viz:
    section_name: ODGI 1D visualization
    description: This image shows a 1D rendering of the built pangenome graph. The graph nodes are arranged from left to right, forming the pangenome sequence. The colored bars represent the paths versus the pangenome sequence in a binary matrix. The path names are placed on the left. The black lines under the paths are the links, which represent the graph topology.
  odgi_viz_pos:
    section_name: ODGI 1D visualization by path position
    description: This shows a 1D rendering of the built pangenome graph where the paths are colored according to their nucleotide position. Light grey means a low path position, black is the highest path position.
  odgi_viz_inv:
    section_name: ODGI 1D visualization by path orientation
    description: This image shows a 1D rendering of the built pangenome graph where the paths are colored by orientation. Forward is black, reverse is red.
  odgi_viz_depth:
    section_name: ODGI 1D visualization by node depth
    description: This shows a 1D rendering of the built pangenome graph where the paths are colored according to path depth. Using the Spectra color palette with 4 levels of path depths, white indicates no depth, while grey, red, and yellow indicate depth 1, 2, and greater than or equal to 3, respectively.
  odgi_draw:
    section_name: ODGI 2D drawing
    description: This image shows a 2D rendering of the built pangenome graph.

# Custom search patterns to find the image outputs
sp:
  odgi_draw:
    fn: \"*draw_multiqc.png\"
  odgi_viz:
    fn: \"*viz_multiqc.png\"
  odgi_viz_pos:
    fn: \"*viz_pos_multiqc.png\"
  odgi_viz_inv:
    fn: \"*viz_inv_multiqc.png\"
  odgi_viz_depth:
    fn: \"*viz_depth_multiqc.png\"
  testing_name:
    fn: \"*draw.png\"
ignore_images: false

# Make the custom content stuff come after the ODGI module output
module_order:
  - odgi
  - custom_content

# Set the order that the custom content plots should come in
custom_content:
  order:
    - odgi_viz
    - odgi_viz_pos
    - odgi_viz_inv
    - odgi_viz_depth
    - odgi_draw
fn_clean_exts:
  - \".gfa\""

if [[ $multiqc == true ]]; then
    echo "$multiqc_config" > "$output_dir"/multiqc_config.yaml
    if [[ $output_dir == "" ]]; then
        $timer -f "$fmt" multiqc "$multiqc_out_dir" \
        -s \
        -o "$multiqc_out_dir" \
        -c "$output_dir"/multiqc_config.yaml \
        2> >(tee -a "$log_file")
    else
        $timer -f "$fmt" multiqc "$output_dir" \
        -s \
        -o "$output_dir" \
        -c "$output_dir"/multiqc_config.yaml \
        2> >(tee -a "$log_file")
    fi
fi

if [[ $keep_intermediate_files == true ]]; then
  # `|| true` to avoid `mv` fail if there are missing files to move
  mv -f "$prefix_seqwish".seqwish.{gfa,og} "$output_dir" 2> /dev/null || true
  mv -f "$prefix_seqwish".*.prep.gfa "$output_dir" 2> /dev/null || true
  mv -f "$prefix_smoothed".[0-9]*.{gfa,og} "$output_dir" 2> /dev/null || true
  mv -f "$prefix_smoothed".fix.gfa "$output_dir" 2> /dev/null || true

  if [[ $normalize == true ]]; then
    mv -f "$prefix_smoothed".{gfa,og} "$output_dir" 2> /dev/null || true
  fi

  # Since the files have just been moved
  if [[ "$output_dir" != "" ]]; then
    prefix_seqwish="$output_dir"/$(basename "$prefix_seqwish")
    prefix_smoothed="$output_dir"/$(basename "$prefix_smoothed")
  fi
else
  rm -f "$prefix_seqwish".seqwish.{gfa,og}
  rm -f "$prefix_smoothed".[0-9]*.{gfa,og}

  if [[ $normalize == true ]]; then
    rm -f "$prefix_smoothed".{gfa,og}
    rm -f "$prefix_smoothed".fix.gfa
  fi
fi

if [[ $temp_dir_was_created == true ]]; then
  rm -r $temp_dir
fi

if [[ $compress == true ]]; then
    if [[ $input_paf == false ]]; then
        pigz -f -q -p $threads "$prefix_paf"*.paf -v
    fi
    pigz -f -q -p $threads "$prefix_seqwish"*.{gfa,og} -v
    if [[ $write_maf != false ]]; then
      pigz -f -q -p $threads "$prefix_seqwish"*.maf -v
    fi
    if [[ $vcf_spec != false ]]; then
      ls "$prefix_seqwish"*.vcf | while read f; do bgzip $f -f -@ $threads; done
    fi
    if [[ $do_layout == true ]]; then
        pigz -f -q -p $threads "$prefix_smoothed_output".final.og.lay.tsv -v
    fi
fi
