#! /opt/gensoft/adm/bin/perl
use lib "/opt/gensoft/exe/RepeatModeler/2.0.5/lib/perl5";
use lib "/opt/gensoft/exe/RepeatModeler/2.0.5/lib/perl5";
##---------------------------------------------------------------------------##
##  File:
##      @(#) Refiner
##  Author:
##      Arian Smit <asmit@systemsbiology.org>
##      Robert Hubley <rhubley@systemsbiology.org>
##  Description:
##      Given a set of instances of a particular interspersed
##      repeat develop and refine a consensus model for
##      them.
##
#******************************************************************************
#* Copyright (C) Institute for Systems Biology 2008 Developed by
#* Arian Smit and Robert Hubley.
#*
#* This work is licensed under the Open Source License v2.1.  To view a copy
#* of this license, visit http://www.opensource.org/licenses/osl-2.1.php or
#* see the license.txt file contained in this distribution.
#*
###############################################################################

=head1 NAME

Refiner - Generate and refine a seed alignment given a set of family instances

=head1 SYNOPSIS

  Refiner [-options] <family fasta sequences>

  -threads #     : The maximum number of threads the program may use.

  NOTE: The fasta file should contain only sequence identifiers and sequences.
        The fasta description field is currently reserved for RepeatModeler
        use at this time.

=head1 DESCRIPTION

This tool was developed to overcome the limitation many multiple sequence
aligners when supplied with highly diverged and fragmented sequences. Given
a set of related instances of TE family this tool:

  - Generates an all-vs-all pairwise alignment to identify the sequence
    with the minimal distance (best score) to all other sequences.

  - A transitive multiple sequence alignment is generated from the pairwise
    alignments to this minimally distant copy and a new consensus is drawn.
 
  - If the consensus has changed, the new consensus is realigned to the 
    instances and the process is repeated until the consensus stabilizes.

The options are:

=over 4

=item -h(elp)

Detailed help

=back

=head1 SEE ALSO

=over 4

RepeatModeler, RepeatMasker

=back

=head1 COPYRIGHT

 Copyright 2005-2022 Institute for Systems Biology

=head1 AUTHOR

 Robert Hubley <rhubley@systemsbiology.org>
 Arian Smit <asmit@systemsbiology.org>

=cut

#
# Module Dependence
#
use strict;
use FindBin;
use lib $FindBin::RealBin;
use Getopt::Long;
use POSIX qw(:sys_wait_h);
use File::Copy;
use File::Spec;
use File::Path;
use File::Basename;
use Cwd;
use Pod::Text;
use Data::Dumper;

# RepeatModeler Libraries
use RepModelConfig;
use lib $ENV{REPEATMASKER_LIB};
use MultAln;
use NeedlemanWunschGotohAlgorithm;

# RepeatMasker Libraries
use SearchResult;
use SearchResultCollection;
use WUBlastSearchEngine;
use NCBIBlastSearchEngine;
use SequenceSimilarityMatrix;
use SeqDBI;
use SimpleBatcher;
use FastaDB;

#
# Class Globals & Constants
#
my $CLASS = "Refiner";
my $DEBUG = 0;
$DEBUG = 1 if ( $RepModelConfig::DEBUGALL == 1 );
$|     = 1;                                         # Turn autoflush on

#
# Version
#
my $version = $RepModelConfig::VERSION;

if ( $ARGV[ 0 ] && $ARGV[ 0 ] eq '-v' ) {
  print "Refiner version $version\n";
  exit;
}

#
# Option processing
#  e.g.
#   -t: Single letter binary option
#   -t=s: String parameters
#   -t=i: Number paramters
#
my @opts = ( '-help',     
             '-debug', 
             '-noTmp',  
             '-quiet',
             '-giToID=s', 
             '-name=s', 
             '-threads=i' );

# Add configuration parameters as additional command-line options
push @opts, RepModelConfig::getCommandLineOptions();

#
# Get the supplied command line options, and set flags
#
my %options = ();
unless ( &GetOptions( \%options, @opts ) ) {
  usage();
}

#
# Provide the POD text from this file and
# from the config file by merging them
# together.  The heading "CONFIGURATION
# OVERRIDES" provides the insertion point
# for the configuration POD.
#
sub usage {
  my $p = Pod::Text->new();
  $p->output_fh( *STDOUT );
  my $pod_str;
  open IN, "<$0"
      or die "Could not open self ($0) for generating documentation!";
  while ( <IN> ) {
    if ( /^=head1\s+CONFIGURATION OVERRIDES\s*$/ ) {
      my $c_pod = RepModelConfig::getPOD();
      if ( $c_pod ) {
        $pod_str .= $_ . $c_pod;
      }
    }
    else {
      $pod_str .= $_;
    }
  }
  close IN;
  print "$0 - $version\n";
  $p->parse_string_document( $pod_str );
  exit( 1 );
}

#
# Resolve configuration settings using the following precedence:
# command line first, then environment, followed by config
# file.
#
RepModelConfig::resolveConfiguration( \%options );
my $config           = $RepModelConfig::configuration;
my $NCBIBLASTDB_PRGM = $config->{'RMBLAST_DIR'}->{'value'} . "/makeblastdb";
my $RMBLASTN_PRGM    = $config->{'RMBLAST_DIR'}->{'value'} . "/rmblastn";

# Print the internal POD documentation if something is missing
if ( $#ARGV == -1 || $options{'help'} ) {
  print "No query sequence file indicated\n\n";
  usage();
}

$DEBUG = 1 if ( $options{'debug'} );

#
# If used as part of the RepeatModeler system
# read in the seq IDs from the original database
# so we can place them in the stockholm file.
#
my %genomeDBToSeqID = ();
if ( $options{'giToID'} ) {
  open IN, "<$options{'giToID'}"
      or die "Could not open $options{'giToID'} file for reading!\n";
  while ( <IN> ) {
    if ( /^(\S+)\s+(\d+)/ ) {
      $genomeDBToSeqID{"gi|$2"} = $1;

      # LTR_retriever has an issue with "|" symbols.  For this
      # process the seq identifier will be gi-# instead.
      $genomeDBToSeqID{"gi-$2"} = $1;
    }
  }
  close IN;
}

#
# Setup the search engines
#
my $srchEngAllVsAll;
my $srchEngOneVsAll;
$srchEngAllVsAll =
    NCBIBlastSearchEngine->new( pathToEngine => $RMBLASTN_PRGM );
$srchEngOneVsAll =
    NCBIBlastSearchEngine->new( pathToEngine => $RMBLASTN_PRGM );
if ( not defined $srchEngAllVsAll ) {
  die "Refiner Failed: Cannot execute $RMBLASTN_PRGM please make "
    . "sure you have setup RepeatModeler to use RMBlast by "
    . "running the configure script.\n";
}

my $rmblast_version = $srchEngOneVsAll->getVersion();
my $engineHasQueryThreading = 0;
if ( $rmblast_version =~ /(\d+)\.(\d+)\.(\d+)\+/ ) {
  my $majorVer = $1;
  my $minorVer = $2;
  my $revision = $3;
  if ( $majorVer > 2 || ($majorVer == 2 && $minorVer >= 13 )) {
    $engineHasQueryThreading = 1;
  }
}


#
# Parse filenames
#
foreach my $file ( @ARGV ) {
  if ( $file =~ /\s/ ) {
    die "RepeatModeler can not handle filenames with spaces "
        . "like the file \"$file\"\n";
  }
  elsif ( $file =~ /([\`\!\$\^\&\*\(\)\{\}\[\]\|\\\;\"\'\<\>\?])/ ) {
    die "RepeatModeler can not handle filenames with the special "
        . "character \"$1\" as in the file \"$file\"\n";
  }
}

my @tmpDirPath = ( cwd(), ( File::Spec->splitpath( $ARGV[ 0 ] ) )[ 1 ] );
print "TempDirPath = " . join( ", ", @tmpDirPath ) . "\n" if ( $DEBUG );

my $tmpDir;

#
# Main loop
#
my @TimeBefore = ();
elapsedTime( 0 );
foreach my $file ( @ARGV ) {

  unless ( -r $file ) {
    print "cannot read file $file\n";
    next;
  }

  print "\nanalyzing file $file\n" if ( $#ARGV >= 0 && $DEBUG );
  $tmpDir = dirname( $file );
  unless ( $options{'noTmp'} ) {
    $tmpDir = createTempDir( \@tmpDirPath );
  }
  print "Tmpdir = $tmpDir\n" if ( $DEBUG );

  #
  # Handle one sequence case
  #
  open IN, "<$file" or die "Could not open $file for reading!";
  my $firstSeq;
  my $firstID;
  my $count = 0;
  while ( <IN> ) {
    if ( /^>(\S+)/ ) {
      $count++;
      last if ( $count > 1 );
      $firstID = $1;
      next;
    }
    s/[\n\r\s]+//g;
    $firstSeq .= uc($_);
  }
  close IN;

  # Name the final consensus sequence
  my $id = "family";
  $id = $options{'name'} if ( $options{'name'} );

  my $cons;
  my $maSize;
  my $avgKDiv;
  if ( $count > 1 ) {
    ( $cons, $maSize, $avgKDiv ) =
        &buildConsensus( "/opt/gensoft/exe/RepeatModeler/2.0.5/share/Matrices", 
                         $srchEngAllVsAll, $srchEngOneVsAll, $file, $tmpDir );
  }
  else {
    my $malign = MultAln->new( sequences => [ [ $firstID, $firstSeq ] ] );
    $cons    = $malign->consensus();
    $maSize  = 1;
    $avgKDiv = 0;

    my $db = FastaDB->new( fileName => $file,
                        openMode => SeqDBI::ReadOnly );

    adjustIdentifiers($malign, $db);

    $malign->toSTK( filename => "$file.refiner.stk", id => $id );
  }

  $cons =~ s/-//g;
  open OUTC, ">$file.refiner_cons";

  # Save the consensus to the consensi file.
  print OUTC ">$id ( Final Multiple Alignment Size = " . $maSize
      . " , Avg Kimura = $avgKDiv )\n";
  print OUTC "$cons\n";
  close OUTC;
}

##########################################################################
##########################################################################
##########################################################################
##########################################################################

sub buildConsensus {
  my $matrixDir       = shift;
  my $srchEngAllVsAll = shift();
  my $srchEngOneVsAll = shift();
  my $pathToFastaFile = shift();
  my $wrkDir          = shift();

  # Break down fastaFile path to directory/file
  # components
  my $consName = basename( $pathToFastaFile );
  $consName =~ s/\.fa//g;

  my $cons = "";
  my $db;
  my @multiSeqs               = ();
  my %uniqSeqIDs              = ();
  my $finalMultiAlignmentSize = 0;
  my $avgKDiv                 = 0;
  my $round1ConsLen           = 0;
  my $round1NumUnAlignSeqs    = 0;
  if ( $DEBUG ) {
    print "========================="
        . $consName
        . "=========================\n";
    print " -------------------ROUND 1 ------------------ \n";
  }

  # MAKEBLASTDB the database
  system(   "$NCBIBLASTDB_PRGM -blastdb_version 4 -out $pathToFastaFile "
          . "-dbtype nucl -in $pathToFastaFile >> "
          . "$wrkDir/makeblastdb.log 2>&1" );

  ## Setup the ALL vs ALL parameters 
  $srchEngAllVsAll->setMatrix( "$matrixDir/ncbi/nt/comparison.matrix" );
  $srchEngAllVsAll->setMinScore( 150 );
  $srchEngAllVsAll->setGenerateAlignments( 1 );
  $srchEngAllVsAll->setGapInit( -25 );
  $srchEngAllVsAll->setInsGapExt( -5 );
  $srchEngAllVsAll->setDelGapExt( -5 );
  $srchEngAllVsAll->setMinMatch( 7 );
  $srchEngAllVsAll->setCores( $options{'threads'} ? $options{'threads'} : undef );
  #$srchEngAllVsAll->setBandwidth( -50 );
  $srchEngAllVsAll->setTempDir( dirname( $pathToFastaFile ) );
  $srchEngAllVsAll->setScoreMode( SearchEngineI::complexityAdjustedScoreMode );

  ## Setup the ONE vs ALL parameters 
  $srchEngOneVsAll->setMatrix( "$matrixDir/ncbi/nt/comparison.matrix" );
  $srchEngOneVsAll->setMinScore( 150 );
  $srchEngOneVsAll->setGenerateAlignments( 1 );
  $srchEngOneVsAll->setGapInit( -25 );
  $srchEngOneVsAll->setInsGapExt( -5 );
  $srchEngOneVsAll->setDelGapExt( -5 );
  $srchEngOneVsAll->setMinMatch( 7 );
  $srchEngOneVsAll->setCores( $options{'threads'} ? $options{'threads'} : undef);
  $srchEngOneVsAll->setTempDir( dirname( $pathToFastaFile ) );
  $srchEngOneVsAll->setScoreMode( SearchEngineI::complexityAdjustedScoreMode );
  $srchEngOneVsAll->setMaskLevel( 80 );


  # 1. Search all instances against each other
  $srchEngAllVsAll->setQuery( $pathToFastaFile );
  $srchEngAllVsAll->setSubject( $pathToFastaFile );

  print $CLASS
      . "::buildConsensus(): Running All-vs-All "
      . "on $pathToFastaFile \n"
      if ( $DEBUG );

  my $round = "1";
  print "Params: " . $srchEngAllVsAll->getParameters() . "\n"
            if ( $DEBUG );
  my ( $runStat, $familyCollection ) = $srchEngAllVsAll->search();

  print "Search complete\n" if ( $DEBUG );

  ## Cleanup after run
  # Remove ncbi databases
  system(   "rm $pathToFastaFile.nin "
          . "$pathToFastaFile.nhr $pathToFastaFile.nsq " )
      unless ( $DEBUG );

  print $CLASS
      . "::buildConsensus(): Returned "
      . $familyCollection->size()
      . " hits\n"
      if ( $DEBUG );

  my $malign;
  my $unalignedFastaSeqs = "";

  if ( $runStat ) {
    print STDERR "\nERROR from search engine (", $? >> 8, ") \n";
  }
  else {
    ## Find highest scoring element
    my $MOUT;
    open $MOUT, ">$wrkDir/$consName-cons.html";

    my $refID =
        &findHighestScoringAlignmentSet( searchCollection => $familyCollection );

    ##
    ## Prototype
    ##
    #if ( 1 ) {
    #  print "#\n";
    #  print "# PROTOTYPE: running Tensor Sketch to caculate distances...\n";
    #  print "#\n";
    #  system("$FindBin::RealBin/sketch --tuple_length 3 --embed_dim 16 --window_size 30 --stride 3 --alphabet dna5 -i $pathToFastaFile -o $wrkDir/sketch-dist.tsv >& /dev/null");
    #  open TSS,"<$wrkDir/sketch-dist.tsv" or die "Could not open $wrkDir/sketch-dist.tsv\n";
    #  my %tssSumDistsHash = ();
    #  my @tssSumDistsArray = ();
    #  while ( <TSS> ){
    #    if ( /^(\S+)\s+([\d\.]+)/ ) {
    #      push @tssSumDistsArray, [$1,$2];
    #      $tssSumDistsHash{$1} = $2;
    #    }
    #  }
    #  close TSS;
    #  @tssSumDistsArray = sort { $a->[1] <=> $b->[1] } @tssSumDistsArray;
    #  print "# Ten lowest sum distances according to TSS:\n";
    #  my $inTop = 0;
    #  for ( my $l = 0; $l < 5; $l++ ) {
    #    print "#  " . $tssSumDistsArray[$l]->[0] . "    " . $tssSumDistsArray[$l]->[1] . "\n";
    #    $inTop = 1 if ( $tssSumDistsArray[$l]->[0] eq $refID );
    #  }
    #  print "#\n";
    #  print "# Highest scoring sequence according to blast: $refID\n";
    #  print "#\n";
    #  #print "# Overriding refID with TSS result\n";
    #  #$refID = $tssSumDistsArray[0]->[0];
    #}

    if ( $familyCollection->size() > 0 ) {

      $db = FastaDB->new( fileName => $pathToFastaFile,
                          openMode => SeqDBI::ReadOnly );

      # Grab the reference sequence
      my $refSeq = $db->getSequence( $refID );

      # Develop a pseudo mutiple alignment based on the
      # remaining common sequence alignments.
      $malign = MultAln->new(
                              referenceSeq              => $refSeq,
                              searchCollection          => $familyCollection,
                              searchCollectionReference => MultAln::Query,
                              flankingSequenceDatabase  => $db,
                              maxFlankingSequenceLen    => -1
      );

      # 20190605: We do want to include the reference in round-1 because it's
      # a real input sequence.  Also, matrix was unused here.
      #$cons = $malign->consensus( "$matrixDir/linupmatrix" );
      $cons = $malign->consensus( inclRef => 1 );

      my (
           $leftMostSequence,  $leftMostSequenceID,
           $rightMostSequence, $rightMostSequenceID
          )
          = &getLongestFlankingExtension( multAln => $malign );

      writeHTMLMultAlign(
                          multAln         => $malign,
                          inclRef         => 1,
                          destination     => $MOUT,
                          leftFlankingID  => $leftMostSequenceID,
                          rightFlankingID => $rightMostSequenceID
      );
      $malign->serializeOUT( "$wrkDir/$consName-cons.malign" );
      undef $malign;

      # Print unalignable sequences
      my %seqNames = ();
      my $qryID    = $familyCollection->get( 0 )->getQueryName();
      $seqNames{$qryID} = 1;
      for ( my $j = 0 ; $j < $familyCollection->size() ; $j++ ) {
        my $sbjID = $familyCollection->get( $j )->getSubjName();
        $seqNames{$sbjID} = 1;
      }
      my $unAlignSeqs    = "";
      my $numUnAlignSeqs = 0;
      foreach my $seqID ( $db->getIDs() ) {
        unless ( exists $seqNames{$seqID} ) {
          $unAlignSeqs .=
              "Unaligned ( $seqID ): " . $db->getSequence( $seqID ) . "\n";
          $numUnAlignSeqs++;
        }
      }
      if ( $numUnAlignSeqs > 0 ) {
        print $MOUT "$numUnAlignSeqs sequences could not be aligned to "
            . "this particular reference sequence:\n";
        print $MOUT "<PRE>\n$unAlignSeqs\n</PRE>\n";
      }

      print $CLASS
          . "::buildConsensus(): "
          . $familyCollection->size()
          . " elements left after initial multiple alignment\n"
          if ( $DEBUG );

      ## Build Consensus
      $cons =~ s/-//g;

      $round1ConsLen        = length( $cons );
      $round1NumUnAlignSeqs = $numUnAlignSeqs;

      if ( $DEBUG ) {
        print "  - Unaligned sequences = $numUnAlignSeqs\n";
        print "  - Consensus Length = " . length( $cons ) . "\n";
      }

      ##########################################
      ######### Consensus refinement ###########
      ##########################################
      my $prevCons = "";
      my $newConsBlocks;
      my $finalRound = 0;
      my $sumScore = 0;
      do {
        print $MOUT "\n<br>\n<br><hr>\n<br>\n<br>";
        $round++;
        if ( $DEBUG ) {
          print " -------------------ROUND $round ------------------ \n";
        }
        $prevCons = $cons;

        # Save the consensus to a fasta file
        open CONF, ">$wrkDir/$consName-cons-$round.fa";
        print CONF ">$consName ( "
            . $familyCollection->size()
            . " hsps from initial "
            . "mulitiple alignment)\n";
        my $consFa = $cons;
        $consFa =~ s/-//g;
        print CONF "$consFa\n";
        close CONF;
        $leftMostSequence  = "";
        $rightMostSequence = "";

        # MAKEBLASTDB the database
        #. "-parse_seqids -dbtype nucl -in "
        system(   "$NCBIBLASTDB_PRGM -blastdb_version 4 -out "
                . "$wrkDir/$consName-cons-$round.fa "
                . "-dbtype nucl -in "
                . "$wrkDir/$consName-cons-$round.fa >> "
                . "$wrkDir/makeblastdb.log 2>&1" );

        # Setup the search parameters
        # TODO: Get version of RMBLAST if 2.13+ use mt_mode=1
        if ( $engineHasQueryThreading ) {
          $srchEngOneVsAll->setThreadByQuery(1);
        }
        $srchEngOneVsAll->setSubject( "$wrkDir/$consName-cons-$round.fa" );
        $srchEngOneVsAll->setQuery( $pathToFastaFile );

        print $CLASS
            . "::buildConsensus(): Cons-vs-All "
            . "$consName\n"
            if ( $DEBUG );

        print "Params: " . $srchEngOneVsAll->getParameters() . "\n"
            if ( $DEBUG );
        ( $runStat, $familyCollection ) = $srchEngOneVsAll->search();


        # Remove the ncbi databases
        system( "rm $wrkDir/$consName-cons-$round.fa.n*" ) unless ( $DEBUG );

        print $CLASS
            . "::buildConsensus(): RMBlast returned "
            . $familyCollection->size()
            . " hits\n"
            if ( $DEBUG );

        if ( $runStat ) {
          print STDERR "\nERROR from search engine (", $? >> 8, ") \n";
        }
        else {

          # Create multiple alignment of results
          $malign = MultAln->new(
                                  referenceSeq     => $consFa,
                                  searchCollection => $familyCollection,
                                  searchCollectionReference => MultAln::Subject,
                                  flankingSequenceDatabase  => $db,
                                  maxFlankingSequenceLen    => -1
          );

          # Build consensus
          $cons = $malign->consensus( "$matrixDir/linupmatrix" );
          my $sumDiv = $malign->kimuraDivergence( $cons );
          $avgKDiv = 0;
          if ( $malign->getNumAlignedSeqs() > 0 ) {
            $avgKDiv = $sumDiv / $malign->getNumAlignedSeqs();
          }
          $malign->serializeOUT( "$wrkDir/$consName-malign-$round.ser" )
              if ( $DEBUG );

          # Determine if we are at the end of our loop
          if ( $finalRound == 0 && ( $prevCons eq $cons || $round == 10 ) ) {
            if ( $DEBUG ) {
              print "  - Consensus stable...resolving low quality blocks..\n";
            }
            $newConsBlocks = resolveLowQualityBlocks( multAln => $malign );

            # Patch up the cons
            for ( my $j = 0 ; $j <= $#{$newConsBlocks} ; $j++ ) {
              my $colWidth =
                  $newConsBlocks->[ $j ]->{'end'} -
                  $newConsBlocks->[ $j ]->{'start'} + 1;
              my $colSeq = $newConsBlocks->[ $j ]->{'cons'};
              my $seq = $colSeq . "-" x ( $colWidth - length( $colSeq ) );
              substr( $cons, $newConsBlocks->[ $j ]->{'start'}, length( $seq ) )
                  = $seq;
            }
            writeHTMLMultAlign(
                                multAln         => $malign,
                                destination     => $MOUT,
                                leftFlankingID  => $leftMostSequenceID,
                                rightFlankingID => $rightMostSequenceID,
                                printHistogram  => 1,
                                newConsBlocks   => $newConsBlocks,
                                finalConsensus  => $cons
            );
            $finalRound++;
          }
          else {
            $finalRound++ if ( $finalRound == 1 );
            if ( $finalRound == 2 ) {

              # TODO: Put this back when we work on this again
              #writeHTMLMultAlign(
              #                    multAln         => $malign,
              #                    destination     => $MOUT,
              #                    leftFlankingID  => $leftMostSequenceID,
              #                    rightFlankingID => $rightMostSequenceID,
              #                    printDupDelCols => 1
              #);
              writeHTMLMultAlign(
                                  multAln         => $malign,
                                  destination     => $MOUT,
                                  leftFlankingID  => $leftMostSequenceID,
                                  rightFlankingID => $rightMostSequenceID,
              );
            }
            else {
              writeHTMLMultAlign(
                                  multAln         => $malign,
                                  destination     => $MOUT,
                                  leftFlankingID  => $leftMostSequenceID,
                                  rightFlankingID => $rightMostSequenceID
              );
            }

            #$malign->_getEndStartPairs() if ( $finalRound == 2 );
          }

          # Determine which sequences were not aligned.
          my %seqNames = ();
          my $sbjID    = $familyCollection->get( 0 )->getSubjName();
          $seqNames{$sbjID} = 1;
          $sumScore = 0;
          for ( my $j = 0 ; $j < $familyCollection->size() ; $j++ ) {

            # Testing
            #print "OUT: "
            #    . $familyCollection->get( $j )
            #    ->toStringFormatted( SearchResult::AlignWithQuerySeq ) . "\n";
            my $qryID = $familyCollection->get( $j )->getQueryName();
            $sumScore += $familyCollection->get( $j )->getScore();
            $seqNames{$qryID} = 1;
          }
          $unAlignSeqs        = "";
          $unalignedFastaSeqs = "";
          $numUnAlignSeqs     = 0;
          foreach my $seqID ( $db->getIDs() ) {
            unless ( exists $seqNames{$seqID} ) {
              $unalignedFastaSeqs .=
                  ">$seqID\n" . $db->getSequence( $seqID ) . "\n";
              $unAlignSeqs .=
                  "Unaligned ( $seqID ): " . $db->getSequence( $seqID ) . "\n";
              $numUnAlignSeqs++;
            }
          }
          if ( $numUnAlignSeqs > 0 ) {
            print $MOUT "$numUnAlignSeqs sequences could not be aligned to "
                . "this reference sequence:\n";
            print $MOUT "<PRE>\n$unAlignSeqs\n</PRE>\n";
          }

          # Find out how many uniq sequences made it into the
          # final multiple alignment.
          %uniqSeqIDs = ();
          foreach my $seqNum ( 0 .. $malign->getNumAlignedSeqs() - 1 ) {
           $uniqSeqIDs{ $malign->getAlignedName( $seqNum ) } = {
                               consStart => $malign->getAlignedStart( $seqNum ),
                               consEnd   => $malign->getAlignedEnd( $seqNum )
            };
          }

          # TODO: Get alignment position ( ie. consensus pos ) so we can
          #       store this in with the final instances.
          $finalMultiAlignmentSize = keys( %uniqSeqIDs );
        }

        #close $MOUT;
      } until ( $finalRound == 2 );

      my $gaplessCons = $cons;
      $gaplessCons =~ s/-//g;
      #print "  - sumScore = $sumScore\n";
      unless ( $options{'quiet'} ) {
        print "  - numRounds = $round\n";
        print "  - sumScore = $sumScore\n";
        print "  - Consensus Length = "
            . length( $gaplessCons )
            . " ( orig = $round1ConsLen )\n";
        print "  - Avg Kimura Divergence = "
            . sprintf( "%0.2f", $avgKDiv ) . "\n";
        print "  - Unaligned sequences = $numUnAlignSeqs ( orig = $round1NumUnAlignSeqs )\n";
      }
    }
    else {
      if ( $DEBUG ) {
        print $CLASS
          . "::buildConsensus(): $pathToFastaFile ( $consName ) "
          . "didn't have any hsps.\n";
      }else{
        print "  Refiner: Family $consName didn't produce any hsps.\n";
      }
      exit;
    }
  }    # if Run stat from first pass

  ## Testing: Saving out unaligned fasta for experimentation
  #open OUT, ">unaligned.fa"
  #    or die "Refiner Failed: Could not open unaligned\.fa file!\n";
  #print OUT $unalignedFastaSeqs;
  #close OUT;

  adjustIdentifiers($malign, $db);

  #$malign->toSTK( filename => "$pathToFastaFile.refiner.stk", includeTemplate => 1, nuclRF => 1 );
  $malign->toSTK( filename => "$pathToFastaFile.refiner.stk" );

  print "  Build Consensus: " . elapsedTime( 0 ) . "\n"
      unless ( $options{'quiet'} );
  undef @multiSeqs;
  return $cons, $finalMultiAlignmentSize, $avgKDiv;

}

sub adjustIdentifiers {
  my $malign = shift;
  my $db = shift;

  # Adjust identifiers in MultAln
  my %onlyUniq;
  # 01/31/22: RMH - This "delete in forward iteration" error causes
  #                 some sequences to retain the 'gi|#' identifier
  #                 rather than being correctly translated back to
  #                 genomic identifiers.
  #for ( my $i = 0 ; $i < $malign->getNumAlignedSeqs() ; $i++ ) {
  for ( my $i = $malign->getNumAlignedSeqs()-1; $i >= 0 ; $i-- ) {
    my $id     = $malign->getAlignedName( $i );
    my $dbDesc = $db->getDescription( $id );
    my $refOrient = $malign->getAlignedOrientation( $i );

    # If we are provided a translation system -- do the translation
    #  e.g hg38:chr1:300-400
    #    or
    #      chr1:300-400
    #      gi|1:300-400
    #      gi-1:300-400
    #      gi-1:300..400
    #
    my $assembly;
    my $seqID;
    my $start;
    my $end;
    if ( $dbDesc =~ /^\s*(\S+:)?(\S+):(\d+)-(\d+)\s*.*$/ ) {
      $assembly = $1;
      $seqID    = $2;
      $start    = $3;
      $end      = $4;

      # LTR Retriever prints out stuff like this:
      #    gi-1:11224114..11230723_INT#LTR/Gypsy
    }
    elsif ( $id =~ /^\s*(\S+):(\d+)\.\.(\d+)/ ) {
      $seqID = $1;
      $start = $2;
      $end   = $3;
    }else {
      # We don't have a recognizable identifier
      $seqID = $id;
    }

    if ( $seqID && $start && $end ) {
      if ( $options{'giToID'}
           && exists $genomeDBToSeqID{$seqID} )
      {
        $seqID = $genomeDBToSeqID{$seqID};
      }
      $malign->setAlignedName( $i, $seqID );
      my $adjStart;
      my $adjEnd;

      #if ( $end < $start ) {
      #  $malign->setAlignedOrientation( $i, "-" );
      #  $adjStart = $malign->getAlignedSeqStart( $i ) + $end - 1;
      #  $adjEnd   = $malign->getAlignedSeqEnd( $i ) + $end - 1;
      #}
      #else {
      #  $adjStart = $malign->getAlignedSeqStart( $i ) + $start - 1;
      #  $adjEnd   = $malign->getAlignedSeqEnd( $i ) + $start - 1;
      #}
      # 2/1/22: RMH - Fixed a bug with coordinate translation.  The
      #               coordinates stored in the Family-#.fa file
      #               are one-based, fully-closed.  They are also
      #               stored in reversed order if the sequence
      #               is reverse complemented.  In the above code
      #               we added the coordinates to the end position
      #               rather than subtracting them from the start!
      if ( $end < $start ) {
        if ( $refOrient eq "C" || $refOrient eq "-" ) {
          $malign->setAlignedOrientation( $i, "+" );
        }else{
          $malign->setAlignedOrientation( $i, "-" );
        }
        # AlignedStart is always lower than AlignedEnd in the MALIGN obj
        $adjEnd = $start - $malign->getAlignedSeqStart( $i ) + 1;
        $adjStart   = $start - $malign->getAlignedSeqEnd( $i ) + 1;
      }
      else { 

       if ( $refOrient eq "C" || $refOrient eq "-" ) {
          $malign->setAlignedOrientation( $i, "-" );
        }else{
          $malign->setAlignedOrientation( $i, "+" );
        }
 
        $adjStart = $start + $malign->getAlignedSeqStart( $i ) - 1;
        $adjEnd   = $start + $malign->getAlignedSeqEnd( $i ) - 1;
      }
      $malign->setAlignedSeqStart( $i, $adjStart );
      $malign->setAlignedSeqEnd( $i, $adjEnd );
    }
    elsif ( $dbDesc ) {
      warn "Could not parse identifier: $dbDesc\n";
    }
    my $finalID = "$seqID:" . $malign->getAlignedSeqStart( $i ) . "-" . $malign->getAlignedSeqEnd( $i );
    if ( exists $onlyUniq{$finalID}  ) {
      # TODO: make this an function of MultAln
      splice(@{ $malign->{'alignCol'} }, $i+1, 1);
    }
    $onlyUniq{$finalID} = 1;
  }
}

##############################################################################
##############################################################################

##-------------------------------------------------------------------------##
##
##  Use: my = writeHTMLMultAlign( multAln => $multAlignRef,
##                                    [destination => $filename|$FH],
##                                    [leftFlankingID => 1],
##                                    [rightFlankingID => 1] );
##
##
##
##-------------------------------------------------------------------------##
sub writeHTMLMultAlign {
  my %parameters = @_;

  my $method = "writeHTMLMultAlign";
  croak $CLASS. "::$method() missing multAln parameter!\n"
      if ( !exists $parameters{'multAln'} );

  my $mAlign = $parameters{'multAln'};

  my $inclRef = 0;
  if ( defined $parameters{'inclRef'} ) {
    $inclRef = 1;
  }

  my $OUT = *STDOUT;
  if ( defined $parameters{'destination'} ) {
    if ( ref( $parameters{'destination'} ) !~ /GLOB|FileHandle/ ) {
      print $CLASS
          . "::$method() Opening file "
          . $parameters{'destination'} . "\n"
          if ( $DEBUG );
      open $OUT, $parameters{'destination'}
          or die $CLASS
          . "::$method: Unable to open "
          . "results file: $parameters{'destination'} : $!";
    }
    else {
      $OUT = $parameters{'destination'};
    }
  }

  print $OUT <<"END";
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
  <title>Alignments</title>
  <style type="text/css">
  font.lowQual {
    background: #CD5C5C;
  }
  font.deletion {
    background: #FF0000;
  }
  font.duplication {
    background: #0000FF;
  }
  font.unknown {
    background: #FFFF00;
  }
  font.dupFlank {
    background: #C0C0C0;
  }
  </style>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
</head>
END

  # Find max padding.
  my $maxLeftLen  = 0;
  my $maxRightLen = 0;
  my $maxQueryEnd = length( $mAlign->getReferenceSeq() );
  foreach my $seqNum ( 0 .. $mAlign->getNumAlignedSeqs() - 1 ) {
    my $relLeftLen =
        length( $mAlign->getLeftFlankingSequence( $seqNum ) ) -
        $mAlign->getAlignedStart( $seqNum );
    my $relRightLen =
        length( $mAlign->getRightFlankingSequence( $seqNum ) ) -
        ( $maxQueryEnd - $mAlign->getAlignedEnd( $seqNum ) );
    $maxLeftLen  = $relLeftLen  if ( $maxLeftLen < $relLeftLen );
    $maxRightLen = $relRightLen if ( $maxRightLen < $relRightLen );
  }

  ## Calculate Del/Dup candidates
  my @dupDelCols = ();
  if ( $parameters{'printDupDelCols'} == 1 ) {
    my $endStartPairs = $mAlign->_getEndStartPairs();
    foreach my $pair ( @{$endStartPairs} ) {
      if ( abs( $pair->{'refEnd'} - $pair->{'refStart'} ) < 50 ) {
        my $adjStart = 0;
        my $adjEnd   = 0;
        my $absStart = $pair->{'refEnd'};
        my $absEnd   = $pair->{'refStart'};
        if ( $pair->{'refEnd'} > $pair->{'refStart'} ) {
          $absStart = $pair->{'refStart'};
          $absEnd   = $pair->{'refEnd'};
        }
        $adjStart = $mAlign->getAlignPosFromBPPos( $absStart );
        $adjEnd   = $mAlign->getAlignPosFromBPPos( $absEnd );
        print " adjStart = $adjStart adjEnd=$adjEnd\n" if ( $DEBUG );
        if ( $pair->{'avgGapWidth'} > 30 ) {
          print "Calling a deletion\n";

          # Deletion
          push @dupDelCols,
              {
                'start' => $adjStart,
                'end'   => $adjEnd,
                'type'  => "deletion"
              };

        }
        elsif ( $pair->{'avgGapWidth'} < 0 ) {

          # Duplication
          print "Calling a duplication\n";
          my $flnkStart =
              $mAlign->getAlignPosFromBPPos(
                                    $absStart - abs( $pair->{'avgGapWidth'} ) );
          my $flnkEnd = $mAlign->getAlignPosFromBPPos( $absStart - 1 );
          push @dupDelCols,
              {
                'start' => $flnkStart,
                'end'   => $flnkEnd,
                'type'  => 'dupFlank'
              };
          push @dupDelCols,
              {
                'start' => $adjStart,
                'end'   => $adjEnd,
                'type'  => "duplication"
              };
          $flnkStart = $mAlign->getAlignPosFromBPPos( $absEnd + 1 );
          $flnkEnd   =
              $mAlign->getAlignPosFromBPPos(
                                      $absEnd + abs( $pair->{'avgGapWidth'} ) );
          push @dupDelCols,
              {
                'start' => $flnkStart,
                'end'   => $flnkEnd,
                'type'  => 'dupFlank'
              };
        }
        else {

          # Unknown
          print "Calling an Unknown\n";
          push @dupDelCols,
              {
                'start' => $adjStart,
                'end'   => $adjEnd,
                'type'  => "unknown"
              };
        }
      }
    }
  }

  ## Calculate low scoring columns
  my $matrix = SequenceSimilarityMatrix->new();
  $matrix->parseFromFile(
                    "/opt/gensoft/exe/RepeatModeler/2.0.5/share/Matrices/wublast/nt/comparison.matrix" );
  my $columns;
  my $scoreArray;
  if ( defined $parameters{'threshold'} ) {
    ( $columns, $scoreArray ) = $mAlign->getLowScoringAlignmentColumns(
                                           matrix    => $matrix,
                                           threshold => $parameters{'threshold'}
    );
  }
  else {
    ( $columns, $scoreArray ) =
        $mAlign->getLowScoringAlignmentColumns( matrix => $matrix );
  }

  #print "Low scoring columns:\n";
  #foreach my $col ( @{$columns} ) {
  #  print "Col  start = " . $col->[0] . " end = " . $col->[1] . "\n";
  #}

  print $OUT "<PRE>\n";

  # Print out scoreArray just for fun
  #   find the largest/smallest score
  my $max = 0;
  for ( my $j = 0 ; $j <= $#{$scoreArray} ; $j++ ) {
    my $num = sprintf( "%0.1f", $scoreArray->[ $j ] );
    $max = length( $num ) if ( $max < length( $num ) );
    $scoreArray->[ $j ] = $num;
  }
  my $numCols = $max;
  my @lines   = ();
  foreach my $num ( @{$scoreArray} ) {
    my $paddedNum = ' ' x ( $numCols );
    if ( $num != 0 ) {
      $paddedNum = ' ' x ( $numCols - length( $num ) ) . $num;
    }
    for ( my $j = 0 ; $j < $numCols ; $j++ ) {
      $lines[ $j ] .= substr( $paddedNum, $j, 1 );
    }
  }
  my $label = "lowQualScore";
  my $paddedLabel = $label . " " x ( 30 - length( $label ) );
  foreach my $line ( @lines ) {
    print $OUT $paddedLabel . ": " . ' ' x ( $maxLeftLen ) . $line . "\n";
  }

  # First print the consensus sequence
  my $lineStart = 0;
  my $name      = "consensus";
  my $namePad   = 30 - length( $name );
  my $seq;

# 20190605: Allow user to specify inclusion of the reference in consensus calling
#    $mAlign->consensus(
#                  "/opt/gensoft/exe/RepeatModeler/2.0.5/share/Matrices/linupmatrix" );
  if ( $inclRef ) {
    $seq = $mAlign->consensus( inclRef => 1 );
  }
  else {
    $seq = $mAlign->consensus();
  }
  print $OUT "<b><i>$name</i></b>"
      . ' ' x $namePad . ": "
      . ' ' x ( $maxLeftLen )
      . "<font color=\"blue\">$seq</font>\n";

  my $name    = "Reference ( " . $mAlign->getReferenceName() . " )";
  my $namePad = 30 - length( $name );
  my $seq     = $mAlign->getReferenceSeq();
  print $OUT "<b><i>$name</i></b>"
      . ' ' x $namePad . ": "
      . ' ' x ( $maxLeftLen )
      . "<font color=\"blue\">$seq</font>\n";

  # Now print the reference and the instances.
  for ( my $i = 0 ; $i < $mAlign->getNumAlignedSeqs() ; $i++ ) {
    $name = $mAlign->getAlignedName( $i );
    if ( length( $name ) > 30 ) {
      $name = substr( $name, 0, 30 );
    }
    $namePad = 30 - length( $name );

    my $lfSeq = $mAlign->getLeftFlankingSequence( $i );

    my $seq .=
        ' ' x
        ( $maxLeftLen - ( length( $lfSeq ) - $mAlign->getAlignedStart( $i ) ) );
    if ( $parameters{'leftFlankingID'} eq $i - 1 ) {
      $seq .= "<font color=\"blue\">" . lc( $lfSeq ) . "</font>";
    }
    else {
      $seq .= lc( $lfSeq );
    }

    # Highlight low scoring columns
    if ( $parameters{'printDupDelCols'} == 1 ) {
      $seq .= "<b>";
      my $seqPos = 0;
      foreach my $col ( @dupDelCols ) {
        my $start = $col->{'start'} - $mAlign->getAlignedStart( $i );
        my $end   = $col->{'end'} - $mAlign->getAlignedStart( $i );
        next if ( $start < 0 && $end < 0 );
        $start = 0 if ( $start < 0 );
        $end = length( $mAlign->getAlignedSeq( $i ) ) - $start - 1
            if ( $end < 0 );
        $seq .=
            substr( $mAlign->getAlignedSeq( $i ), $seqPos, $start - $seqPos );
        $seq .= "<font class=\"" . $col->{'type'} . "\">";
        $seq .=
            substr( $mAlign->getAlignedSeq( $i ), $start, $end - $start + 1 );
        $seq .= "</font>";
        $seqPos = $end + 1;
      }
      if ( $seqPos < length( $mAlign->getAlignedSeq( $i ) ) - 1 ) {
        $seq .= substr( $mAlign->getAlignedSeq( $i ), $seqPos );
      }
      $seq .= "</b>";
    }
    elsif ( $#{$columns} >= 0 ) {
      $seq .= "<b>";
      my $seqPos = 0;
      foreach my $col ( @{$columns} ) {
        my $start = $col->[ 0 ] - $mAlign->getAlignedStart( $i );
        my $end   = $col->[ 1 ] - $mAlign->getAlignedStart( $i );
        next if ( $start < 0 && $end < 0 );
        $start = 0 if ( $start < 0 );
        $end = length( $mAlign->getAlignedSeq( $i ) ) - $start - 1
            if ( $end < 0 );
        $seq .=
            substr( $mAlign->getAlignedSeq( $i ), $seqPos, $start - $seqPos );
        $seq .= "<font class=\"lowQual\">";
        $seq .=
            substr( $mAlign->getAlignedSeq( $i ), $start, $end - $start + 1 );
        $seq .= "</font>";
        $seqPos = $end + 1;
      }
      if ( $seqPos < length( $mAlign->getAlignedSeq( $i ) ) - 1 ) {
        $seq .= substr( $mAlign->getAlignedSeq( $i ), $seqPos );
      }
      $seq .= "</b>";
    }
    else {
      $seq .= "<b>" . $mAlign->getAlignedSeq( $i ) . "</b>";
    }

    if ( $parameters{'rightFlankingID'} eq $i - 1 ) {
      $seq .=
            "<font color=\"blue\">"
          . lc( $mAlign->getRightFlankingSequence( $i ) )
          . "</font>";
    }
    else {
      $seq .= lc( $mAlign->getRightFlankingSequence( $i ) );
    }

    print $OUT "<b>$name</b>" . ' ' x $namePad . ": $seq\n";
  }

  if ( defined $parameters{'printHistogram'} ) {
    print $OUT "\n\n";
    my @columnSeqs = ();
    my $maxRows    = 0;
    foreach my $col ( @{$columns} ) {
      my ( $cons, $seqsRef ) = $mAlign->getAlignmentBlock(
                                                           start => $col->[ 0 ],
                                                           end   => $col->[ 1 ],
                                                           rawSequences => 1
      );
      push @columnSeqs, [ sort { length( $b ) <=> length( $a ) } @{$seqsRef} ];
      $maxRows = $#{$seqsRef} + 1 if ( $maxRows < ( $#{$seqsRef} + 1 ) );
    }
    $label = "blockSeqs";
    $paddedLabel = $label . " " x ( 30 - length( $label ) );
    for ( my $i = 0 ; $i < $maxRows ; $i++ ) {
      my $seq = " " x ( $maxLeftLen );
      my $pos = 0;
      for ( my $j = 0 ; $j <= $#{$columns} ; $j++ ) {
        my $col      = $columns->[ $j ];
        my $colWidth = $col->[ 1 ] - $col->[ 0 ] + 1;
        $seq .= " " x ( $col->[ 0 ] - $pos );
        my $colSeqArray = $columnSeqs[ $j ];
        my $colSeq      = "";
        if ( $#{$colSeqArray} >= 0 ) {
          $colSeq = shift @{$colSeqArray};
          $colSeq = "." if ( $colSeq eq "" );
        }
        $seq .= $colSeq . " " x ( $colWidth - length( $colSeq ) );
        $pos = $col->[ 1 ] + 1;
      }
      print $OUT "$paddedLabel: $seq\n";
    }
    print $OUT "\n\n";
    if ( defined $parameters{'newConsBlocks'} ) {
      my $newConsBlocks = $parameters{'newConsBlocks'};
      $label = "blockSeqCons";
      $paddedLabel = $label . " " x ( 30 - length( $label ) );
      my $seq = " " x ( $maxLeftLen );
      my $pos = 0;
      for ( my $j = 0 ; $j <= $#{$newConsBlocks} ; $j++ ) {
        my $colWidth =
            $newConsBlocks->[ $j ]->{'end'} -
            $newConsBlocks->[ $j ]->{'start'} + 1;
        $seq .= " " x ( $newConsBlocks->[ $j ]->{'start'} - $pos );
        my $colSeq = $newConsBlocks->[ $j ]->{'cons'};
        $seq .=
              "<font color=\"blue\">" . $colSeq
            . "</font>"
            . "*" x ( $colWidth - length( $colSeq ) );
        $pos = $newConsBlocks->[ $j ]->{'end'} + 1;
      }
      print $OUT "$paddedLabel: $seq\n";
    }
    if ( defined $parameters{'finalConsensus'} ) {
      $label = "originalCons";
      $paddedLabel = $label . " " x ( 30 - length( $label ) );
      my $seq =
            " " x ( $maxLeftLen )
          . "<font color=\"red\">"
          . $mAlign->consensus( "/opt/gensoft/exe/RepeatModeler/2.0.5/share/Matrices/linupmatrix" )
          . "</font>";
      print $OUT "$paddedLabel: $seq\n";
      $label = "finalCons";
      $paddedLabel = $label . " " x ( 30 - length( $label ) );
      my $seq =
            " " x ( $maxLeftLen )
          . "<font color=\"blue\">"
          . $parameters{'finalConsensus'}
          . "</font>";
      print $OUT "$paddedLabel: $seq\n";
    }
  }

  print $OUT "</PRE>\n";

}

## TODO: Should we consider the reference
##       when looking for the longest flanking extension?
sub getLongestFlankingExtension {
  my %parameters = @_;

  my $mAlign = $parameters{'multAln'};

  my $maxLeftFlankingSeq  = "";
  my $maxLeftID           = -1;
  my $maxRightFlankingSeq = "";
  my $maxRightEnd         = length( $mAlign->getReferenceSeq() );
  my $maxRightID          = -1;
  foreach my $seqNum ( 0 .. $mAlign->getNumAlignedSeqs() - 1 ) {
    if ( $mAlign->getAlignedStart( $seqNum ) == 0
         && length( $mAlign->getLeftFlankingSequence( $seqNum ) ) >
         length( $maxLeftFlankingSeq ) )
    {
      $maxLeftFlankingSeq = $mAlign->getLeftFlankingSequence( $seqNum );
      $maxLeftID          = $seqNum;
    }
    if ( $mAlign->getAlignedEnd( $seqNum ) == $maxRightEnd
         && length( $mAlign->getRightFlankingSequence( $seqNum ) ) >
         length( $maxRightFlankingSeq ) )
    {
      $maxRightFlankingSeq = $mAlign->getRightFlankingSequence( $seqNum );
      $maxRightID          = $seqNum;
    }
  }
  ## TODO: The IDs have been adjusted to reflect the number in the Search
  ##       result collection....should adjust this back after we move
  ##       this routine into an object.
  print $CLASS
      . "::getLongestFlankingExtension: Returned ($maxLeftFlankingSeq," . " "
      . ( $maxLeftID - 1 )
      . ", $maxRightFlankingSeq, "
      . ( $maxRightID - 1 ) . "\n"
      if ( $DEBUG );

  return (
           $maxLeftFlankingSeq,  $maxLeftID - 1,
           $maxRightFlankingSeq, $maxRightID - 1
  );
}

##-------------------------------------------------------------------------##
## Use
##-------------------------------------------------------------------------##
sub resolveLowQualityBlocks {
  my %parameters = @_;

  croak $CLASS. "::resolveLowQualityBlocks: multAln parameter is missing!\n"
      if ( !defined $parameters{'multAln'} );
  my $mAlign = $parameters{'multAln'};

  my $matrix = SequenceSimilarityMatrix->new();
  $matrix->parseFromFile(
                    "/opt/gensoft/exe/RepeatModeler/2.0.5/share/Matrices/wublast/nt/comparison.matrix" );
  my $lupMatrix = SequenceSimilarityMatrix->new();
  $lupMatrix->parseFromFile( "/opt/gensoft/exe/RepeatModeler/2.0.5/share/Matrices/linupmatrix" );

  ## TODO: pass in penalties
  my ( $columns, $scoreArray ) =
      $mAlign->getLowScoringAlignmentColumns( matrix => $matrix );

  my @columnCons = ();
  foreach my $col ( @{$columns} ) {
    my $blockWidth = $col->[ 1 ] - $col->[ 0 ] + 1;

    if ( $blockWidth > 1 && $blockWidth <= 50 ) {
      my ( $refSeq, $instSeqs ) = $mAlign->getAlignmentBlock(
                                                           start => $col->[ 0 ],
                                                           end   => $col->[ 1 ],
                                                           rawSequences => 1
      );

      print "Considering column with cons: $refSeq\n" if ( $DEBUG );

      # Only consider blocks with 3 or more sequences
      if ( ( $#{$instSeqs} + 1 ) >= 4 ) {

        # Determine the most frequent length sequence
        my %seqLengthHisto = ();
        foreach my $seq ( @{$instSeqs} ) {
          print "  S: " . $seq . "\n" if ( $DEBUG );
          $seqLengthHisto{ length( $seq ) }++;
        }
	# RMH: 9/19/23 This wasn't deterministic.  If two lengths
	#      had equal counts it randomly chose one or the other.
	#      Now it always chooses the longest one first.
	my @keysByLenCountThenLen = 
            sort { $seqLengthHisto{$b} <=> $seqLengthHisto{$a} ||
	           $b <=> $a }
            keys( %seqLengthHisto );
        my $mostFreqLen   = shift @keysByLenCountThenLen;
        my $mostFreqCount = $seqLengthHisto{$mostFreqLen};

        # Check to see if it's the same length as the reference/consensus
print("mostFreqLen = $mostFreqLen, refSeq = $refSeq len=" . length($refSeq) . "\n") if ( $DEBUG );
        if ( $mostFreqLen != length( $refSeq ) ) {

          print "  - Block mostfreqLen = $mostFreqLen occurs in "
              . "$mostFreqCount out of "
              . ( $#{$instSeqs} + 1 )
              . " sequences.\n"
              if ( $DEBUG );
          if (    $mostFreqCount > ( .5 * ( $#{$instSeqs} + 1 ) )
               && $mostFreqCount >= 3 )
          {

            # There is a much better choice here.
            my @newConsSeqs = ();
            foreach my $seq ( @{$instSeqs} ) {
              if ( length( $seq ) == $mostFreqLen ) {
                push @newConsSeqs, $seq;
              }
            }

          # create a new consensus
          #my $newCons = MultAln::buildConsensusFromArray(
          #                                          matrix    => $lupMatrix,
          #                                          sequences => \@newConsSeqs
          #);
          # Lineupmatrix is now the hardcoded default.  No need to load it here.
            my $newCons =
                MultAln::buildConsensusFromArray( sequences => \@newConsSeqs );
            print "  -- Made a call! newCons = $newCons\n" if ( $DEBUG );
            push @columnCons,
                {
                  'start' => $col->[ 0 ],
                  'end'   => $col->[ 1 ],
                  'cons'  => $newCons
                };

          }
          else {
            print "  -- Aligning block....\n" if ( $DEBUG );
            ## Perform an all vs all of the sequences
            my %seqScore = ();
            my @results  = ();
            for ( my $i = 0 ; $i <= $#{$instSeqs} ; $i++ ) {
              for ( my $j = 0 ; $j <= $#{$instSeqs} ; $j++ ) {
                next if ( $j == $i );
                my $searchResult = NeedlemanWunschGotohAlgorithm::search(
                  querySeq   => $instSeqs->[ $i ],
                  subjectSeq => $instSeqs->[ $j ],

                  #matrixFile =>
                  #    "/opt/gensoft/exe/RepeatModeler/2.0.5/share/Matrices/linupmatrix",
                  matrix         => $lupMatrix,
                  insOpenPenalty => -25,
                  insExtPenalty  => -5,
                  delOpenPenalty => -25,
                  delExtPenalty  => -5
                );

                $seqScore{$i} += $searchResult->getScore();
                push @{ $results[ $i ] }, $searchResult;
              }
            }

	    # RMH: 9/19/23 Added secondary sort to ensure deterministic behaviour
            my @instSeqSortedByScore = sort { $seqScore{$b} <=> $seqScore{$a} ||
	                                      $a cmp $b }
                keys( %seqScore );
            my $maxScoreQueryIdx = shift @instSeqSortedByScore;
            my $maxScore         = $seqScore{$maxScoreQueryIdx};

            # For now just print out the alignments
            print "---------------Alignments for col-----------------\n"
                if ( $DEBUG );
            my $src = SearchResultCollection->new();
            print "Score = $maxScore\n" if ( $DEBUG );
            foreach my $align ( @{ $results[ $maxScoreQueryIdx ] } ) {
              print ""
                  . $align->toStringFormatted( SearchResult::AlignWithQuerySeq )
                  . "\n"
                  if ( $DEBUG );
              $src->add( $align );
            }

            my $ma = MultAln->new( searchCollection          => $src,
                                   searchCollectionReference => MultAln::Query
            );

            #my $newCons =
            #    $ma->consensus(
            #        "/opt/gensoft/exe/RepeatModeler/2.0.5/share/Matrices/linupmatrix" );
            my $newCons = $ma->consensus();

            print "The new block consensus = $newCons\n" if ( $DEBUG );
            print "--------------------------------------------------\n"
                if ( $DEBUG );

            $newCons =~ s/-//g;

            push @columnCons,
                {
                  'start' => $col->[ 0 ],
                  'end'   => $col->[ 1 ],
                  'cons'  => $newCons
                };
            undef $ma;
            undef $src;

          }
        }
        else {

          #print " -- Same length as consensus!\n";
        }
      }
    }
    elsif ( $blockWidth > 100 && $DEBUG ) {
      warn $CLASS
          . "::resolveLowQualityBlocks(): Skipping low quality block "
          . "because it is too big to align with perl. blockWidth=$blockWidth\n";
    }
  }

  return ( \@columnCons );

}

##-------------------------------------------------------------------------##
## Use: findHighestScoringAlignmentSet(
##                                      useSubjAsRef => scalar,
##                                      searchCollection => ref,
##                                    );
##
##  Returns
##
##    Given a set of All-vs-All alignments find the set of hits against
##    one sequence which scores the highest.  In this case score is the
##    sum of all the individual alignment scores.  Prior to suming the
##    scores alignments are trimmed if they overlap an existing alignment
##    by more than 20%, are on the same strand and have a lower score.
##
##    Modifies searchCollection.
##
##-------------------------------------------------------------------------##
sub findHighestScoringAlignmentSet {
  my %parameters = @_;

  # Which sequence (Query/Subject) represents the reference
  # sequence and which one represents the instance sequences.
  my $refStart      = "getQueryStart";
  my $refEnd        = "getQueryEnd";
  my $refName       = "getQueryName";
  my $refRemaining  = "getQueryRemaining";
  my $instStart     = "getSubjStart";
  my $instEnd       = "getSubjEnd";
  my $instName      = "getSubjName";
  my $instRemaining = "getSubjRemaining";
  if ( defined $parameters{'useSubjectAsRef'} ) {
    $refStart      = "getSubjStart";
    $refEnd        = "getSubjEnd";
    $refName       = "getSubjName";
    $refRemaining  = "getSubjRemaining";
    $instStart     = "getQueryStart";
    $instEnd       = "getQueryEnd";
    $instName      = "getQueryName";
    $instRemaining = "getQueryRemaining";
  }
  print "findHighestScoringAlignmentSet:\n" if ( $DEBUG );

  my $searchCollection = $parameters{'searchCollection'};
  my $highestScoringElement;

  # Deal with multiple instances.  Only keep ones which can
  # be seen as part of a global alignment
  my %elements   = ();
  my %deleteHash = ();
  my %dbl_mask_level = ();

  for ( my $l = $searchCollection->size() - 1 ; $l >= 0 ; $l-- ) {
    my $resultRef = $searchCollection->get( $l );

    my $refID  = $resultRef->$refName();
    my $instID = $resultRef->$instName();

    if ( $refID eq $instID ) {
      $deleteHash{$l} = 1;
      next;
    }

    push @{ $dbl_mask_level{$refID}->{$instID} },
        {
          'ref_start' => $resultRef->$refStart(),
          'ref_end'   => $resultRef->$refEnd(),
          'inst_start' => $resultRef->$instStart(),
          'inst_end'   => $resultRef->$instEnd(),
          'score' => $resultRef->getScore(),
          'index' => $l
        };

    if ( $resultRef->getOrientation() eq "C" ) {
      push @{ $elements{$refID}->{'reverse'}->{$instID} },
          {
            'start' => $resultRef->$refStart(),
            'end'   => $resultRef->$refEnd(),
            'score' => $resultRef->getScore(),
            'index' => $l
          };
    }
    else {
      push @{ $elements{$refID}->{'forward'}->{$instID} },
          {
            'start' => $resultRef->$refStart(),
            'end'   => $resultRef->$refEnd(),
            'score' => $resultRef->getScore(),
            'index' => $l
          };
    }
  }

  my $maskLevel = 80;

  # Single masklevel
  if ( 1 ) {
    foreach my $seqID ( keys( %elements ) ) {
  
      print "Looking for overlaps in set $seqID\n" if ( $DEBUG );
  
      # Only consider elements with more than one match
      foreach my $strand ( 'forward', 'reverse' ) {
        my $strandRec = $elements{$seqID}->{$strand};
        foreach my $instanceName ( keys( %{$strandRec} ) ) {
          if ( $#{ $strandRec->{$instanceName} } > 0 ) {
  
            # Sort by score
	    # RMH: 9/19/23
	    #   - Added secondary sorts start, and length to 
	    #     make sure this deterministic.
            my @sortedResults =
                sort { $b->{'score'} <=> $a->{'score'} ||
		       $a->{'start'} <=> $b->{'start'} ||
	               ($b->{'end'} - $b->{'start'}) <=> ($a->{'end'} - $a->{'start'}) }
                @{ $strandRec->{$instanceName} };
  
            print "     - $instanceName appears $#sortedResults times.\n" if ( $DEBUG );
            for ( my $i = 0 ; $i <= $#sortedResults ; $i++ ) {
              for ( my $j = $i + 1 ; $j <= $#sortedResults ; $j++ ) {
                my $overlap = 0;
                my $perc    = 0;
                my $result1 = $sortedResults[ $i ];
                my $result2 = $sortedResults[ $j ];
  
                # Get members
                my $begin1 = $result1->{'start'};
                my $begin2 = $result2->{'start'};
                my $end1   = $result1->{'end'};
                my $end2   = $result2->{'end'};
  
                # Check if they overlap
                next if ( $begin2 > $end1 || $begin1 > $end2 );
  
                print "OVERLAP: $begin1-$end1 and $begin2-$end2\n" if ( $DEBUG );
  
                # Calc overlap
                $overlap = $begin1 - $begin2 if ( $begin2 < $begin1 );
                $overlap += $end2 - $end1 if ( $end2 > $end1 );
                $perc = ( $overlap / ( $end2 - $begin2 + 1 ) ) * 100
                    if ( $overlap );
                if     ( $perc < ( 100 - $maskLevel ) ) {
  
                  print "Scheduling deletion for " . $result2->{'index'} . "\n"
                      if ( $DEBUG );
                  $deleteHash{ $result2->{'index'} } = 1;
                }
              }
            }
          }
        }
      }
    }
    undef %elements;
  }else {
    # Experimental double masklevel
    foreach my $seqID ( keys( %dbl_mask_level ) ) {
      print "Looking for overlaps in set $seqID\n" if ( 1 || $DEBUG );
        my $strandRec = $dbl_mask_level{$seqID};
        foreach my $instanceName ( keys( %{$strandRec} ) ) {
          if ( $#{ $strandRec->{$instanceName} } > 0 ) {
            # Sort by score
	    # RMH: 9/19/23
	    #   - Added secondary sorts start, and length to 
	    #     make sure this deterministic.
            my @sortedResults =
                sort { $b->{'score'} <=> $a->{'score'} ||
		       $a->{'start'} <=> $b->{'start'} ||
	               ($b->{'end'} - $b->{'start'}) <=> ($a->{'end'} - $a->{'start'}) }
                @{ $strandRec->{$instanceName} };
  
            print "    - $instanceName appears " . ($#sortedResults+1) . " times.\n" if ( $DEBUG );
            for ( my $i = 0 ; $i <= $#sortedResults ; $i++ ) {
              for ( my $j = $i + 1 ; $j <= $#sortedResults ; $j++ ) {
                my $result1 = $sortedResults[ $i ];
                my $result2 = $sortedResults[ $j ];
  
                # Get members
                my $rbegin1 = $result1->{'ref_start'};
                my $rbegin2 = $result2->{'ref_start'};
                my $rend1   = $result1->{'ref_end'};
                my $rend2   = $result2->{'ref_end'};
                my $ibegin1 = $result1->{'inst_start'};
                my $ibegin2 = $result2->{'inst_start'};
                my $iend1   = $result1->{'inst_end'};
                my $iend2   = $result2->{'inst_end'};
  
                print "      o  $rbegin1-$rend1 and $rbegin2-$rend2, $ibegin1-$iend1 and $ibegin2-$iend2\n" if ( $DEBUG );
  
                # Calc overlap
                my $roverlap = 0;
                unless ( $rbegin1 > $rend2 || $rend1 < $rbegin2 ) {
                  $roverlap = min($rend1, $rend2) - max($rbegin1, $rbegin2) + 1;
                }
                my $rperc = 0;
                $rperc = ( $roverlap / ( $rend2 - $rbegin2 + 1 ) ) * 100
                    if ( $roverlap );
  
                my $ioverlap = 0;
                unless ( $ibegin1 > $iend2 || $iend1 < $ibegin2 ) {
                  $ioverlap = min($iend1, $iend2) - max($ibegin1, $ibegin2) + 1;
                }
                my $iperc = 0;
                $iperc = ( $ioverlap / ( $iend2 - $ibegin2 + 1 ) ) * 100
                    if ( $ioverlap );
  
                print "        rperc = $rperc / $roverlap, iperc = $iperc\n" if ( $DEBUG );
  
                if     ( $rperc > $maskLevel || $iperc > $maskLevel ) {
                  print "        Scheduling deletion for " . $result2->{'index'} . "\n"
                      if ( $DEBUG );
                  $deleteHash{ $result2->{'index'} } = 1;
                }
              }
            }
          }
        }
      }
    undef %dbl_mask_level;
  }

  # Remove all hits which were filtered above
  if ( keys( %deleteHash ) ) {
    foreach my $index ( sort { $b <=> $a } keys( %deleteHash ) ) {
      $searchCollection->remove( $index );
    }
  }

  my %results = ();
  for ( my $l = 0 ; $l < $searchCollection->size() ; $l++ ) {
    my $resultRef = $searchCollection->get( $l );

    next if ( $resultRef->$instName() eq $resultRef->$refName() );

    print "Considering a hit: "
        . $resultRef->$refName() . " to "
        . $resultRef->$instName() . "\n"
        if ( $DEBUG );

    my $refID = $resultRef->$refName();
    $elements{$refID} += $resultRef->getScore();

    # DISABLE
    if ( $DEBUG ) {
      push @{ $results{ $resultRef->$refName() } }, $resultRef;
    }
  }

  if ( keys %elements ) { 
    # RMH: 9/19/23 added secondary sort after total score use
    #      the ID itself.
    my @sortedElementKeys =
        sort { $elements{$b} <=> $elements{$a} ||
	       $a cmp $b }
	   keys( %elements );

    if ( $DEBUG ) {
      foreach my $key ( @sortedElementKeys ) {
        print "Results for $key ( score = " . $elements{$key} . " )\n";
        foreach my $result ( @{ $results{$key} } ) {
          print "" . $result->toStringFormatted( SearchResult::NoAlign ) . "";
        }
      }
    }

    $highestScoringElement = shift @sortedElementKeys;
    print "Highest scoring element vs family is $highestScoringElement with "
        . "a score of "
        . $elements{$highestScoringElement} . "\n"
        if ( $DEBUG );

    # Keep only the alignments against the highest scoring
    # element.
    for ( my $l = $searchCollection->size() - 1 ; $l >= 0 ; $l-- ) {
      my $resultRef = $searchCollection->get( $l );
      my $refID     = $resultRef->$refName();
      my $instID    = $resultRef->$instName();
      if (
           $refID ne $highestScoringElement
           || (    $refID eq $highestScoringElement
                && $instID eq $highestScoringElement )
          )
      {
        $searchCollection->remove( $l );
      }
    }
  }
  else {
    $searchCollection->clear();
  }

  return ( $highestScoringElement );
}

##-------------------------------------------------------------------------##
## Use:  my ( $tempDir ) = &createTempDir( \@tmpPath );
##
##  Returns
##
##-------------------------------------------------------------------------##
sub createTempDir {
  my $tmpPathRef = shift;

  ## Get date
  my $date = localtime( time() );

  # Windows does not support the use of ":" in a filename.
  $date =~ s/[ ,\t,\n:]//g;

  my $runnumber = "$$" . ".$date";
  my $tempDir   = "";
  foreach my $directory ( @{$tmpPathRef} ) {

    if ( $directory =~ /\/$/ ) {
      $tempDir = $directory . "RM_$runnumber";
    }
    else {
      $tempDir = $directory . "/RM_$runnumber";
    }

    if ( -d "$tempDir" || mkdir $tempDir, 0777 ) {
      if ( open( IN, ">$tempDir/deleteMe" ) ) {
        close IN;
        unlink( "$tempDir/deleteMe" );
        last;
      }
    }
    $tempDir = "";
  }
  return ( $tempDir );
}

##-------------------------------------------------------------------------##
## Use: my $string = elapsedTime( $index );
##
##   Returns
##
##      Great little utility for measuring the elapsed
##      time between one or more lines of perl code.
##
##      --- Depends on a global variable $TimeBefore
##-------------------------------------------------------------------------##
sub elapsedTime {
  my ( $TimeHistIdx ) = @_;
  if ( defined $TimeBefore[ $TimeHistIdx ] ) {
    my $DiffTime = time - $TimeBefore[ $TimeHistIdx ];
    $TimeBefore[ $TimeHistIdx ] = time;
    my $Min = int( $DiffTime / 60 );
    $DiffTime -= $Min * 60;
    my $Hours = int( $Min / 60 );
    $Min -= $Hours * 60;
    my $Sec = $DiffTime;
    return "$Hours:$Min:$Sec Elapsed Time";
  }
  else {
    $TimeBefore[ $TimeHistIdx ] = time;
    return 0;
  }
}

sub max {
  my $a = shift;
  my $b = shift;
  if ( $a >= $b ) {
    return $a; 
  }else {
    return $b;
  }
}

sub min {
  my $a = shift;
  my $b = shift;
  if ( $a <= $b ) {
    return $a; 
  }else {
    return $b;
  }
}


1;

