#! /opt/gensoft/adm/bin/perl
use lib "/opt/gensoft/exe/RepeatModeler/2.0.5/lib/perl5";
use lib "/opt/gensoft/exe/RepeatModeler/2.0.5/lib/perl5";
##---------------------------------------------------------------------------##
##  File:
##      @(#) Linup
##  Author:
##      Robert M. Hubley   rhubley@systemsbiology.org
##  Description:
##
#******************************************************************************
#*  This software is provided ``AS IS'' and any express or implied            *
#*  warranties, including, but not limited to, the implied warranties of      *
#*  merchantability and fitness for a particular purpose, are disclaimed.     *
#*  In no event shall the authors or the Institute for Systems Biology        *
#*  liable for any direct, indirect, incidental, special, exemplary, or       *
#*  consequential damages (including, but not limited to, procurement of      *
#*  substitute goods or services; loss of use, data, or profits; or           *
#*  business interruption) however caused and on any theory of liability,     *
#*  whether in contract, strict liability, or tort (including negligence      *
#*  or otherwise) arising in any way out of the use of this software, even    *
#*  if advised of the possibility of such damage.                             *
#*                                                                            *
#******************************************************************************
#
# ChangeLog
#
#     $Log: Linup,v $
#     Revision 1.12  2017/04/05 00:03:32  rhubley
#     Cleanup before a distribution
#
#
###############################################################################
#
# To Do:
#

=head1 NAME

Linup - Display or convert MSA into various formats

=head1 SYNOPSIS

  Linup [-version] [-i] [-matrix <matrix_file>
                          [-cgParam #] [-taParam # ] [-cgTransParam #]
                        ]
                        [[[-trimLeft #] [-trimRight #] [-trimAmbig]] | -revcomp] 
                        [-name familyname]
                        [-normalizeCoord] [-showScore] 
                        [-includeFlanking # -genome <2bit_file>]
                        [-malignOut <file>]
                        [-stockholm | -msf | 
                         -consensus | -msa
                         -fasta | -stats | -align]
        <crossmatch file> | <stockholm file> | <msa fasta> | <malign file>


=head1 DESCRIPTION

Output Formats:
  MSF - GCG Wisconsin Package Format
  Stockholm - Stockholm format used by Dfam/Pfam/Rfam
  MSA - Aligned sequences stored in FASTA/A2M format
  FASTA - All sequences without alignment characters

The options are:

=over 4

=item -version

Displays the version of the program

=item -i

Include reference sequence in new consensus calculation.

=item -stockholm

Write out multiple alignment in Stockholm format.

=item -msf

Write out the multiple aligment in MSF format.

=item -msa

Write out the multiple alignment in MSA ( FASTA/A2M ) format.

=item -fasta

Write out the sequences in FASTA format.  NOTE: This
removes the gap characters and only exports the raw
sequences.

=item -consensus

Calculate the consensus and output in FASTA format excluding
the gap characters.

=item -showScore

Include score in the default Linup output format.

=item -revcomp

Reverse complement the multiple alignment.  Currently this
may not be used at the same time as -trimLeft/-trimRight.

=item -genome <2bit file>

This parameter is used in conjunction with the -msa and 
-includeFlanking parameters.  The 2bit file should contain
the sequences (and flanking sequence) that were used to
generate the alignments.  

=item -includeFlanking #

Include up to #bp of flanking sequence when using the -msa output
format and only if the -genome parameter is specified.  The same 
size string is appended/prepended to each aligned sequence 
regardless of available flanking bases using the "-" to pad out 
any differences.  The amount of available sequence is dependent 
on what is in the -genome file.

=item -malignOut <file>

Serialize the state of the MultAln object to file.  May be read
back in as input at a later time or used to by utilities that 
support MultAln.

=item -trimLeft # | -trimRight #

Trim the left or right side of the MSA by provided number of 
consensus bases ( not alignment columns ).  

=back

=head1 SEE ALSO

=head1 COPYRIGHT

Copyright 2012-2021 Robert Hubley, Institute for Systems Biology

=head1 AUTHOR

Robert Hubley <rhubley@systemsbiology.org>
Arian Smit <asmit@systemsbiology.org>

=cut

#
# Module Dependence
#
use strict;
use FindBin;
use Getopt::Long;
use Data::Dumper;
use Cwd;
use File::Spec;
use File::Basename;
use File::Temp qw/ tempfile tempdir /;

# RepeatModeler Libraries
use lib $FindBin::RealBin;
use lib "$FindBin::RealBin/..";
use RepModelConfig;
use lib $RepModelConfig::configuration->{'REPEATMASKER_DIR'}->{'value'};
use MultAln;
use SeedAlignment;

# RepeatMasker Libraries
use RepeatUtil;
use SearchResult;
use SearchResultCollection;
use WUBlastSearchEngine;
use NCBIBlastSearchEngine;
use CrossmatchSearchEngine;
use FastaDB;


my $Version    = $RepModelConfig::VERSION;
#my $ucscToolsDir = "/usr/local/bin";
my $ucscToolsDir =  $RepModelConfig::configuration->{'UCSCTOOLS_DIR'}->{'value'};

#
# Magic numbers/constants here
#  ie. my $PI = 3.14159;
#
my $DEBUG = 0;
$DEBUG = 1 if ( $RepModelConfig::DEBUG == 1 );

#
# Option processing
#  e.g.
#   -t: Single letter binary option
#   -t=s: String parameters
#   -t=i: Number paramters
#
my @getopt_args = (
                    '-version',          # print out the version and exit
                    '-i',
                    '-align',
                    '-stockholm',
                    '-showScore',
                    '-malignOut=s',
                    '-msf',
                    '-fasta',
                    '-msa',
                    '-consensus',
                    '-consIncludeGaps',
                    '-consNoHeader',
                    '-name=s',
                    '-trimLeft=s',
                    '-trimRight=s',
                    '-trimAmbig',
                    '-normalizeCoord',
                    '-matrix=s',
                    '-revcomp',
                    '-stats',
                    '-genome=s',
                    '-includeFlanking=i',
                    '-noTemplate',
                    '-cgParam=s',
                    '-taParam=s',
                    '-gesp',
                    '-cgTransParam=s',
);

my %options = ();
Getopt::Long::config( "noignorecase", "bundling_override" );
unless ( GetOptions( \%options, @getopt_args ) )
{
  usage();
}

sub usage
{
  print "$0 - $Version\n";
  exec "pod2text $0";
  exit;
}

if ( $options{'version'} )
{
  print "$Version\n";
  exit;
}

my $inputFile = $ARGV[ 0 ];
if ( !-s $inputFile )
{
  print "\nCannot locate file!: $inputFile\n\n";
  usage();
}
my $inputFileDir = cwd;
my($filename, $dirs, $suffix) = fileparse($inputFile);
$inputFileDir = $dirs;


my $inclRef = 0;
$inclRef = 1 if ( $options{'i'} );

my $inclTempl = 1;
$inclTempl = 0 if ( $options{'noTemplate'} );

my $revComp = 0;
$revComp = 1 if ( $options{'revcomp'} );
if ( $revComp && ( $options{'trimLeft'} || $options{'trimRight'} )){
  print "\n\nCannot use -revcomp and -trimLeft/-trimRight at the same time\n\n";
  usage();
}
if ( $revComp && $options{'includeFlanking'} ){
  print "\n\nCannot use -revcomp and -includeFlanking at the same time\n\n";
  usage();
}

my $matrixFile;
my $cgParam;
my $taParam;
my $cgTransParam;
if ( $options{'matrix'} )
{
  $matrixFile = $options{'matrix'};
  if ( !exists $options{'cgParam'} )
  {
    print "\nMissing cgParam parameter.  Must be specified\n"
        . "if the matrix parameter is used.\n";
    usage();
  }
  $cgParam = $options{'cgParam'};
  if ( !exists $options{'taParam'} )
  {
    print "\nMissing taParam parameter.  Must be specified\n"
        . "if the matrix parameter is used.\n";
    usage();
  }
  $taParam = $options{'taParam'};
  if ( !exists $options{'cgTransParam'} )
  {
    print "\nMissing cgTransParam parameter.  Must be specified\n"
        . "if the matrix parameter is used.\n";
    usage();
  }
  $cgTransParam = $options{'cgTransParam'};
}

if ( $options{'includeFlanking'} && ! ( $options{'genome'} && $options{'msa'} ) ) {
  print "\nThe includeFlanking option is only valid with the -genome and -msa output format!\n";
  usage();
}

my ($mAlign, $fileType) = RepeatUtil::openAsMultAln($inputFile);

my $trimLeft = 0;
$trimLeft = $options{'trimLeft'} if ( defined $options{'trimLeft'} );
my $trimRight = 0;
$trimRight = $options{'trimRight'} if ( defined $options{'trimRight'} );

if ( $trimLeft || $trimRight )
{
  print STDERR "Trimming alignment: left = $trimLeft, right = $trimRight\n";
  $mAlign->trimAlignments( left => $trimLeft, right => $trimRight );
}
if ( exists $options{'trimAmbig'} ) {
  my ($ambigLeft, $ambigRight) = $mAlign->trimAlignments( left => -1, right => -1 );
  if ( $ambigLeft > 0 || $ambigRight > 0 ) {
    print STDERR "Ambiguous edges trimmed: left = $ambigLeft, right = $ambigRight\n";
  }
}
if ( defined $options{'normalizeCoord'} ) {
    $mAlign->normalizeSeqRefs();
}

if ( $revComp ) {
  $mAlign->reverseComplement();
}

if ( $options{'gesp'} ){
  my $garr = $mAlign->_getEndStartPairs();
  print "GetEndStartPairs: " . Dumper($garr) . "\n";
  exit;
}

my $cons;
if ( $matrixFile eq "" )
{
  $cons = $mAlign->consensus( inclRef => $inclRef );
} else
{
  # TODO: Finish implementing
  # open up matrix file and create object
}

my ( $null, $totDiv, $avgDiv ) = $mAlign->kimuraDivergence( $cons );

if ( $options{'stockholm'} )
{
  if ( $options{'name'} ) { 
    $mAlign->setReferenceName( $options{'name'} );
  }
  if ( $fileType eq "stockholm" ) {
    # NOTE: This doesn't provide a way to force the use of a template at this time
    open my $IN, "<$inputFile" or die "Could not open $inputFile for reading";
    my $seedAlign = SeedAlignment->new();
    $seedAlign->read_stockholm( $IN );
    close $IN;
    my $header = $seedAlign->toString(SeedAlignment::ExportHeadersOnly);
    $mAlign->toSTK( headerOverride => $header, consRF => 1 );
  }else {
    $mAlign->toSTK( includeTemplate => $inclTempl, consRF => 1 );
  }

  # TODO : streamline STK generation.  Perhaps have
  #        MultAln produce a "SeedAlignment" object.
  #        Then this object can be manipulated and
  #        exported as STK.
  #my $seedAlign = SeedAlignment->new();
  #$seedAlign->read_stockholm( $IN );
 
} elsif ( $options{'msf'} )
{
  $mAlign->toMSF( includeReference => 1 );
} elsif ( $options{'align'} ) {
  $mAlign->toAlign();
} elsif ( $options{'fasta'} )
{
  $mAlign->toFASTA( seqOnly => 1 );
} elsif ( $options{'consensus'} ) {
  if ( !$options{'consIncludeGaps'} ) {
    $cons =~ s/-//g;
  }
  if ( $options{'consNoHeader'} ) {
    print "$cons\n";
  } else {
    if ( $options{'name'} ) {
      print ">" . $options{'name'} . "\n$cons\n\n";
    }elsif ( $mAlign->getReferenceName() ne "" ) {
      print ">" . $mAlign->getReferenceName() . "\n$cons\n\n";
    }else {
      print ">consensus\n$cons\n\n";
    }
  }
}elsif ( $options{'msa'} ) {
  if ( exists $options{'includeFlanking'} && exists $options{'genome'} && $options{'includeFlanking'} > 0 ) 
  {
    my $seqLens = &getSequenceLengths($options{'genome'});
    &generateMSAWithFlanking($mAlign, $options{'genome'}, $seqLens, $options{'includeFlanking'}, $options{'includeFlanking'});
    #$mAlign->toFASTA( includeFlanking => $options{'includeFlanking'}, includeReference => $inclRef, includeConsensus => 1 );
  }else {
    $mAlign->toFASTA();
  }
}elsif ( $options{'stats'} ) {
  print "Alignment Stats:\n";
  print "\tReference Sequence\t\t\t\t\t\t\t\tConsensus Sequence\n";
  print "seq_id\tTranS\tTranSMod\tTranV\tKimura\tKimuraMod\tCpGSites\tWellCharBases\t\tTranS\tTranSmod\tTranV\tKimura\tKimuraMod\tCpGSites\tWellCharBases\n";
  my $count = 0;
  my $sum_kimura = 0;
  my $sum_kimuraMod = 0;
  my $sum_ckimura = 0;
  my $sum_ckimuraMod = 0;
  for ( my $i = 0 ; $i < $mAlign->getNumAlignedSeqs() ; $i++ ) {
    my $start = $mAlign->getAlignedStart( $i );
    my $end   = $mAlign->getAlignedEnd( $i );
    my $name = $mAlign->getAlignedName( $i );
    my $kdOrig = $mAlign->getAlignedDiv( $i );
 
    my ( $trans, $transv, $transMod, $kimura, $kimuraMod, $cpgSites, $wcb ) =
                                            $mAlign->kimuraDivergenceAlt( seqidx => $i );
    $sum_kimura += $kimura;
    $sum_kimuraMod += $kimuraMod;
    $kimura = sprintf("%0.2f",$kimura);
    $kimuraMod = sprintf("%0.2f",$kimuraMod);
    my ( $ctrans, $ctransv, $ctransMod, $ckimura, $ckimuraMod, $ccpgSites, $cwcb ) =
                                            $mAlign->kimuraDivergenceAlt( seqidx => $i, consensus => $cons );
    $sum_ckimura += $ckimura;
    $sum_ckimuraMod += $ckimuraMod;
    $ckimura = sprintf("%0.2f",$ckimura);
    $ckimuraMod = sprintf("%0.2f",$ckimuraMod);

   print "$name\t$trans\t$transMod\t$transv\t$kimura\t$kimuraMod\t$cpgSites\t$wcb\t\t";
   print "$ctrans\t$ctransv\t$ctransMod\t$ckimura\t$ckimuraMod\t$ccpgSites\t$cwcb\n";
   $count++;
  }
  print "\n\n";
  print "Total sequences: $count\n";
  print "Relative to Reference Sequence (if available):\n";
  print "\tKimura Divergence:\t" . sprintf("%0.2f", $sum_kimura / $count) . "\n";
  print "\tKimura Divergence (CpG Mod):\t" . sprintf("%0.2f", $sum_kimuraMod / $count) . "\n\n";
  print "Relative to Consensus Sequence:\n";
  print "\tKimura Divergence:\t" . sprintf("%0.2f", $sum_ckimura / $count) . "\n";
  print "\tKimura Divergence (CpG Mod):\t" . sprintf("%0.2f", $sum_ckimuraMod / $count) . "\n\n";
}else
{
  $mAlign->printAlignments(
                            blockSize => 100,
                            showCons  => 1,
                            inclRef   => $inclRef,
                            showScore => $options{'showScore'}
  );
  print "Avg Kimura Div: $avgDiv\n";
  $cons =~ s/\-//g;
  print "Cons length: " . length($cons) . "\n";
  if ( $options{'name'} ) {
    print "\n\n>" . $options{'name'} . "\n$cons\n\n";
  }else {
    print "\n\n>" . $mAlign->getReferenceName() . "\n$cons\n\n";
  }

}
#print stderr "Avg Kimura Div: $avgDiv\n";

if ( $options{'malignOut'} ) {
  $mAlign->serializeOUT( $options{'malignOut'} );
}


################################### SUBROUTINES ###################################


sub getSequenceLengths{
  my $twoBitFile = shift;

  my %seqLens = ();
  open IN, "$ucscToolsDir/twoBitInfo $twoBitFile stdout|"
      or die "Could not run $ucscToolsDir/twoBitInfo on $twoBitFile!\n";
  my $lines = 0;
  while ( <IN> )
  {
    $lines++;
    if ( /^(\S+)\s+(\d+)/ )
    {
      $seqLens{$1} = $2;
    }
  }
  close IN;
  return \%seqLens;
}

sub generateMSAWithFlanking{
  my $mAlign = shift;
  my $twoBitFile = shift;
  my $seqLens = shift;
  my $flankleft = shift;
  my $flankright = shift;

  my $DEBUG      = 0;

  my @ranges = ();
  my @order  = ();
  my ( $tmpFH, $tmpFilename ) =
      tempfile( UNLINK => 0, SUFFIX => ".seqlist", DIR => "." );
  my $maxAlignedLen = 0;
  for ( my $i = 0 ; $i < $mAlign->getNumAlignedSeqs() ; $i++ )
  {
    my $seq         = $mAlign->getAlignedSeq( $i );
    my $raw_id      = $mAlign->getAlignedName( $i );
    my $align_start = $mAlign->getAlignedSeqStart( $i );
    my $align_end   = $mAlign->getAlignedSeqEnd( $i );
    my $orient      = $mAlign->getAlignedOrientation( $i );
    my $seqOffset   = $mAlign->getAlignedStart( $i );
    if ( length( $seq ) + $seqOffset > $maxAlignedLen ) {
      $maxAlignedLen = length( $seq ) + $seqOffset;
    }
    my $align_remain = 0;
  
    my $id = "";
    my $start;
    my $end;
  
    # These have traditionally been in 1-based full-closed coordinates
    if ( $raw_id =~ /^(\S+)\_(\d+)\_(\d+)\_?([R\+\-]?)$/ )
    {
      $id    = $1;
      $start = $2;
      $end   = $3;
    } elsif ( $raw_id =~ /^(\S+):(\d+)-(\d+)\_?([R\+\-]?)$/ )
    {
      $id    = $1;
      $start = $2;
      $end   = $3;
    } else
    {
      die "I don't know how to parse this id: $raw_id\n";
    }
    $ranges[ $i ] = { 'id' => $id, 'seq' => $seq };
  
    if ( !exists $seqLens->{$id} )
    {
      warn "Could not find $id in 2bit file!\n";
    }
    my $slen = $seqLens->{$id};
  
    # Does the current sequence orientation still make sense or has
    # it reverted back to the forward strand?
    if (    $raw_id =~ /_R/ || $raw_id =~ /_-$/ 
         || $orient eq "-" && !( $raw_id =~ /_R/ && $orient eq "-" ) )
    {
  
      # Reversed orientation ( overall )
      # 1-based, fully closed
      $ranges[ $i ]->{'orient'} = "-";
      my $leftStart = $end - $align_start;
      my $leftEnd   = $leftStart - 1;
      if ( $leftEnd + $flankleft > $slen )
      {
        $leftEnd = $slen;
      } else
      {
        $leftEnd += $flankleft;
      }
      if ( $leftEnd - $leftStart > 0 )
      {
        $ranges[ $i ]->{'leftStart'} = $leftStart;
        $ranges[ $i ]->{'leftEnd'}   = $leftEnd;
        push @order, [ "L", $i ];
  
        #push @ranges, [ $i, $id, "L", $leftStart, $leftEnd, "-" ];
        print $tmpFH "$id:" . ( $leftStart - 1 ) . "-" . $leftEnd . "\n";
      }
  
      my $rightEnd   = $end - $align_end + 2;
      my $rightStart = $rightEnd + 1;
      if ( $rightStart - $flankright < 1 )
      {
        $rightStart = 1;
      } else
      {
        $rightStart -= $flankright;
      }
      if ( $rightEnd - $rightStart > 0 )
      {
        $ranges[ $i ]->{'rightStart'} = $rightStart;
        $ranges[ $i ]->{'rightEnd'}   = $rightEnd;
        push @order, [ "R", $i ];
        print $tmpFH "$id:" . ( $rightStart - 1 ) . "-" . $rightEnd . "\n";
      }
    } else
    {
      # Normal left/right
      $ranges[ $i ]->{'orient'} = "+";
      my $leftEnd = $start + $align_start -
          2;    # -2 = shift to left of alignment and correct for 1-based math.
      my $leftStart = $leftEnd + 1;
      if ( $leftStart - $flankleft < 1 )
      {
        $leftStart = 1;
      } else
      {
        $leftStart -= $flankleft;
      }
      if ( $leftEnd - $leftStart > 0 )
      {
        $ranges[ $i ]->{'leftStart'} = $leftStart;
        $ranges[ $i ]->{'leftEnd'}   = $leftEnd;
        push @order, [ "L", $i ];
        print $tmpFH "$id:" . ( $leftStart - 1 ) . "-" . $leftEnd . "\n";
      }
  
      my $rightStart = $end + $align_start;
      my $rightEnd   = $rightStart - 1;
      if ( $rightEnd + $flankright > $slen )
      {
        $rightEnd = $slen;
      } else
      {
        $rightEnd += $flankright;
      }
      if ( $rightEnd - $rightStart > 0 )
      {
        $ranges[ $i ]->{'rightStart'} = $rightStart;
        $ranges[ $i ]->{'rightEnd'}   = $rightEnd;
        push @order, [ "R", $i ];
        print $tmpFH "$id:" . ( $rightStart - 1 ) . "-" . $rightEnd . "\n";
      }
    }
  }
  close $tmpFH;
  
  # Generate sequences
  my $cmd = "$ucscToolsDir/twoBitToFa -seqList=$tmpFilename $twoBitFile stdout";
  open IN, "$cmd|" or die "Could not run $cmd!\n";
  my $idx = 0;
  my $seq = "";
  my $id;
  my $start;
  my $end;
  
  while ( <IN> )
  {
    if (    />(\S+)\:(\d+)-(\d+)/
         || />(\S+)/ )
    {
      my $tmp_id    = $1;
      my $tmp_start = $2;
      my $tmp_end   = $3;
      $tmp_start = 0 if ( !defined $tmp_start );
      $tmp_end = $seqLens->{$tmp_id} if ( !defined $tmp_end );
      if ( $seq )
      {
        my $leftOrRight = $order[ $idx ][ 0 ];
        my $rangeIdx    = $order[ $idx ][ 1 ];
        my $outID       = $id . "_" . ( $start + 1 ) . "_$end";
        if ( $ranges[ $rangeIdx ]->{'orient'} eq "-" )
        {
          $seq = reverse( $seq );
          $seq =~ tr/ACGTYRMKHBVD/TGCARYKMDVBH/;
          $outID .= "_R";
        }
        if ( $leftOrRight eq "R" )
        {
          $ranges[ $rangeIdx ]->{'rightFlank'} = $seq;
        } else
        {
          $ranges[ $rangeIdx ]->{'leftFlank'} = $seq;
        }
        $idx++;
      }
      $id    = $tmp_id;
      $start = $tmp_start;
      $end   = $tmp_end;
      $seq   = "";
      next;
    }
    s/[\n\r\s]+//g;
    $seq .= uc( $_ );
  }
  if ( $seq )
  {
    my $leftOrRight = $order[ $idx ][ 0 ];
    my $rangeIdx    = $order[ $idx ][ 1 ];
    my $outID       = $id . "_" . ( $start + 1 ) . "_$end";
    if ( $ranges[ $rangeIdx ]->{'orient'} eq "-" )
    {
      $seq = reverse( $seq );
      $seq =~ tr/ACGTYRMKHBVD/TGCARYKMDVBH/;
      $outID .= "_R";
    }
    if ( $leftOrRight eq "R" )
    {
      $ranges[ $rangeIdx ]->{'rightFlank'} = $seq;
    } else
    {
      $ranges[ $rangeIdx ]->{'leftFlank'} = $seq;
    }
  }
  close IN;
  
  for ( my $i = 0 ; $i < $mAlign->getNumAlignedSeqs() ; $i++ )
  {
    my $seq         = $mAlign->getAlignedSeq( $i );
    my $raw_id      = $mAlign->getAlignedName( $i );
    my $align_start = $mAlign->getAlignedSeqStart( $i );
    my $align_end   = $mAlign->getAlignedSeqEnd( $i );
    my $orient      = $mAlign->getAlignedOrientation( $i );
    my $seqOffset   = $mAlign->getAlignedStart( $i );
  
    my $leftFlank = "";
    if ( exists $ranges[ $i ]->{'leftFlank'} )
    {
      $leftFlank = $ranges[ $i ]->{'leftFlank'};
    }
    $leftFlank = "-" x ( $flankleft - length( $leftFlank ) ) . $leftFlank;
    my $rightFlank = "";
    if ( exists $ranges[ $i ]->{'rightFlank'} )
    {
      $rightFlank = $ranges[ $i ]->{'rightFlank'};
    }
    $rightFlank = $rightFlank . "-" x ( $flankright - length( $rightFlank ) );
  
    $seq = "-" x ( $seqOffset ) . $seq;
    $seq = $seq . "-" x ( $maxAlignedLen - length( $seq ) );
  
    #push @seqs, [ $leftFlank, $seq, $rightFlank ];
    print ">$raw_id\n" . $leftFlank . $seq . $rightFlank . "\n";
  }
  #@seqs = sort { reverse( $a->[ 0 ] ) cmp reverse( $b->[ 0 ] ) } @seqs;
}
        




1;
