#!/usr/bin/env python
# splitcombine - program to split volcombine for parallel processing
#
# Author: David Mastronarde
#
# $Id: splitcombine,v bbc2abd22ec9 2024/09/11 17:19:17 mast $
#
# Name of this program, and the prefix placed in front of error messages
progname = 'splitcombine'
prefix = 'ERROR: %s - ' % progname

def warning(text):
   """Report a warning on standard error.

   Two prnstr calls are made, both directed at sys.stderr: the first passes a
   string holding a single space, the second passes 'WARNING: ' followed by
   text and a newline character.
   """
   errStream = sys.stderr
   message = ''.join(('WARNING: ', text, '\n'))
   prnstr(' ', file=errStream)
   prnstr(message, file=errStream)

#### MAIN PROGRAM  ####
#
# load System Libraries
import os, sys, re, glob

#
# Setup runtime environment: IMOD_DIR must be defined so that the IMOD Python
# library directory can be put at the front of the module search path
IMOD_DIR = os.environ.get('IMOD_DIR')
if IMOD_DIR is None:
   sys.stdout.write(prefix + " IMOD_DIR is not defined!\n")
   sys.exit(1)

# Under Cygwin with Python 3, switch to forward slashes and turn a leading
# drive specification such as C:/ into /cygdrive/c/
if sys.platform == 'cygwin' and sys.version_info[0] > 2:
   IMOD_DIR = IMOD_DIR.replace('\\', '/')
   if IMOD_DIR[1] == ':' and IMOD_DIR[2] == '/':
      IMOD_DIR = '/cygdrive/' + IMOD_DIR[0].lower() + IMOD_DIR[2:]

sys.path.insert(0, os.path.join(IMOD_DIR, 'pylib'))
from imodpy import *
addIMODbinIgnoreSIGHUP()

#
# load IMOD Libraries
from pip import *
from pysed import *

# Fallbacks from ../manpages/autodoc2man 3 1 splitcombine
options = ["comfile:CommandFile:FN:", "tempdir:TemporaryDirectory:FN:",
           "local:LocalTempPath:FN:", "global:GlobalTempPath:FN:", "help:Usage:B:"]

(numOpts, numNonOpts) = PipReadOrParseOptions(sys.argv, options, progname, 0, 0, 0)

# Command file name: use volcombine when none is returned, then let
# completeAndCheckComFile supply the final name and the root name
comfile = PipGetInOutFile('CommandFile', 0)
if comfile is None:
   comfile = 'volcombine'
comfile, rootname = completeAndCheckComFile(comfile)

# The last four characters of the name are reused as the extension of the
# command files written below
comExt = comfile[-4:]

tempdir = PipGetString('TemporaryDirectory', '')

# PipGetErrNo is queried right after each PipGetString call; each flag is
# 1 minus that value (presumably 1 when the option was entered - TODO confirm)
local = PipGetString('LocalTempPath', 'gibberish')
iflocal = 1 - PipGetErrNo()
globdir = PipGetString('GlobalTempPath', 'reallyjunk')
ifglobal = 1 - PipGetErrNo()

# The two path options are only meaningful as a pair
if iflocal + ifglobal == 1:
   exitError('If you enter one of -local and -global, you must enter both')

# Both backslashes and forward slashes must be escaped because these paths are
# inserted into a pysed command; backslashes are doubled first so that the
# escapes added for forward slashes are not doubled as well
if iflocal:
   local = local.replace('\\', '\\\\').replace('/', '\\/')
   globdir = globdir.replace('\\', '\\\\').replace('/', '\\/')

# Read command file and scan it once to collect:
#   sectStarts    - index of each 'COMBINING PIECE' line plus the 'ASSEMBLING'
#                   line, i.e. the boundaries of the chunks and of the final section
#   optionLine1-4 - the first line matching each setting that is copied into
#                   every chunk command file
#   usingUsrTmp, usingTmp - whether /usr/tmp or /tmp appears anywhere
#   gotInitChunk  - whether an 'INITIALIZING CHUNKED' line is present
comlines = readTextFile(comfile)
optionLine1 = optionLine2 = optionLine3 = optionLine4 = None
sectStarts = []
gotAssemble = False
opt1Match = re.compile('set *combinefft_red.*=')
opt2Match = re.compile('set *combinefft_low.*=')
usingTmp = usingUsrTmp = False
gotInitChunk = False
lockName = ''

for ln, line in enumerate(comlines):

   # Section starts are recorded only up to and including the ASSEMBLING line
   if not gotAssemble and 'COMBINING PIECE' in line:
      sectStarts.append(ln)
   if not gotAssemble and 'ASSEMBLING' in line:
      sectStarts.append(ln)
      gotAssemble = True

   # Keep the first occurrence of each option line
   if not optionLine1 and opt1Match.search(line):
      optionLine1 = line
   if not optionLine2 and opt2Match.search(line):
      optionLine2 = line
   if not optionLine3 and 'setenv IMOD_BRIEF_HEADER' in line:
      optionLine3 = line

   # This test must be guarded by optionLine4 itself; guarding it with
   # optionLine3 would lose the output format line whenever a brief header
   # line had already been found
   if not optionLine4 and 'setenv IMOD_OUTPUT_FORMAT' in line:
      optionLine4 = line

   # A line containing /usr/tmp also contains /tmp, so both flags get set then
   if not usingUsrTmp and '/usr/tmp' in line:
      usingUsrTmp = True
   if not usingTmp and '/tmp' in line:
      usingTmp = True
   if not gotInitChunk and 'INITIALIZING CHUNKED' in line:
      gotInitChunk = True
   
# The last entry in sectStarts is the final section, so there is one fewer
# chunk than section starts
numChunks = len(sectStarts) - 1
if not gotAssemble or numChunks < 1:
   exitError('The command file is missing chunks or the assemblevol section')

# Warn about temporary directory usage; the /usr/tmp message takes precedence
tmpWarning = None
if usingUsrTmp:
   tmpWarning = 'This command file accesses /usr/tmp and will not run on multiple machines'
elif usingTmp:
   tmpWarning = 'This command file accesses /tmp and may not run on multiple machines'
if tmpWarning:
   warning(tmpWarning)

# Try to extract the master temporary directory from the first chunk: look up
# the three FFT file options there, in this order
fftOpts = tuple([optionValue(comlines[sectStarts[0]:sectStarts[1]], optName, 0)
                 for optName in ('AInputFFT', 'BInputFFT', 'OutputFFT')])
(inputFFTa, inputFFTb, outputFFT) = fftOpts

# Use the directory part of the first entry that has one, after converting
# backslashes to forward slashes
sumdir = ''
for fftName in fftOpts:
   if not fftName:
      continue
   sumdir = os.path.dirname(fftName.replace('\\', '/'))
   if sumdir:
      break

# A directory that was found has to exist and be writable
if sumdir:
   if not (os.path.isdir(sumdir) and os.access(sumdir, os.W_OK)):
      exitError('Unable to write sum*.rec to directory ' + sumdir)

# Replacement prefixes for the rec. and mat. files in the chunk command files;
# the slash is escaped because these go into pysed substitutions
tmprec = r'$tmpdir\/rec.'
tmpmat = r'$tmpdir\/mat.'

# Remove any previous files now in case the number has changed
cleanChunkFiles(rootname)

# This pysed command list is applied to the start, chunk, and finish files;
# it always substitutes the global path for the local one
localcom = [fmtstr('/{}/s//{}/g', local, globdir)]
if gotInitChunk:
   if not outputFFT:
      exitError('Cannot get name of output file for combine')

   # The lock file is named after the output file, without its directory
   sumname = os.path.basename(outputFFT)
   lockName = sumname + '.lock'
   touchCom = '/INITIALIZING CHUNKED/a/$b3dtouch ' + lockName + '/'
   lockCom = '/TaperPadsInXYZ/a/LockFileForHDF  ' + lockName + '/'
   localcom.extend([touchCom, lockCom])

# Everything before the first chunk becomes the start command file
startName = rootname + '-start' + comExt
pysed(localcom, comlines[:sectStarts[0]], startName)

for num in range(1, numChunks + 1):
   comname = fmtstr('{}-{:03d}{}', rootname, num, comExt)

   # Lines that define tmpdir: either the directory given by the user, or
   # /usr/tmp followed by an optional sourcing of settmpdir from IMOD_DIR
   if tempdir:
      tmpdirLines = ['$set tmpdir = "' + tempdir + '"']
   else:
      tmpdirLines = ['$set tmpdir = /usr/tmp',
                     '$if ($?IMOD_DIR) then',
                     '$if (-e "$IMOD_DIR/bin/settmpdir") source "$IMOD_DIR/bin/settmpdir"',
                     '$endif']

   # Each chunk file starts with the tmpext and tmpdir definitions, then any
   # option lines found in the original file, then the lines of the chunk
   outlines = ['$set tmpext = `hostname`.$$'] + tmpdirLines
   for optLine in (optionLine1, optionLine2, optionLine3, optionLine4):
      if optLine:
         outlines.append(optLine)
   outlines.extend(comlines[sectStarts[num-1]:sectStarts[num]])

   # Doctor the filenames.  The first four substitutions add $tmpext to the
   # rec/mat .st and .fft names.  The rest replace any leading path in front
   # of rec. and mat., matching back to a space or tab or to the start of the
   # line, with $tmpdir; an escape is put in front of $tmpdir when it lands at
   # the start of a line.  This is all for backward compatibility; .st and
   # .fft are matched explicitly to avoid matching new style filenames
   sedcom = localcom + \
            ['/STATUS:/d',
             r'/rec\.st/s//rec.st.$tmpext/g',
             r'/mat\.st/s//mat.st.$tmpext/g',
             r'/rec\.fft/s//rec.fft.$tmpext/g',
             r'/mat\.fft/s//mat.fft.$tmpext/g',
             r'/^[^ 	]*rec\.st' + '/s//\\' + tmprec + 'st/g',
             r'/^[^ 	]*mat\.st/s//\\' + tmpmat + 'st/g',
             r'/[ 	][^ 	]*rec\.st/s// ' + tmprec + 'st/g',
             r'/[ 	][^ 	]*mat\.st/s// ' + tmpmat + 'st/g',
             r'/^[^ 	]*rec\.fft/s//\\' + tmprec + 'fft/g',
             r'/^[^ 	]*mat\.fft/s//\\' + tmpmat + 'fft/g',
             r'/[ 	][^ 	]*rec\.fft/s// ' + tmprec + 'fft/g',
             r'/[ 	][^ 	]*mat\.fft/s// ' + tmpmat + 'fft/g']

   # With None as the output argument the result of pysed is used as the list
   # of lines to write
   sedlines = pysed(sedcom, outlines, None)
   writeTextFile(comname, sedlines)

# Everything from the last section start onward goes into the finish file
outlines = comlines[sectStarts[numChunks]:]

# sumname is defined only when the INITIALIZING CHUNKED line was found
if gotInitChunk:
   outlines.append(fmtstr('$collectmmm pixels= {} {} {}', rootname, numChunks, sumname))

# Add a command to remove the numbered chunk command files and logs, plus the
# lock file; lockName is an empty string when no lock file is being used
outlines.append(fmtstr('$b3dremove -g {0}-[0-9][0-9][0-9]*' + comExt + '* ' +
                       '{0}-[0-9][0-9][0-9]*.log* {1}', rootname, lockName))

# Use the same extension as the start and chunk files rather than a fixed
# .com, so that the whole set of command files is named consistently
pysed(localcom, outlines, rootname + '-finish' + comExt)

# The count includes the start and finish files
prnstr(fmtstr('{} command files created and ready to run with\n'
              '  processchunks or the parallel processing interface', numChunks + 2))
sys.exit(0)
