# -*- coding: utf-8 -*-
################################################################################
# MacSyFinder - Detection of macromolecular systems in protein datasets #
# using systems modelling and similarity search. #
# Authors: Sophie Abby, Bertrand Néron #
# Copyright © 2014 Institut Pasteur (Paris) and CNRS. #
# See the COPYRIGHT file for details #
# #
# MacsyFinder is distributed under the terms of the GNU General Public License #
# (GPLv3). See the COPYING file for details. #
################################################################################
import os
import sys
import inspect
from time import strftime
from ConfigParser import SafeConfigParser, NoSectionError, NoOptionError
# Installation prefixes. The '$PREFIX*' values are placeholders; the
# MACSY_HOME environment variable, when set to a non-empty value, takes
# precedence and points to a directory holding 'etc' and 'data' sub-directories.
_prefix_path = '$PREFIX'
_prefix_conf = '$PREFIXCONF'
_prefix_data = '$PREFIXDATA'
if os.environ.get('MACSY_HOME'):
    _prefix_path = os.environ['MACSY_HOME']
    _prefix_conf = os.path.join(_prefix_path, 'etc')
    _prefix_data = os.path.join(_prefix_path, 'data')
import logging
class Config(object):
    """
    Parse configuration files and handle the configuration according to the following file location precedence:
    /etc/macsyfinder/macsyfinder.conf < ~/.macsyfinder/macsyfinder.conf < .macsyfinder.conf

    If a configuration file is given on the command-line, this file will be used.
    *In fine* the arguments passed on the command-line have the highest priority.
    """

    # if a new option is added think to add it also (if needed) in save
    # NOTE: this class-level tuple is shadowed, on each instance, by the dictionary
    # of validated options built at the end of __init__.
    options = ('cfg_file', 'previous_run', 'sequence_db', 'db_type', 'replicon_topology', 'topology_file',
               'inter_gene_max_space', 'min_mandatory_genes_required', 'min_genes_required', 'max_nb_genes',
               'multi_loci',
               'hmmer_exe', 'index_db_exe', 'e_value_res', 'i_evalue_sel', 'coverage_profile',
               'def_dir', 'res_search_dir', 'res_search_suffix', 'profile_dir', 'profile_suffix',
               'res_extract_suffix',
               'out_dir',
               'log_level', 'log_file', 'worker_nb', 'config_file', 'build_indexes')

    def __init__(self, cfg_file="",
                 sequence_db=None,
                 db_type=None,
                 replicon_topology=None,
                 topology_file=None,
                 inter_gene_max_space=None,
                 min_mandatory_genes_required=None,
                 min_genes_required=None,
                 max_nb_genes=None,
                 multi_loci=None,
                 hmmer_exe=None,
                 index_db_exe=None,
                 e_value_res=None,
                 i_evalue_sel=None,
                 coverage_profile=None,
                 def_dir=None,
                 res_search_dir=None,
                 res_search_suffix=None,
                 profile_dir=None,
                 profile_suffix=None,
                 res_extract_suffix=None,
                 out_dir=None,
                 log_level=None,
                 log_file=None,
                 worker_nb=None,
                 config_file=None,
                 previous_run=None,
                 build_indexes=None
                 ):
        """
        Read the configuration file(s), merge them with the command-line values and
        validate the result (see :meth:`_validate`, which also creates the working directory).

        :param cfg_file: the path to the MacSyFinder configuration file to use
        :type cfg_file: string
        :param previous_run: the path to the results directory of a previous run
        :type previous_run: string
        :param sequence_db: the path to the sequence input dataset (fasta format)
        :type sequence_db: string
        :param db_type: the type of dataset to deal with.
                        "unordered_replicon" corresponds to a non-assembled genome,
                        "unordered" to a metagenomic dataset,
                        "ordered_replicon" to an assembled genome, and
                        "gembase" to a set of replicons where sequence identifiers follow this convention
                        ">RepliconName_SequenceID".
        :type db_type: string
        :param replicon_topology: the topology ('linear' or 'circular') of the replicons. This option is meaningful
                                  only if the db_type is 'ordered_replicon' or 'gembase'
        :type replicon_topology: string
        :param topology_file: a tabular file of mapping between replicon names and the corresponding topology
                              (e.g. "RepliconA linear")
        :type topology_file: string
        :param inter_gene_max_space: per system, the maximum number of components with no match allowed
                                     between two genes with a match to consider them contiguous
        :type inter_gene_max_space: list of list of 2 elements [[ string system, integer space] , ...]
        :param min_mandatory_genes_required: per system, the mandatory genes quorum to assess the system presence
        :type min_mandatory_genes_required: list of list of 2 elements [[ string system, integer ] , ...]
        :param min_genes_required: per system, the genes (mandatory+accessory) quorum to assess
                                   the system presence
        :type min_genes_required: list of list of 2 elements [[ string system, integer ] , ...]
        :param max_nb_genes: per system, the maximum number of genes to assess the system presence
        :type max_nb_genes: list of list of 2 elements [[ string system, integer ] , ...]
        :param multi_loci: the names of the systems flagged as "multi loci", separated by commas
        :type multi_loci: string
        :param hmmer_exe: the Hmmer "hmmsearch" executable
        :type hmmer_exe: string
        :param index_db_exe: the indexer executable ("makeblastdb" or "formatdb")
        :type index_db_exe: string
        :param e_value_res: maximal e-value for hits to be reported during Hmmer search
        :type e_value_res: float
        :param i_evalue_sel: maximal independent e-value for Hmmer hits to be selected for system detection
        :type i_evalue_sel: float
        :param coverage_profile: minimal profile coverage required in the hit alignment to allow the hit selection
                                 for system detection
        :type coverage_profile: float
        :param def_dir: the path to the directory containing systems definition files (.xml)
        :type def_dir: string
        :param res_search_dir: the path to the directory where to store MacSyFinder search results directories.
        :type res_search_dir: string
        :param out_dir: The results are written in a directory. By default the directory is named
                        macsyfinder-{date}, but this option allows to override this behavior.
                        If out_dir is set, it is created; if it already exists it must be empty.
                        If both out_dir and res_search_dir are set, res_search_dir is ignored.
        :type out_dir: string
        :param res_search_suffix: the suffix to give to Hmmer raw output files
        :type res_search_suffix: string
        :param res_extract_suffix: the suffix to give to filtered hits output files
        :type res_extract_suffix: string
        :param profile_dir: path to the profiles directory
        :type profile_dir: string
        :param profile_suffix: the suffix of profile files. For each 'Gene' element,
                               the corresponding profile is searched in the 'profile_dir',
                               in a file which name is based on the Gene name + the profile suffix.
        :type profile_suffix: string
        :param log_level: the level of log output (a number or the name of a level of the logging module)
        :type log_level: int or string
        :param log_file: the path to the file where to write MacSyFinder logs
        :type log_file: string
        :param worker_nb: maximal number of processes to be used in parallel
                          (multi-thread run, 0 use all cores available)
        :type worker_nb: int
        :param config_file: accepted for command-line compatibility; it is not used to locate
                            the configuration (see cfg_file)
        :type config_file: string
        :param build_indexes: build the indexes from the sequence dataset in fasta format
        :type build_indexes: boolean
        :raise ValueError: if previous_run does not contain a configuration file,
                           or if an option value is invalid.
        """
        self._new_cfg_name = "macsyfinder.conf"
        if previous_run:
            prev_config = os.path.join(previous_run, self._new_cfg_name)
            if not os.path.exists(prev_config):
                raise ValueError("No config file found in dir {0}".format(previous_run))
            config_files = [prev_config]
        elif cfg_file:
            config_files = [cfg_file]
        else:
            config_files = [os.path.join(_prefix_conf, 'macsyfinder.conf'),
                            os.path.expanduser('~/.macsyfinder/macsyfinder.conf'),
                            'macsyfinder.conf']
        self._defaults = {'replicon_topology': 'circular',
                          'hmmer_exe': 'hmmsearch',
                          'index_db_exe': 'makeblastdb',
                          'e_value_res': "1",
                          'i_evalue_sel': "0.001",
                          'coverage_profile': "0.5",
                          'def_dir': os.path.join(_prefix_data, 'DEF'),
                          'res_search_dir': os.getcwd(),
                          'res_search_suffix': '.search_hmm.out',
                          'res_extract_suffix': '.res_hmm_extract',
                          'profile_dir': os.path.join(_prefix_data, 'profiles'),
                          'profile_suffix': '.hmm',
                          'log_level': logging.WARNING,
                          'worker_nb': '1'
                          }
        self.parser = SafeConfigParser(defaults=self._defaults)
        # files which do not exist are silently skipped by the parser
        self.parser.read(config_files)

        # collect the values of the arguments this constructor was called with
        frame = inspect.currentframe()
        args, _, _, values = inspect.getargvalues(frame)
        cmde_line_opt = {}
        for arg in args:
            # here self.options is still the class-level tuple of option names
            if arg in self.options and values[arg] is not None:
                # the option in ConfigParser are store as string
                # so in save method I dump some options only if
                # they are != than the default values in ConfigParser
                cmde_line_opt[arg] = str(values[arg])
        self.options = self._validate(cmde_line_opt, values)

    def _get_option(self, section, name, cmde_line_opt):
        """
        Look up an option with the precedence: command line > configuration file > built-in default.

        :param section: the section of the configuration file where the option is searched
        :type section: string
        :param name: the name of the option (it must have an entry in the built-in defaults)
        :type name: string
        :param cmde_line_opt: the options from the command line (values cast in string)
        :type cmde_line_opt: dict
        :return: the raw value of the option
        :rtype: string
        """
        try:
            # values given in 'vars' take precedence over those of the file
            return self.parser.get(section, name, vars=cmde_line_opt)
        except NoSectionError:
            if name in cmde_line_opt:
                return cmde_line_opt[name]
            return self._defaults[name]

    def _get_float_option(self, section, name, cmde_line_opt):
        """
        Same lookup as :meth:`_get_option` but the value is converted in float.

        :raise ValueError: if the value cannot be converted in float
        :rtype: float
        """
        raw_value = self._get_option(section, name, cmde_line_opt)
        try:
            return float(raw_value)
        except ValueError:
            raise ValueError("Invalid value for hmmer {0} :{1}: (float expected)".format(name, raw_value))

    def _get_system_int_option(self, name, cmde_line_values):
        """
        Build the mapping {system name: integer} of a per-system option from the 'system' section
        of the configuration file, then from the command line. For a given system,
        the value from the command line overrides the one from the configuration file.

        :param name: the name of the option
        :type name: string
        :param cmde_line_values: the options from the command line, values are not cast.
                                 The value for this option is a list of [system, integer] pairs or None
        :type cmde_line_values: dict
        :return: the values by system, or None if the option is set neither in the
                 configuration file nor on the command line
        :rtype: dict or None
        :raise ValueError: if the syntax in the configuration file is wrong or a value is not an integer
        """
        per_system = None
        if self.parser.has_option("system", name):
            per_system = {}
            # in the file the option is a flat list: "system1 value1 system2 value2 ..."
            tokens = self.parser.get("system", name).split()
            if len(tokens) % 2:
                raise ValueError("Invalid syntax for '{0}': you must have a list of systems and "
                                 "corresponding '{0}' separated by spaces".format(name))
            for system, raw_value in zip(tokens[::2], tokens[1::2]):
                try:
                    per_system[system] = int(raw_value)
                except ValueError:
                    raise ValueError("The value for '{0}' option for system {1} must be an integer, "
                                     "but you provided {2} in the configuration file".format(name,
                                                                                            system,
                                                                                            raw_value))
        cmde_line_value = cmde_line_values.get(name)
        if cmde_line_value is not None:
            if per_system is None:
                per_system = {}
            for system, raw_value in cmde_line_value:
                try:
                    per_system[system] = int(raw_value)
                except ValueError:
                    raise ValueError("The value for '{0}' option for system {1} must be an integer, "
                                     "but you provided {2} on command line".format(name, system, raw_value))
        return per_system

    @staticmethod
    def _parse_log_level(log_level):
        """
        :param log_level: a number (as int or string) or the name of a level of the logging module
                          (case insensitive, e.g. 'debug')
        :return: the numeric log level, logging.ERROR if the name is not a known level
        :rtype: int
        """
        try:
            return int(log_level)
        except ValueError:
            level = getattr(logging, log_level.upper(), None)
            # the logging module has non numeric attributes too, do not take them for a level
            return level if isinstance(level, int) else logging.ERROR

    def _create_working_dir(self, options, cmde_line_opt):
        """
        Determine and create the working directory and its hmmer results sub-directory.
        'out_dir', 'res_search_dir' (when used) and 'working_dir' are stored in options.

        :param options: the options being built for this execution (modified in place)
        :type options: dict
        :param cmde_line_opt: the options from the command line (values cast in string)
        :type cmde_line_opt: dict
        :return: the path of the working directory
        :rtype: string
        :raise ValueError: if the working directory exists and is not empty,
                           or if a directory cannot be created
        """
        # All results and intermediate files are stored in a directory
        # this directory is specify by out_dir option
        # for compliance if out_dir option is not specified
        # the output_dir will be the concatenation of research_dir and
        # "macsyfinder-" + strftime("%Y%m%d_%H-%M-%S")
        try:
            options['out_dir'] = self.parser.get('directories', 'out_dir', vars=cmde_line_opt)
            working_dir = options['out_dir']
        except (NoSectionError, NoOptionError):
            if 'out_dir' in cmde_line_opt:
                options['out_dir'] = cmde_line_opt['out_dir']
                working_dir = options['out_dir']
            else:
                options['res_search_dir'] = self._get_option('directories', 'res_search_dir', cmde_line_opt)
                working_dir = os.path.join(options['res_search_dir'],
                                           "macsyfinder-" + strftime("%Y%m%d_%H-%M-%S"))
        if os.path.exists(working_dir):
            if os.listdir(working_dir):
                raise ValueError("{0}: This results directory already exists and is not empty".format(working_dir))
        else:
            try:
                os.mkdir(working_dir)
            except OSError as err:
                raise ValueError("cannot create MacSyFinder working directory {0} : {1}".format(working_dir, err))
        options['working_dir'] = working_dir

        hmmer_path = os.path.join(working_dir, self.hmmer_dir)
        try:
            os.mkdir(hmmer_path)
        except OSError as err:
            raise ValueError("cannot create MacSyFinder hmmer directory {0} : {1}".format(hmmer_path, err))
        return working_dir

    def _init_logging(self, options, cmde_line_opt, working_dir):
        """
        Configure the 'macsyfinder' and 'macsyfinder.out' loggers and set self._log.
        'log_level' and 'log_file' are stored in options.

        :param options: the options being built for this execution (modified in place)
        :type options: dict
        :param cmde_line_opt: the options from the command line (values cast in string)
        :type cmde_line_opt: dict
        :param working_dir: the path of the working directory of this execution
        :type working_dir: string
        """
        try:
            log_level = self.parser.get('general', 'log_level', vars=cmde_line_opt)
        except AttributeError:
            # the built-in default is an integer, the parser cannot interpolate it
            log_level = self._defaults['log_level']
        except NoSectionError:
            # no 'general' section: the command line must still be taken in account
            log_level = cmde_line_opt.get('log_level', self._defaults['log_level'])
        log_level = self._parse_log_level(log_level)
        options['log_level'] = log_level

        # The errors cannot be logged before the handlers are set up, keep them for later.
        log_error = []
        # the log files to try, by order of preference
        log_files = []
        try:
            log_files.append(self.parser.get('general', 'log_file', vars=cmde_line_opt))
        except NoOptionError:
            pass
        except NoSectionError:
            if 'log_file' in cmde_line_opt:
                log_files.append(cmde_line_opt['log_file'])
        except Exception as err:
            log_error.append(err)
        log_files.append(os.path.join(working_dir, 'macsyfinder.log'))

        log_handler = None
        for log_file in log_files:
            try:
                log_handler = logging.FileHandler(log_file)
            except Exception as err:
                # deliberately broad: whatever the failure, fall back on the next candidate
                log_error.append(err)
            else:
                options['log_file'] = log_file
                break
        if log_handler is None:
            # no log file can be written, log on the standard error
            log_handler = logging.StreamHandler(sys.stderr)
            options['log_file'] = ''

        handler_formatter = logging.Formatter(
            "%(levelname)-8s : %(filename)-10s : L %(lineno)d : %(asctime)s : %(message)s")
        log_handler.setFormatter(handler_formatter)
        log_handler.setLevel(log_level)

        root = logging.getLogger()
        root.setLevel(logging.NOTSET)

        logger = logging.getLogger('macsyfinder')
        logger.setLevel(log_level)
        logger.addHandler(log_handler)

        # 'macsyfinder.out' logger: bare messages, written both in a file
        # of the working directory and on the standard output
        f_out_log_handler = logging.FileHandler(os.path.join(working_dir, 'macsyfinder.out'))
        f_out_log_handler.setFormatter(logging.Formatter("%(message)s"))
        f_out_log_handler.setLevel(logging.INFO)

        c_out_log_handler = logging.StreamHandler(sys.stdout)
        c_out_log_handler.setFormatter(logging.Formatter("%(message)s"))
        c_out_log_handler.setLevel(logging.INFO)

        out_logger = logging.getLogger('macsyfinder.out')
        out_logger.setLevel(logging.INFO)
        out_logger.addHandler(f_out_log_handler)
        out_logger.addHandler(c_out_log_handler)

        self._log = logging.getLogger('macsyfinder.config')
        for error in log_error:
            self._log.warning(error)

    def _validate(self, cmde_line_opt, cmde_line_values):
        """
        Get all configuration values and check the validity of their values.
        Create the working directory and set up the loggers.

        If a value is invalid, the error is logged and the working directory is removed
        before the error is propagated.

        :param cmde_line_opt: the options from the command line
        :type cmde_line_opt: dict, all values are cast in string
        :param cmde_line_values: the options from the command line
        :type cmde_line_values: dict, values are not cast
        :return: all the options for this execution
        :rtype: dictionary
        :raise ValueError: if the working directory cannot be used or if an option value is invalid
        """
        options = {}
        if 'sequence_db' in cmde_line_opt:
            # the sequence dataset is named 'file' in the configuration file
            cmde_line_opt['file'] = cmde_line_opt['sequence_db']

        working_dir = self._create_working_dir(options, cmde_line_opt)
        self._init_logging(options, cmde_line_opt, working_dir)

        try:
            previous_run = cmde_line_opt.get('previous_run', None)
            if previous_run:
                if not os.path.exists(previous_run):
                    raise ValueError("previous run directory '{0}' was not found".format(previous_run))
                options['previous_run'] = previous_run

            try:
                options['sequence_db'] = self.parser.get('base', 'file', vars=cmde_line_opt)
            except (NoSectionError, NoOptionError):
                sequence_db = cmde_line_opt.get('sequence_db', None)
                if sequence_db is None:
                    raise ValueError("No input sequence file specified")
                options['sequence_db'] = sequence_db
            if not os.path.exists(options['sequence_db']):
                raise ValueError("{0}: The input sequence file does not exist ".format(options['sequence_db']))
            options['sequence_db'] = os.path.abspath(options['sequence_db'])

            val_4_db_type = ('unordered_replicon', 'ordered_replicon', 'gembase', 'unordered')
            if 'db_type' in cmde_line_opt:
                options['db_type'] = cmde_line_opt['db_type']
            else:
                try:
                    options['db_type'] = self.parser.get('base', 'type')
                except (NoSectionError, NoOptionError):
                    raise ValueError("You must specify the type of the input dataset ({0}).".format(
                        ', '.join(val_4_db_type)))
            if options['db_type'] not in val_4_db_type:
                raise ValueError("Allowed values for the input dataset are : {0}".format(
                    ', '.join(val_4_db_type)))

            val_4_replicon_topology = ('linear', 'circular')
            if 'replicon_topology' in cmde_line_opt:
                options['replicon_topology'] = cmde_line_opt['replicon_topology']
            else:
                try:
                    options['replicon_topology'] = self.parser.get('base', 'replicon_topology')
                except (NoSectionError, NoOptionError):
                    options['replicon_topology'] = self._defaults['replicon_topology']
            if options['replicon_topology'] not in val_4_replicon_topology:
                raise ValueError("Allowed values for dataset replicon_topology are : {0}".format(
                    ', '.join(val_4_replicon_topology)))
            if options['replicon_topology'] == 'circular' and \
               options['db_type'] in ('unordered_replicon', 'unordered'):
                self._log.warning("As the input dataset type 'db_type' is set to {0}, "
                                  "the replicon_topology file was ignored".format(options['db_type']))

            if 'topology_file' in cmde_line_opt:
                options['topology_file'] = cmde_line_opt['topology_file']
            else:
                try:
                    options['topology_file'] = self.parser.get('base', 'topology_file')
                except (NoSectionError, NoOptionError):
                    options['topology_file'] = None
            if options['topology_file'] is not None:
                if not os.path.exists(options['topology_file']):
                    raise ValueError('topology_file cannot access {0}: No such file'.format(
                        options['topology_file']))
                elif not os.path.isfile(options['topology_file']):
                    raise ValueError('topology_file {0} is not a regular file'.format(options['topology_file']))

            # options with one integer value per system
            for name in ('inter_gene_max_space', 'min_mandatory_genes_required',
                         'min_genes_required', 'max_nb_genes'):
                per_system = self._get_system_int_option(name, cmde_line_values)
                if per_system is not None:
                    options[name] = per_system

            # the systems given on the command line are added to those of the configuration file
            if self.parser.has_option("system", "multi_loci"):
                options['multi_loci'] = self.parser.get("system", "multi_loci").split(',')
            else:
                options['multi_loci'] = []
            if cmde_line_values.get('multi_loci') is not None:
                options['multi_loci'].extend(cmde_line_values['multi_loci'].split(','))

            options['hmmer_exe'] = self._get_option('hmmer', 'hmmer_exe', cmde_line_opt)
            options['index_db_exe'] = self._get_option('base', 'index_db_exe', cmde_line_opt)

            options['e_value_res'] = self._get_float_option('hmmer', 'e_value_res', cmde_line_opt)
            options['i_evalue_sel'] = self._get_float_option('hmmer', 'i_evalue_sel', cmde_line_opt)
            if options['i_evalue_sel'] > options['e_value_res']:
                raise ValueError("i_evalue_sel ({:f}) must be lower or equal than e_value_res ({:f})".format(
                    options['i_evalue_sel'], options['e_value_res']))
            options['coverage_profile'] = self._get_float_option('hmmer', 'coverage_profile', cmde_line_opt)

            options['def_dir'] = self._get_option('directories', 'def_dir', cmde_line_opt)
            if not os.path.exists(options['def_dir']):
                raise ValueError("{0}: No such definition directory".format(options['def_dir']))

            options['profile_dir'] = self._get_option('directories', 'profile_dir', cmde_line_opt)
            if not os.path.exists(options['profile_dir']):
                raise ValueError("{0}: No such profile directory".format(options['profile_dir']))

            for name in ('res_search_suffix', 'res_extract_suffix', 'profile_suffix'):
                options[name] = self._get_option('directories', name, cmde_line_opt)

            worker_nb = self._get_option('general', 'worker_nb', cmde_line_opt)
            try:
                worker_nb = int(worker_nb)
            except ValueError:
                raise ValueError("the number of worker must be an integer")
            # a negative value is ignored: the option is left unset
            if worker_nb >= 0:
                options['worker_nb'] = worker_nb

        except ValueError as err:
            self._log.error(str(err), exc_info=True)
            logging.shutdown()
            if working_dir:
                import shutil
                # best effort: failing to clean up must not hide the original error
                shutil.rmtree(working_dir, ignore_errors=True)
            raise
        # build_indexes is not meaningfull in configuration file
        options['build_indexes'] = cmde_line_values['build_indexes']
        return options

    def save(self, dir_path):
        """
        save the configuration used for this run in the ini format file

        :param dir_path: the path of the directory where to write the configuration file
        :type dir_path: string
        """
        parser = SafeConfigParser()
        parser.add_section('base')
        parser.set('base', 'file', str(self.options['sequence_db']))
        parser.set('base', 'type', str(self.options['db_type']).lower())
        cfg_opts = [('base', ('replicon_topology', 'topology_file', 'index_db_exe',)),
                    ('system', ('inter_gene_max_space', 'min_mandatory_genes_required', 'min_genes_required',
                                'max_nb_genes', 'multi_loci')),
                    ('hmmer', ('hmmer_exe', 'e_value_res', 'i_evalue_sel', 'coverage_profile')),
                    ('directories', ('def_dir', 'res_search_dir', 'res_search_suffix', 'profile_dir',
                                     'profile_suffix', 'res_extract_suffix')),
                    ('general', ('log_level', 'log_file', 'worker_nb'))
                    ]
        per_system_opts = ('inter_gene_max_space', 'min_mandatory_genes_required',
                           'min_genes_required', 'max_nb_genes')
        for section, directives in cfg_opts:
            if not parser.has_section(section):
                parser.add_section(section)
            for directive in directives:
                # options which are not set are not saved
                value = self.options.get(directive)
                if value is None:
                    continue
                if directive in per_system_opts:
                    # same syntax as the one parsed by _validate: "system1 value1 system2 value2"
                    value = ' '.join("{0} {1}".format(system, val) for system, val in sorted(value.items()))
                elif directive == 'multi_loci':
                    if not value:
                        continue
                    # same syntax as the one parsed by _validate: "system1,system2"
                    value = ','.join(value)
                elif directive == 'log_file' and \
                        value == os.path.join(self.options['working_dir'], 'macsyfinder.log'):
                    # the default log file depends on the working directory, do not freeze it
                    continue
                else:
                    value = str(value)
                parser.set(section, directive, value)
        with open(os.path.join(dir_path, self._new_cfg_name), 'w') as new_cfg:
            parser.write(new_cfg)

    @property
    def sequence_db(self):
        """
        :return: the path to the input sequence dataset (in fasta format)
        :rtype: string
        """
        return self.options['sequence_db']

    @property
    def db_type(self):
        """
        :return: the type of the input sequence data set. The allowed values are :

                    * 'unordered_replicon',
                    * 'ordered_replicon',
                    * 'gembase',
                    * 'unordered'
        :rtype: string
        """
        return self.options['db_type']

    @property
    def build_indexes(self):
        """
        :return: True if the indexes must be rebuilt, False otherwise
        :rtype: boolean
        """
        return self.options['build_indexes']

    @property
    def replicon_topology(self):
        """
        :return: the topology of the replicons. Two values are supported 'linear' and 'circular' (default).
                 Only relevant for 'ordered' datasets
        :rtype: string
        """
        return self.options['replicon_topology']

    @property
    def topology_file(self):
        """
        :return: the path to the file of replicons topology.
        :rtype: string
        """
        return self.options['topology_file']

    def inter_gene_max_space(self, system):
        """
        :param system: the name of a system
        :type system: string
        :return: the maximum number of components with no match allowed between two genes
                 with a match to consider them contiguous (at the system level),
                 None if it is not set for this system
        :rtype: integer
        """
        try:
            return self.options['inter_gene_max_space'][system]
        except KeyError:
            return None

    def min_mandatory_genes_required(self, system):
        """
        :param system: the name of a system
        :type system: string
        :return: the mandatory genes quorum to assess the system presence,
                 None if it is not set for this system
        :rtype: integer
        """
        try:
            return self.options['min_mandatory_genes_required'][system]
        except KeyError:
            return None

    def min_genes_required(self, system):
        """
        :param system: the name of a system
        :type system: string
        :return: the genes (mandatory+accessory) quorum to assess the system presence,
                 None if it is not set for this system
        :rtype: integer
        """
        try:
            return self.options['min_genes_required'][system]
        except KeyError:
            return None

    def max_nb_genes(self, system):
        """
        :param system: the name of a system
        :type system: string
        :return: the maximum number of genes to assess the system presence,
                 None if it is not set for this system
        :rtype: integer
        """
        try:
            return self.options['max_nb_genes'][system]
        except KeyError:
            return None

    def multi_loci(self, system):
        """
        :param system: the name of a system
        :type system: string
        :return: True if the system is listed in the 'multi_loci' option, False otherwise
        :rtype: boolean
        """
        try:
            return system in self.options['multi_loci']
        except KeyError:
            return False

    @property
    def hmmer_exe(self):
        """
        :return: the name of the binary to execute for homology search from HMM protein profiles (Hmmer)
        :rtype: string
        """
        return self.options['hmmer_exe']

    @property
    def index_db_exe(self):
        """
        :return: the name of the binary to index the input sequences dataset for Hmmer
        :rtype: string
        """
        return self.options['index_db_exe']

    @property
    def e_value_res(self):
        """
        :return: The e_value threshold used by Hmmer to report hits in the Hmmer raw output file
        :rtype: float
        """
        return self.options['e_value_res']

    @property
    def i_evalue_sel(self):
        """
        :return: the i_evalue threshold used to select a hit for systems detection
                 and for the Hmmer report (filtered hits)
        :rtype: float
        """
        return self.options['i_evalue_sel']

    @property
    def coverage_profile(self):
        """
        :return: the coverage threshold used to select a hit for systems detection
                 and for the Hmmer report (filtered hits)
        :rtype: float
        """
        return self.options['coverage_profile']

    @property
    def def_dir(self):
        """
        :return: the path to the directory where are stored definitions of secretion systems (.xml files)
        :rtype: string
        """
        return self.options['def_dir']

    @property
    def res_search_dir(self):
        """
        :return: the path to the directory to store results of MacSyFinder runs
        :rtype: string
        :raise KeyError: if the option is not set (it is not looked up when out_dir is specified)
        """
        return self.options['res_search_dir']

    @property
    def working_dir(self):
        """
        :return: the path to the working directory to use for this run
        :rtype: string
        """
        return self.options['working_dir']

    @property
    def res_search_suffix(self):
        """
        :return: the suffix for Hmmer raw output files
        :rtype: string
        """
        return self.options['res_search_suffix']

    @property
    def profile_dir(self):
        """
        :return: the path to the directory where are the HMM protein profiles which corresponds to Gene
        :rtype: string
        """
        return self.options['profile_dir']

    @property
    def profile_suffix(self):
        """
        :return: the suffix for profile files
        :rtype: string
        """
        return self.options['profile_suffix']

    @property
    def res_extract_suffix(self):
        """
        :return: the suffix of extract files (tabulated files after HMM output parsing and filtering of hits)
        :rtype: string
        """
        return self.options['res_extract_suffix']

    @property
    def worker_nb(self):
        """
        :return: the maximum number of parallel jobs, None if it is not set
        :rtype: int
        """
        return self.options.get('worker_nb', None)

    @property
    def previous_run(self):
        """
        :return: the path to the previous run directory to use (to recover Hmmer raw output),
                 None if it is not set
        :rtype: string
        """
        return self.options.get('previous_run', None)

    @property
    def hmmer_dir(self):
        """
        :return: the name of the directory where the hmmer results are stored
        :rtype: string
        """
        return "hmmer_results"