#!/opt/gensoft/adm/bin/perl
#
# create_examples - regenerate the example output directories shipped with
# the documentation by running each tool on its example inputs inside a
# temporary work area, validating the result and then moving it into place.

use strict;
use warnings;

use Cwd qw(abs_path getcwd);
use File::Basename qw(dirname fileparse);
use File::Copy::Recursive qw(rmove);
use File::Path qw(remove_tree);
use File::Spec::Functions qw(catfile catdir);
use File::Temp qw(tempdir);
use Getopt::Long;
use Pod::Usage;

=head1 SYNOPSIS

create_examples [options] [list of examples to generate]

If no examples are listed then all will be generated and a tar file will be
created. Possible examples to generate are:

 ame centrimo dreme fimo glam2 glam2scan gomo mast mcast meme meme-chip
 momo motiph spamo tgene tomtom

 Options:
   -not     generate examples not in the list
   -tar     generates examples.tgz for uploading
   -debug   skip various cleanup related functions to help with debugging
   -help    displays this help message.

=cut

# Output directories of every example; used by tar_examples() and
# update_makefile().
my @example_dirs = (
    'ame_example_output_files',
    'centrimo_example_output_files',
    'tgene_example_output_files',
    'dreme_example_output_files',
    'fimo_example_output_files',
    'glam2_example_output_files',
    'glam2scan_example_output_files',
    'gomo_example_output_files',
    'mast_example_output_files',
    'mcast_example_output_files',
    'meme_example_output_files',
    'memechip_example_output_files',
    'momo_example_output_files',
    'motiph_example_output_files',
    'spamo_example_output_files',
    'tomtom_example_output_files'
);

my $negate_list     = 0;    #FALSE
my $tar             = 0;    #FALSE
my $debug           = 0;    #FALSE
my $help            = 0;    #FALSE
my %listed_examples = ();

GetOptions(
    "not"    => \$negate_list,
    "tar"    => \$tar,
    "debug"  => \$debug,
    "help|?" => \$help
) or pod2usage(2);
pod2usage(1) if $help;

if (@ARGV) {
    $listed_examples{$_} = 1 for @ARGV;
}
else {
    # no examples specified, so generate everything and tar it
    $negate_list = 1;
    $tar         = 1;
}

create_examples($negate_list, \%listed_examples);
tar_examples() if $tar;
update_makefile();
exit(0);

# Returns the ordered list of example specifications.  Each entry is a hash
# reference with the keys:
#   name   - the name accepted on the command line
#   title  - the name printed in the "Creating ... example" progress message
#   cmd    - array ref: either a command line for system(), or a code ref
#            followed by the arguments to pass to it
#   output - the output directory the command creates
#   inputs - array ref of input paths, relative to the example area
#   tests  - flat array ref of (test code ref, file name) pairs
sub _example_specs {
    return (
        {
            name  => "ame",
            title => "AME",
            cmd   => [
                "ame", "-oc", "ame_example_output_files",
                "-control", "--shuffle--", "-scoring", "avg",
                "-method", "fisher",
                "Klf1.fna", "JASPAR2018_CORE_non-redundant.meme"
            ],
            output => "ame_example_output_files",
            inputs => [
                "example-datasets/Klf1.fna",
                "example-datasets/JASPAR2018_CORE_non-redundant.meme"
            ],
            tests => [
                \&test_html,   "ame.html",
                \&test_exists, "ame.tsv",
                \&test_exists, "sequences.tsv"
            ],
        },
        {
            name  => "centrimo",
            title => "CentriMo",
            cmd   => [
                "centrimo", "--oc", "centrimo_example_output_files",
                "--local", "--ethresh", "1",
                "mm9_tss_500bp_sampled_1000.fna", "some_vertebrates.meme"
            ],
            output => "centrimo_example_output_files",
            inputs => [
                "example-datasets/mm9_tss_500bp_sampled_1000.fna",
                "example-datasets/some_vertebrates.meme"
            ],
            tests => [
                \&test_html,   "centrimo.html",
                \&test_exists, "centrimo.tsv",
                \&test_exists, "site_counts.txt"
            ],
        },
        {
            name  => "tgene",
            title => "T-Gene",
            cmd   => [
                "tgene", "-oc", "tgene_example_output_files",
                "-max-link-distances", "500000,1000",
                "-tissues", "A549,Bj,H1hesc,Hepg2,Hsmm,K562,Nhek,Sknshra,Ag04450,Gm12878,Helas3,Hmec,Huvec,Mcf7,Nhlf",
                "-histone-root", "Histone",
                "-histones", "H3k27ac,H3k4me3",
                "--rna-source", "Cage",
                "-expression-root", "Expression",
                "--lecat", "6",
                "--no-closest-locus",
                "-desc", "Predicted links from P300 binding sites to genes on human chromosome 21",
                "P300.chr21.bed",
                "gencode.v7.transcripts.chr21.gtf"
            ],
            output => "tgene_example_output_files",
            inputs => [
                "example-datasets/P300.chr21.bed",
                "../../tests/tgene/Histone",
                "../../tests/tgene/Expression",
                "../../tests/tgene/gencode.v7.transcripts.chr21.gtf"
            ],
            tests => [
                \&test_html,   "tgene.html",
                \&test_exists, "links.tsv",
                \&test_exists, "HistLev+noise.H3k27ac.tsv",
                \&test_exists, "HistLev+noise.H3k4me3.tsv",
                \&test_exists, "TrExp+noise.tsv",
                \&test_exists, "TrExp.tsv"
            ],
        },
        {
            name  => "dreme",
            title => "DREME",
            cmd   => [
                "dreme", "-oc", "dreme_example_output_files",
                "-png", "-p", "Klf1.fna"
            ],
            output => "dreme_example_output_files",
            inputs => ["example-datasets/Klf1.fna"],
            tests  => [
                \&test_html,   "dreme.html",
                \&test_exists, "dreme.txt",
                \&test_exists, "dreme.xml"
            ],
        },
        {
            name  => "fimo",
            title => "FIMO",
            cmd   => [
                "fimo", "--oc", "fimo_example_output_files",
                "--parse-genomic-coord",
                "some_vertebrates.meme", "mm9_tss_500bp_sampled_1000.fna"
            ],
            output => "fimo_example_output_files",
            inputs => [
                "example-datasets/mm9_tss_500bp_sampled_1000.fna",
                "example-datasets/some_vertebrates.meme"
            ],
            tests => [
                \&test_html,   "fimo.html",
                \&test_exists, "fimo.tsv",
                \&test_exists, "fimo.gff",
                \&test_exists, "fimo.xml",
                \&test_exists, "cisml.xml"
            ],
        },
        {
            name  => "glam2",
            title => "GLAM2",
            cmd   => [
                "glam2", "-O", "glam2_example_output_files",
                "-M", "p", "At.faa"
            ],
            output => "glam2_example_output_files",
            inputs => ["example-datasets/At.faa"],
            tests  => [
                \&test_html,   "glam2.html",
                \&test_exists, "glam2.meme",
                \&test_exists, "glam2.txt"
            ],
        },
        {
            # glam2scan -n 25 p aln /home/tbailey/trunk/INSTALLED/db/fasta_databases/c_elegans.aa
            name  => "glam2scan",
            title => "GLAM2Scan",
            cmd   => [
                "glam2scan", "-O", "glam2scan_example_output_files",
                "p", "At.glam2", "At.faa"
            ],
            output => "glam2scan_example_output_files",
            inputs => [
                "example-datasets/At.glam2",
                "example-datasets/At.faa"
            ],
            tests => [
                \&test_html,   "glam2scan.html",
                \&test_exists, "glam2scan.txt"
            ],
        },
        {
            name  => "gomo",
            title => "GOMo",
            cmd   => [
                \&gomo_runner,
                "dpinteract_subset.meme",                              # motifs for AMA & meme2images
                "bacteria_escherichia_coli_k12_1000_199.na",           # sequences for AMA
                "bacteria_escherichia_coli_k12_1000_199.na.bfile",     # background for AMA
                "bacteria_escherichia_coli_k12_1000_199.na.csv",       # gene map for GOMo
                "go.dag"                                               # GO DAG for GOMo
            ],
            output => "gomo_example_output_files",
            inputs => [
                "example-datasets/dpinteract_subset.meme",
                "example-datasets/bacteria_escherichia_coli_k12_1000_199.na",
                "example-datasets/bacteria_escherichia_coli_k12_1000_199.na.bfile",
                "example-datasets/bacteria_escherichia_coli_k12_1000_199.na.csv",
                "example-datasets/go.dag"
            ],
            tests => [
                \&test_html,   "gomo.html",
                \&test_exists, "gomo.xml"
            ],
        },
        {
            name  => "mast",
            title => "MAST",
            cmd   => [
                "mast", "-oc", "mast_example_output_files",
                "-dl", "http://www.uniprot.org/uniprot/?query=SEQUENCEID&sort=score",
                "adh.meme", "adh.faa"
            ],
            output => "mast_example_output_files",
            inputs => [
                "example-datasets/adh.meme",
                "example-datasets/adh.faa"
            ],
            tests => [
                \&test_html,   "mast.html",
                \&test_exists, "mast.txt",
                \&test_exists, "mast.xml"
            ],
        },
        {
            name  => "mcast",
            title => "MCAST",
            cmd   => [
                "mcast", "-oc", "mcast_example_output_files",
                "-parse-genomic-coord", "Klf1.dreme", "Klf1.fna"
            ],
            output => "mcast_example_output_files",
            inputs => [
                "example-datasets/Klf1.dreme",
                "example-datasets/Klf1.fna"
            ],
            tests => [
                \&test_html,   "mcast.html",
                \&test_exists, "mcast.tsv",
                \&test_exists, "mcast.xml",
                \&test_exists, "cisml.xml"
            ],
        },
        {
            name  => "meme",
            title => "MEME",
            cmd   => [
                "meme", "lex0.fna", "-oc", "meme_example_output_files",
                "-dna", "-mod", "zoops", "-nmotifs", "3", "-revcomp"
            ],
            output => "meme_example_output_files",
            inputs => ["example-datasets/lex0.fna"],
            tests  => [
                \&test_html,   "meme.html",
                \&test_exists, "meme.txt",
                \&test_exists, "meme.xml"
            ],
        },
        {
            name  => "meme-chip",
            title => "MEME-ChIP",
            cmd   => [
                "meme-chip", "-meme-p", "6",
                "-oc", "memechip_example_output_files",
                "-db", "JASPAR2018_CORE_non-redundant.meme",
                "Klf1.fna"
            ],
            output => "memechip_example_output_files",
            inputs => [
                "example-datasets/JASPAR2018_CORE_non-redundant.meme",
                "example-datasets/Klf1.fna"
            ],
            tests => [\&test_html, "index.html"],
        },
        {
            # NOTE(review): --min-occurrences is passed twice (5, then 20);
            # kept exactly as found - confirm which value is intended.
            name  => "momo",
            title => "MoMo",
            cmd   => [
                "momo", "motifx", "-oc", "momo_example_output_files",
                "--sequence-column", "Motif Peptide",
                "--width", "13",
                "--min-occurrences", "5",
                "--min-occurrences", "20",
                "--protein-database", "ensembl_Plasmodium_falciparum_38.200.fa",
                "pr8b00062_si_002.ptm"
            ],
            output => "momo_example_output_files",
            inputs => [
                "example-datasets/ensembl_Plasmodium_falciparum_38.200.fa",
                "example-datasets/pr8b00062_si_002.ptm"
            ],
            tests => [
                \&test_html,   "momo.html",
                \&test_exists, "momo.txt"
            ],
        },
        {
            name  => "motiph",
            title => "MOTIPH",
            cmd   => [
                'motiph', "-oc", "motiph_example_output_files",
                '--seed', 0, '--bg', 2.0, '--pseudocount', 0.01,
                'spiked.aln', 'yeast.tree', 'MCM1.meme.html'
            ],
            output => "motiph_example_output_files",
            inputs => [
                "example-datasets/spiked.aln",
                "example-datasets/yeast.tree",
                "example-datasets/MCM1.meme.html"
            ],
            tests => [
                \&test_html,   "motiph.html",
                \&test_exists, "motiph.txt",
                \&test_exists, "motiph.xml",
                \&test_exists, "motiph.gff"
            ],
        },
        {
            name  => "spamo",
            title => "SpaMo",
            cmd   => [
                "spamo", "-oc", "spamo_example_output_files", "-png",
                "Klf1.fna", "Klf1.meme",
                "JASPAR2018_CORE_non-redundant.meme"
            ],
            output => "spamo_example_output_files",
            inputs => [
                "example-datasets/Klf1.fna",
                "example-datasets/Klf1.meme",
                "example-datasets/JASPAR2018_CORE_non-redundant.meme"
            ],
            tests => [
                \&test_html,   "spamo.html",
                \&test_exists, "spamo.tsv"
            ],
        },
        {
            name  => "tomtom",
            title => "Tomtom",
            cmd   => [
                "tomtom", "-oc", "tomtom_example_output_files",
                "-min-overlap", "5", "-dist", "pearson",
                "-evalue", "-thresh", "10", "-no-ssc",
                "STRGGTCAN.meme", "JASPAR2018_CORE_non-redundant.meme"
            ],
            output => "tomtom_example_output_files",
            inputs => [
                "example-datasets/STRGGTCAN.meme",
                "example-datasets/JASPAR2018_CORE_non-redundant.meme"
            ],
            tests => [
                \&test_html,   "tomtom.html",
                \&test_exists, "tomtom.tsv",
                \&test_exists, "tomtom.xml"
            ],
        },
    );
}

# Generates every selected example, in the order given by _example_specs().
#
#   $negate_list     - when true, generate the examples that are NOT listed
#   $listed_examples - hash ref whose keys are the example names given on
#                      the command line
#
# An example is generated when exactly one of "$negate_list" and "the
# example is listed" is true.  Listed names that match no example are
# reported with a warning.
sub create_examples {
    my ($negate_list, $listed_examples) = @_;

    my @specs = _example_specs();

    # a misspelt name would otherwise silently select nothing
    my %known = map { $_->{name} => 1 } @specs;
    for my $name (sort keys %{$listed_examples}) {
        warn("Unknown example '$name' ignored\n") unless $known{$name};
    }

    for my $spec (@specs) {
        next unless ($negate_list xor $listed_examples->{ $spec->{name} });
        print "Creating $spec->{title} example\n";
        create_example(
            $spec->{cmd}, $spec->{output}, $spec->{inputs}, $spec->{tests}
        );
    }
    return;
}

# Runs one example in a fresh temporary work area and, if the program
# succeeds and all validation tests pass, moves its output directory into
# the example area (the directory containing this script).
#
#   $cmd              - array ref: a command line for system(), or a code
#                       ref followed by its arguments (the code ref is
#                       called with $output_directory prepended)
#   $output_directory - name of the directory the command creates
#   $inputs           - array ref of inputs: plain strings are paths relative
#                       to the example area and are symlinked into the work
#                       area; hash refs {method => "wget", url => ...} are
#                       downloaded into it
#   $validation_tests - flat array ref of (test code ref, file name) pairs
#
# The process is left with the example area as its working directory.  The
# work area is removed afterwards unless -debug was given.
sub create_example {
    my ($cmd, $output_directory, $inputs, $validation_tests) = @_;

    my $example_area = abs_path(dirname(__FILE__));
    print "Using example area $example_area\n";
    chdir($example_area)
        or die("Unable to change to $example_area: $!\n");

    # create a temporary folder
    my $work_area = tempdir("create_examples_XXXXXXXXXX", TMPDIR => 1);
    print "Created working area $work_area\n";

    # copy all required files into the work area
    print "Copying inputs to working area\n";
    _stage_inputs($example_area, $work_area, $inputs);

    # change to work area
    chdir($work_area) or die("Unable to change to $work_area: $!\n");

    # run program
    print "Running program\n";
    my $status;
    if (ref $cmd->[0] eq "CODE") {
        my ($fn, @args) = @{$cmd};
        $status = $fn->($output_directory, @args);
    }
    else {
        $status = system(@{$cmd});
    }

    # change back to where the examples are
    chdir($example_area)
        or die("Unable to change to $example_area: $!\n");

    # check outputs
    if ($status) {
        warn("Program failed (status $status), not replacing $output_directory\n");
    }
    elsif (_outputs_valid(catdir($work_area, $output_directory), $validation_tests)) {
        # replace example
        print "Output tests pass, replacing $output_directory\n";
        my $from_dir = catdir($work_area,    $output_directory);
        my $to_dir   = catdir($example_area, $output_directory);
        rmove($from_dir, $to_dir) or die("failure: $!");
    }

    unless ($debug) {
        print "Deleting work area: $work_area\n";
        remove_tree($work_area);
    }
    return;
}

# Makes every input of an example available inside the work area.
# Plain strings are symlinked (never hard-linked or copied: the work area is
# deleted afterwards, and inputs may be directories); hash refs are fetched
# with wget.  Dies on an unsupported input description or a failed download.
sub _stage_inputs {
    my ($example_area, $work_area, $inputs) = @_;

    for my $input (@{$inputs}) {
        if (ref $input eq "HASH") {
            die("Input method undefined!\n") unless defined $input->{method};
            if ($input->{method} eq "wget") {
                my $wget_status =
                    system("wget", "-nv", "-P", $work_area, $input->{url});
                die("wget failed!") if ($wget_status != 0);
            }
            else {
                die("Unknown input method!\n");
            }
        }
        elsif (ref $input eq "") {
            # A file name relative to the example area.  create_example()
            # always leaves the process in the example area, so resolving
            # against it keeps the first and all later examples consistent.
            my ($input_name) = fileparse($input);
            symlink(catfile($example_area, $input), catfile($work_area, $input_name))
                or warn("Unable to link input $input: $!\n");
        }
        else {
            die("Unknown input file type!\n");
        }
    }
    return;
}

# Applies the (test, file name) pairs to files below $output_root.
# Returns 1 when every test passes; warns about the first failure and
# returns 0 otherwise.  A trailing unpaired element is ignored.
sub _outputs_valid {
    my ($output_root, $validation_tests) = @_;

    my @pairs = @{$validation_tests};
    for (my $i = 0; $i + 1 < scalar(@pairs); $i += 2) {
        my ($test, $file_name) = @pairs[$i, $i + 1];
        next if $test->(catfile($output_root, $file_name));
        warn("Test failed on $file_name\n");
        return 0;
    }
    return 1;
}

# Packs all example output directories into examples.tgz in the current
# directory.  Returns the exit status reported by system().
sub tar_examples {
    my $status = system('tar', '-czf', 'examples.tgz', @example_dirs);
    warn("tar failed with status $status\n") if $status;
    return $status;
}

# Rewrites examples.mk with the list of all example output files and then
# reruns automake two directories above this script.
sub update_makefile {
    # update examples.mk which is included by Makefile.am
    system('echo "EXAMPLE_OUTPUT_FILES = \\\\" > examples.mk');
    my $cmd = 'find ' . join(' ', @example_dirs)
        . q( -type f ! -iname '*.orig' | sed -e 's/^/ /' -e '$q;s/$/ \\\\/' >> examples.mk);
    system($cmd);

    # must run automake in top level dir
    my $example_area = abs_path(dirname(__FILE__));
    my $top_dir      = catdir($example_area, '..', '..');
    chdir($top_dir) or die("Unable to change to $top_dir: $!\n");

    # regenerate Makefile.in
    system('automake doc/examples/Makefile');

    # change back to examples dir
    chdir($example_area)
        or die("Unable to change to $example_area: $!\n");
    return;
}

# Runs AMA followed by GOMo in the current directory.
#
#   $out       - GOMo output directory
#   $motifs    - motif file, used by both programs
#   $sequences - sequence file for AMA
#   $bfile     - background file for AMA
#   $map       - gene map for GOMo
#   $dag       - GO DAG for GOMo
#   @ids       - optional motif ids, each passed to AMA as "-motif <id>"
#
# Returns 0 on success, otherwise the failing program's system() status, or
# 1 when AMA did not produce ama.xml.
sub gomo_runner {
    my ($out, $motifs, $sequences, $bfile, $map, $dag, @ids) = @_;

    my @motif_ids = map { ('-motif', $_) } @ids;

    # run AMA
    my @args = ('ama', '-oc', '.', '-pvalues', @motif_ids,
        $motifs, $sequences, $bfile);
    print join(" ", @args), "\n";
    my $status = system(@args);
    return $status if $status;
    if (!test_exists('ama.xml')) {
        warn("Test failed on ama.xml");
        return 1;
    }

    # run GOMo
    @args = ("gomo", '--oc', $out, '--dag', $dag, '--motifs', $motifs,
        $map, 'ama.xml');
    print join(" ", @args), "\n";
    $status = system(@args);
    return $status;
}

# Validation test: true when $file exists.
sub test_exists {
    my ($file) = @_;
    return (-e $file);
}

# Validation test: true when $file can be read and contains a closing html
# tag; we assume the html is correct if it has one.
#
# The file is scanned in Perl rather than through `grep ... | wc -l`: the
# output of wc ("0\n") is a true value in Perl even when nothing matched, so
# the shell pipeline could never report a failure.
sub test_html {
    my ($file) = @_;

    open(my $fh, '<', $file) or return 0;
    my $found = 0;
    while (my $line = <$fh>) {
        if ($line =~ m{</html>}i) {
            $found = 1;
            last;
        }
    }
    close($fh);
    return $found;
}

1;