Skip to content

Instantly share code, notes, and snippets.

@eladkehat
Last active June 15, 2017 11:10
Show Gist options
  • Select an option

  • Save eladkehat/5a40abc04c5753443d49b15ef83c5890 to your computer and use it in GitHub Desktop.

Select an option

Save eladkehat/5a40abc04c5753443d49b15ef83c5890 to your computer and use it in GitHub Desktop.
Run MUMmer on every pair of files from a list and plot the result. Fixes a bug in their gnuplot script.
import argparse
import itertools
import os
import subprocess
"""
Run MUMmer on every pair of files from the list of input files.
"""
# Default directory containing the MUMmer executables (nucmer, mummerplot).
# Get MUMmer at https://sourceforge.net/projects/mummer/
MUMMER_PATH = '/usr/local/bin/MUMmer3.23/'
def _run_in_subprocess(args):
    """Run a command and echo whatever it wrote to stdout and stderr.

    Args:
        args: The command and its arguments as a list, passed straight to
            ``subprocess.run`` (no shell is involved).

    Both streams are captured and printed to this process's stdout once the
    command has finished: first the command's stdout, then its stderr.
    Empty streams are skipped. The command's exit status is not checked.
    """
    res = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    for captured in (res.stdout, res.stderr):
        if captured:
            # Tool output is not guaranteed to be pure ASCII (file names,
            # localized messages). Decode leniently so that a stray byte
            # cannot abort the whole run with a UnicodeDecodeError.
            print(captured.decode('utf-8', errors='replace'))
def run_mummer(mummer_path, file1, file2):
    """Run nucmer (from the MUMmer package) on the specified pair of files.

    Args:
        mummer_path: Directory that holds the MUMmer executables. A trailing
            path separator is optional.
        file1: Path of the first input file.
        file2: Path of the second input file.

    Returns:
        The output prefix handed to nucmer through ``-p``: the base names of
        the two inputs (directory and extension stripped) joined by ``_``.
    """
    stems = (os.path.splitext(os.path.basename(name))[0] for name in (file1, file2))
    prefix = '_'.join(stems)
    # os.path.join instead of string concatenation, so the directory works
    # with or without a trailing slash.
    nucmer_binary = os.path.join(mummer_path, 'nucmer')
    _run_in_subprocess([nucmer_binary, '--maxmatch', '-p', prefix, file1, file2])
    return prefix
def fix_gp_file(filename):
    """Strip the ``set mouse`` directives from a gnuplot script, in place.

    Due to a bug in MUMmerplot, there are invalid mouse directives inside the
    output gp file - the file used to create the plot with gnuplot.
    The simplest solution is to remove those lines from the file.

    Args:
        filename: Path of the gp file to rewrite. Every line that starts with
            ``set mouse`` is dropped; all other lines are kept unchanged and
            in their original order.
    """
    with open(filename) as gpfile:
        kept = [line for line in gpfile if not line.startswith('set mouse')]
    # The file is fully read and closed before it is reopened for writing,
    # because opening with 'w' truncates it.
    with open(filename, 'w') as gpfile:
        gpfile.writelines(kept)
def run_mummer_plot(mummer_path, output_prefix):
    """Run MUMmerplot on the delta file with the given output prefix, then plot it.

    Args:
        mummer_path: Directory that holds the MUMmer executables. A trailing
            path separator is optional.
        output_prefix: Prefix of the files to work on; ``<prefix>.delta`` is
            the input to mummerplot and ``<prefix>.gp`` is the gnuplot script
            that is patched and then run.

    The ``<prefix>.fplot`` and ``<prefix>.rplot`` files are deleted afterwards
    if they exist.
    """
    # os.path.join instead of string concatenation, so the directory works
    # with or without a trailing slash.
    plot_binary = os.path.join(mummer_path, 'mummerplot')
    delta_file = output_prefix + '.delta'
    _run_in_subprocess([plot_binary, '--postscript', '-p', output_prefix, delta_file])

    # A bug in MUMmerplot (it's old software) adds mouse directives that
    # gnuplot can't process, so strip them before running gnuplot.
    gp_file = output_prefix + '.gp'
    fix_gp_file(gp_file)
    _run_in_subprocess(['gnuplot', gp_file])

    # Remove redundant files
    for suffix in ('.fplot', '.rplot'):
        leftover = output_prefix + suffix
        if os.path.isfile(leftover):
            os.remove(leftover)
def parse_arguments(argv=None):
    """Parse the command line of the script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When None (the default), argparse reads
            ``sys.argv[1:]``.

    Returns:
        The argparse namespace with ``input_files`` (one or more file names),
        ``mummer_path`` (defaults to MUMMER_PATH) and ``plot`` (True when
        ``--plot`` was given).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input_files', nargs='+', help='A list of input file names')
    parser.add_argument('--mummer-path', default=MUMMER_PATH,
                        help='Path to the MUMmer binary executable. (default: %(default)s)')
    parser.add_argument('--plot', action='store_true', help='Plot the results.')
    return parser.parse_args(argv)
if __name__ == '__main__':
    # Script entry point: compare every pair of input files, optionally
    # plotting each comparison.
    args = parse_arguments()
    print('{} input files'.format(len(args.input_files)))
    # combinations() yields each unordered pair of inputs exactly once.
    for pair in itertools.combinations(args.input_files, 2):
        # Double-quoted so the apostrophe is printed; the former
        # 'MUM''ing' was two adjacent literals and printed "MUMing".
        print("MUM'ing {}, {}...".format(*pair))
        prefix = run_mummer(args.mummer_path, *pair)
        if args.plot:
            print('Plotting...')
            run_mummer_plot(args.mummer_path, prefix)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment