Last active
June 15, 2017 11:10
-
-
Save eladkehat/5a40abc04c5753443d49b15ef83c5890 to your computer and use it in GitHub Desktop.
Run MUMmer on every pair of files from a list and plot the result. Fixes a bug in their gnuplot script.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import argparse | |
| import itertools | |
| import os | |
| import subprocess | |
| """ | |
| Run MUMmer on every pair of files from the list of input files. | |
| """ | |
| # Default directory containing the MUMmer executables (nucmer, mummerplot) | |
| # Get MUMmer at https://sourceforge.net/projects/mummer/ | |
| MUMMER_PATH = '/usr/local/bin/MUMmer3.23/' | |
def _run_in_subprocess(args):
    """Run an external command and echo whatever it wrote to stdout and stderr.

    Args:
        args: The command and its arguments as a list, handed to
            ``subprocess.run`` as-is (no shell involved).

    Both streams are captured and printed to this process's stdout after the
    command has finished. The command's exit status is not inspected.
    """
    res = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    for captured in (res.stdout, res.stderr):
        if captured:
            # Decode leniently: a strict ASCII decode raises UnicodeDecodeError
            # on the first non-ASCII byte and would abort the whole run.
            # UTF-8 is a superset of ASCII, so plain ASCII output is unchanged.
            print(captured.decode('utf-8', errors='replace'))
def run_mummer(mummer_path, file1, file2):
    """Run nucmer (from the MUMmer package) on the specified pair of files.

    Args:
        mummer_path: Directory that contains the MUMmer executables.
        file1: First input file, passed to nucmer before ``file2``.
        file2: Second input file.

    Returns:
        The output prefix given to nucmer via ``-p``: the base names of both
        files, without directory or extension, joined by an underscore.
    """
    prefix = '_'.join(
        os.path.splitext(os.path.basename(filename))[0] for filename in (file1, file2)
    )
    # os.path.join gives the right path whether or not mummer_path ends with a
    # separator; plain string concatenation silently produced a bogus path.
    nucmer_binary = os.path.join(mummer_path, 'nucmer')
    args = [nucmer_binary, '--maxmatch', '-p', prefix, file1, file2]
    _run_in_subprocess(args)
    return prefix
def fix_gp_file(filename):
    """Strip the 'set mouse' directives from a gnuplot script, in place.

    MUMmerplot writes mouse directives into the gp file that it generates
    (the script gnuplot uses to draw the plot), and gnuplot rejects them.
    Dropping every line that begins with 'set mouse' is the simplest fix.
    """
    with open(filename) as source:
        kept = [row for row in source if not row.startswith('set mouse')]
    # The file is rewritten only after it has been read completely and closed.
    with open(filename, 'w') as target:
        target.writelines(kept)
def run_mummer_plot(mummer_path, output_prefix):
    """Run MUMmerplot on the delta file with the given output prefix, then plot it.

    Args:
        mummer_path: Directory that contains the MUMmer executables.
        output_prefix: Prefix shared by the input ``<prefix>.delta`` file and
            by the files produced here.

    After mummerplot has run, the generated ``<prefix>.gp`` script is cleaned
    with ``fix_gp_file`` and passed to ``gnuplot``. Any ``<prefix>.fplot`` and
    ``<prefix>.rplot`` files are then deleted.
    """
    # os.path.join gives the right path whether or not mummer_path ends with a
    # separator; plain string concatenation silently produced a bogus path.
    plot_binary = os.path.join(mummer_path, 'mummerplot')
    delta_file = output_prefix + '.delta'
    plot_args = [plot_binary, '--postscript', '-p', output_prefix, delta_file]
    _run_in_subprocess(plot_args)
    # A bug in MUMmerplot (it's old software) adds mouse directives that gnuplot can't process
    gp_file = output_prefix + '.gp'
    fix_gp_file(gp_file)
    # Now run gnuplot
    _run_in_subprocess(['gnuplot', gp_file])
    # Remove redundant files
    for suffix in ('.fplot', '.rplot'):
        leftover = output_prefix + suffix
        if os.path.isfile(leftover):
            os.remove(leftover)
def parse_arguments():
    """Parse the command line of the current process.

    Returns:
        The argparse namespace with ``input_files`` (one or more names),
        ``mummer_path`` (defaults to MUMMER_PATH) and the ``plot`` flag.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('input_files', nargs='+', help='A list of input file names')
    cli.add_argument(
        '--mummer-path',
        default=MUMMER_PATH,
        help='Path to the MUMmer binary executable. (default: %(default)s)',
    )
    cli.add_argument('--plot', action='store_true', help='Plot the results.')
    return cli.parse_args()
if __name__ == '__main__':
    # Script entry point: align every unordered pair of input files and,
    # when --plot is given, plot each alignment.
    args = parse_arguments()
    print('{} input files'.format(len(args.input_files)))
    for pair in itertools.combinations(args.input_files, 2):
        # The original literal 'MUM''ing' was two adjacent strings, which Python
        # concatenates to "MUMing"; the apostrophe is spelled out here.
        print("MUM'ing {}, {}...".format(*pair))
        prefix = run_mummer(args.mummer_path, *pair)
        if args.plot:
            print('Plotting...')
            run_mummer_plot(args.mummer_path, prefix)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment