Last active
December 10, 2015 17:58
-
-
Save ngcrawford/4470918 to your computer and use it in GitHub Desktop.
Quick script to parallelize jobs across the cores of one machine using Python's multiprocessing module. Essentially, it's a poor man's version of GNU parallel.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import shlex | |
import argparse | |
import multiprocessing | |
from subprocess import Popen, PIPE | |
def get_args():
    """Parse the command-line arguments.

    Options:
        -i/--infile  Text file of commands, one per line (default: sys.stdin).
        -n/--cores   Number of worker processes; must be >= 1
                     (default: multiprocessing.cpu_count()).
        -q/--quiet   Suppress printing of the jobs' output.

    Returns:
        The argparse.Namespace produced by parsing sys.argv.

    Raises:
        SystemExit: raised by argparse on invalid arguments (e.g. a core
            count below 1) or when -h/--help is requested.
    """

    def positive_int(text):
        # Reject 0/negative counts here with a clear usage error instead of
        # letting multiprocessing.Pool raise a bare ValueError later.
        value = int(text)
        if value < 1:
            raise argparse.ArgumentTypeError(
                'number of cores must be at least 1 (got %s)' % text)
        return value

    parser = argparse.ArgumentParser()

    # Adjacent string literals are used instead of backslash continuations
    # so that source indentation never leaks into the help text.
    parser.add_argument(
        '-i', '--infile',
        nargs='?',
        type=argparse.FileType('r'),
        default=sys.stdin,
        help='File containing commands; one per line. '
             'Alternately accepts lines redirected from STDIN. '
             '(e.g. "python parallelize_jobs.py -n 2 < cmds.txt")')

    parser.add_argument(
        '-n', '--cores',
        type=positive_int,
        default=multiprocessing.cpu_count(),
        help='Number of cores to use. Defaults to '
             'the number of available cores.')

    parser.add_argument(
        '-q', '--quiet',
        action='store_true',
        help="Setting this flag turns off output printing.")

    return parser.parse_args()
def job_runner(line):
    """Run one command and return its captured output.

    Args:
        line: A command line that shlex.split can tokenise. The command is
            executed directly (no shell), so shell syntax such as pipes or
            redirection is not interpreted.

    Returns:
        The (stdout, stderr) tuple from Popen.communicate(), as text.
        A blank or whitespace-only line runs nothing and returns ('', '').

    Raises:
        ValueError: if shlex cannot parse the line (e.g. unbalanced quotes).
        OSError: if the program cannot be started.
    """
    cli_parts = shlex.split(line)
    if not cli_parts:
        # Popen([]) raises; treat an empty command as a no-op job.
        return ('', '')

    # universal_newlines=True opens the pipes in text mode, so the result
    # can be handed straight to sys.stdout.write() on Python 3 (binary
    # pipes would yield bytes there, which a text stream rejects).
    proc = Popen(cli_parts, stdin=PIPE, stdout=PIPE, stderr=PIPE,
                 universal_newlines=True)
    # communicate() with no input closes the child's stdin, so a job that
    # reads from stdin sees EOF rather than blocking.
    return proc.communicate()
def _write_chunk(chunk):
    """Write one captured output chunk (text or bytes) to standard output."""
    if isinstance(chunk, bytes) and hasattr(sys.stdout, 'buffer'):
        # Python 3 text streams reject bytes; use the underlying binary stream.
        sys.stdout.buffer.write(chunk)
    else:
        sys.stdout.write(chunk)


def main():
    """Read commands from the input file and run them on a worker pool.

    Every non-blank line that is not a comment (first non-space character
    '#') is passed to job_runner in a pool of args.cores processes. Unless
    -q/--quiet is set, each job's captured stdout and then stderr are both
    written to this process's standard output, in input order.

    The jobs are executed regardless of --quiet; the flag only suppresses
    printing of their output.
    """
    args = get_args()

    # Strip before testing so indented comments are recognised, and drop
    # blank lines, which are not runnable commands.
    stripped = (raw.strip() for raw in args.infile)
    commands = (cmd for cmd in stripped
                if cmd and not cmd.startswith('#'))

    pool = multiprocessing.Pool(args.cores)
    try:
        # imap() does not convert the input to a list first (map() does)
        # and yields each result, in input order, as soon as it is ready.
        for result in pool.imap(job_runner, commands):
            if args.quiet:
                continue
            for chunk in result:
                _write_chunk(chunk)
    finally:
        # On the normal path every result has been received by now; after
        # an error this stops the outstanding workers instead of leaking them.
        pool.terminate()
        pool.join()
# Run only when executed as a script. The guard also keeps an import of this
# module from re-running main(), which multiprocessing needs when worker
# processes are started by re-importing the main module (e.g. on Windows).
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Any reason why not to use GNU Parallel instead? Both require installing a single script. wget http://git.savannah.gnu.org/cgit/parallel.git/plain/src/parallel; chmod 755 parallel