Created
July 14, 2019 18:10
-
-
Save sagarkar10/5d877ec8eec758842bca2f2d7a2fb5b4 to your computer and use it in GitHub Desktop.
Comparison between Python MultiProcessing and MultiThreading
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import math | |
import time | |
import sys | |
from multiprocessing import Process | |
from threading import Thread | |
from multiprocessing.pool import ThreadPool as TPool | |
from multiprocessing import Pool as MPool | |
from loguru import logger | |
def costly_cpu_op(index, iterations=100_000_000):
    """CPU-bound workload: compute sqrt over a large range and log the elapsed time.

    Args:
        index: identifier of this run, used only in the log line.
        iterations: exclusive upper bound of the work loop. Defaults to the
            original hard-coded 100_000_000, so existing callers are unchanged;
            smaller values make quick experiments possible.
    """
    t = time.time()
    # Pure-Python CPU work holds the GIL, so threads cannot overlap it —
    # this is the whole point of the benchmark comparison.
    for i in range(1, iterations):
        math.sqrt(i)
    logger.info(f"{costly_cpu_op.__name__} at index:{index} took:{str(time.time()-t)} sec")
def costly_io_op(index, path="train-balanced-sarcasm.csv"):
    """I/O-bound workload: read a file fully and log the elapsed time.

    Args:
        index: identifier of this run, used only in the log line.
        path: file to read. Defaults to the original hard-coded dataset name
            so existing callers are unchanged; parameterized so the benchmark
            can target any file present on disk.

    Raises:
        OSError: if the file cannot be opened/read.
    """
    t = time.time()
    # Blocking file I/O releases the GIL, so threads CAN overlap this work.
    with open(path, "r") as fr:
        fr.read()
    logger.info(f"{costly_io_op.__name__} at index:{index} took:{str(time.time()-t)} sec")
def run_simple_processing(func, count):
    """Baseline: call `func(i)` sequentially for i in range(count), no concurrency."""
    started = time.time()
    for op_index in range(count):
        func(op_index)
    logger.info(f"Time for simple-processing of {count} ops of {func.__name__} took :{time.time()-started} sec")
def run_multiprocessing(func, count):
    """Run `func(i)` for each i in range(count), one OS process per call.

    All processes are created first, then started, then joined, so the
    measured time covers creation through completion of every worker.
    """
    started = time.time()
    workers = [Process(target=func, args=(i,)) for i in range(count)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    logger.info(f"Time for multi-processing of {count} ops of {func.__name__} took :{time.time()-started} sec")
def run_multithreading(func, count):
    """Run `func(i)` for each i in range(count), one thread per call.

    All threads are created first, then started, then joined, so the
    measured time covers creation through completion of every worker.
    """
    started = time.time()
    workers = [Thread(target=func, args=(i,)) for i in range(count)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    logger.info(f"Time for multi-threading of {count} ops of {func.__name__} took :{time.time()-started} sec")
def run_pool_multiprocessing(func, count):
    """Run `func` over range(count) using a process pool sized to the CPU count.

    Fix: the original called map_async() and discarded the AsyncResult, so any
    exception raised in a worker was silently swallowed. Blocking map() waits
    for all tasks and re-raises worker exceptions in this process; the timing
    semantics are unchanged because close()+join() already waited for the work.
    """
    pool = MPool(os.cpu_count())
    ts = time.time()
    try:
        pool.map(func, range(count))
    finally:
        # Always release worker processes, even if a task failed.
        pool.close()
        pool.join()
    logger.info(f"Time for pool-multi-processing of {count} ops of {func.__name__} took :{time.time()-ts} sec")
def run_pool_multithreading(func, count):
    """Run `func` over range(count) using a thread pool sized to the CPU count.

    Fix: the original called map_async() and discarded the AsyncResult, so any
    exception raised in a worker thread was silently swallowed. Blocking map()
    waits for all tasks and re-raises worker exceptions here; the timing
    semantics are unchanged because close()+join() already waited for the work.
    """
    pool = TPool(os.cpu_count())
    ts = time.time()
    try:
        pool.map(func, range(count))
    finally:
        # Always release worker threads, even if a task failed.
        pool.close()
        pool.join()
    logger.info(f"Time for pool-multi-threading of {count} ops of {func.__name__} took :{time.time()-ts} sec")
if __name__ == "__main__":
    # CLI dispatch tables: first arg picks the execution strategy,
    # second arg picks the workload type.
    run_mapper = {
        "simple": run_simple_processing,
        "mt": run_multithreading,
        "pmt": run_pool_multithreading,
        "mp": run_multiprocessing,
        "pmp": run_pool_multiprocessing,
    }
    func_mapper = {
        "cpu": costly_cpu_op,
        "io": costly_io_op,
    }
    # Fail fast with a usage message instead of an IndexError/KeyError
    # when the arguments are missing or unknown.
    if len(sys.argv) < 3 or sys.argv[1] not in run_mapper or sys.argv[2] not in func_mapper:
        sys.exit(f"usage: {sys.argv[0]} {{{'|'.join(run_mapper)}}} {{{'|'.join(func_mapper)}}}")
    run_type = run_mapper[sys.argv[1]]
    func_type = func_mapper[sys.argv[2]]
    # One operation per CPU core keeps the comparison fair across strategies.
    count = os.cpu_count()
    logger.add(f"logs/log_{sys.argv[1]}_{sys.argv[2]}_count_{count}", serialize=True, enqueue=True)
    logger.info(f"Running {run_type.__name__} for {func_type.__name__} with count :{count}")
    # Reuse the computed count (original recomputed os.cpu_count() here).
    run_type(func_type, count)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment