Parallelizing with Dask & Dask Distributed
from distributed import Client, as_completed
from dask import delayed
from time import sleep
from pprint import pprint

# Define a time-consuming task
def foo(n):
    print("Starting the {:d}-second task".format(n))
    sleep(n)
    print("Ending the {:d}-second task".format(n))
    return n
########################################################################################################################
# Goal: parallel execution of the above task for the following set of params
########################################################################################################################
# Specify task details
args_iterable = range(5, 15)  # launch the task with 10 different arguments
is_pure = False               # specify task purity (https://toolz.readthedocs.io/en/latest/purity.html)
                              # optional: it enables finer control over caching
# Set up a local cluster: scheduler + workers, deployed here as threads
# (processes=False); pass processes=True to use separate processes instead.
# We could also connect to a remotely deployed cluster (if one is available).
c = Client(processes=False)
pprint(c.scheduler_info())  # print information about the cluster we are connected to
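# A sketch of connecting to a remote cluster instead: pass the scheduler's
# address to Client (host and port below are placeholders for your deployment):
#   c = Client('tcp://scheduler-host:8786')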
########################################################################################################################
# Solution 1: using the standard concurrent.futures interface (PEP-3148)
########################################################################################################################
# Send tasks
send_mode = 'map'
if send_mode == 'submit':
    fut_list = []
    for i in args_iterable:
        f = c.submit(foo, i, pure=is_pure)  # start executing foo(i) immediately; pure functions are cached
        fut_list.append(f)
elif send_mode == 'map':
    fut_list = c.map(foo, args_iterable, pure=is_pure)
# Receive task results
rec_mode = 'as_completed'
if rec_mode == 'gather':
    # Wait for all tasks to finish and gather the results in a list
    out = c.gather(fut_list)
    print('Result:', out)
elif rec_mode == 'result':
    # Block until the first future in the list finishes and return its result
    out = fut_list[0].result()
    print('Result:', out)
elif rec_mode == 'as_completed':
    # Retrieve each task result as it finishes
    for future, out in as_completed(fut_list, with_results=True):
        print('Result:', out)
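# A further option (not exercised above) is distributed.wait, which simply
# blocks until all the futures finish, without gathering their results:
#   from distributed import wait
#   wait(fut_list)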
########################################################################################################################
# Solution 2: define a computation DAG by chaining Delayed-type objects
########################################################################################################################
# Build a list of tasks and aggregate them into a parent task object, but do not execute anything yet
task_list = []
for i in args_iterable:
    task_list.append(delayed(foo, pure=is_pure)(i))
parent = delayed(task_list)
# Execute the tasks synchronously (the interpreter blocks until all results are computed)
out = parent.compute()
print('Result:', out)
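# Equivalently (an alternative sketch, not used above), the task list can be
# computed directly without the wrapping parent node:
#   import dask
#   out = dask.compute(*task_list)  # returns a tuple of results
c.close()  # shut down the client and release the local cluster's resources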