Skip to content

Instantly share code, notes, and snippets.

@eriknw
Created December 6, 2019 15:18
Show Gist options
  • Save eriknw/82c204b7923f079e267ab245f7228f0a to your computer and use it in GitHub Desktop.
Save eriknw/82c204b7923f079e267ab245f7228f0a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import dask\n",
"import numpy as np\n",
"import dask.array as da\n",
"import dask.array.linalg\n",
"import xarray as xr\n",
"from dask.order import order\n",
"\n",
"def num_edges(dsk):\n",
" return sum(len(dask.core.get_dependencies(dsk, k)) for k, v in dsk.items())"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(41928, 60440)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = da.random.random((5000, 5000), chunks=(100, 100))\n",
"b = a.map_overlap(lambda e: 2 * e, depth=1)\n",
"dsk = dict(b.dask)\n",
"len(dsk), num_edges(dsk)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"617 ms ± 12.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit order(dsk) # Master"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"534 ms ± 14.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit order(dsk) # This PR"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(102886, 148165)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from skimage import filters\n",
"ar = da.random.random((4e4, 4e4), chunks=(512, 512))\n",
"res = ar.map_overlap(filters.gaussian, depth=10)\n",
"dsk = dict(res.dask)\n",
"len(dsk), num_edges(dsk)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.71 s ± 7.81 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit order(dsk) # Master"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.49 s ± 7.84 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit order(dsk) # This PR"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(9823, 12822)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ar = da.random.random((6000,64), chunks=(10,64))\n",
"u, s, v = da.linalg.svd_compressed(ar, 100, 0)\n",
"dsk = dask.base.collections_to_dsk([u, s, v], optimize_graph=False)\n",
"len(dsk), num_edges(dsk)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"720 ms ± 13.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit order(dsk) # Master"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"255 ms ± 4.98 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit order(dsk) # This PR"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(6217, 9216)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dsk = dask.base.collections_to_dsk([u, s, v], optimize_graph=True)\n",
"len(dsk), num_edges(dsk)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"476 ms ± 9.99 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit order(dsk) # Master"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"48.3 ms ± 586 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%timeit order(dsk) # This PR"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(54134, 73333)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = da.random.normal(size=(4e6, 30e2), chunks=(2e4, 3e1))\n",
"x = x.rechunk((int(1e4 / 10), int(30e2)))\n",
"xx = x.T.dot(x)\n",
"dsk = dict(xx.dask)\n",
"len(dsk), num_edges(dsk)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"704 ms ± 13.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit order(dsk) # Master"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"690 ms ± 12.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit order(dsk) # This PR"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(83008, 100508)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = da.random.random((1000, 1000), chunks=(20, 20))\n",
"b = a.map_overlap(lambda e: 2 * e, depth=1)\n",
"dsk = dask.base.collections_to_dsk(b)\n",
"len(dsk), num_edges(dsk)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.2 s ± 8.63 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit order(dsk) # Master"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.13 s ± 3.87 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit order(dsk) # This PR"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(77136, 97035)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = da.random.random((10000, 1000000), chunks=(1, 1000000))\n",
"a = xr.DataArray(data, dims=['time', 'x'],\n",
" coords={'day': ('time', np.arange(10000) % 100)})\n",
"clim = a.groupby('day').mean(dim='time')\n",
"anom = a.groupby('day') - clim\n",
"anom_mean = anom.mean(dim='time')\n",
"dsk = dict(anom_mean.data.dask)\n",
"len(dsk), num_edges(dsk)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1.17 s ± 8.06 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit order(dsk) # Master"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"917 ms ± 7.54 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit order(dsk) # This PR"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Reference: [dask/dask#3066 (comment)](https://github.com/dask/dask/pull/3066#issuecomment-358107519)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(3200, 4094)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"n = 100\n",
"x = da.random.normal(size=(n, 100), chunks=(1, 100))\n",
"y = da.random.normal(size=(n,), chunks=(1,))\n",
"xy = (x * y[:, None]).cumsum(axis=0)\n",
"xx = (x[:, None, :] * x[:, :, None]).cumsum(axis=0)\n",
"beta = da.stack([da.linalg.solve(xx[i], xy[i]) for i in range(xx.shape[0])],\n",
" axis=0)\n",
"ey = (x * beta).sum(axis=1)\n",
"dsk = dict(ey.dask)\n",
"len(dsk), num_edges(dsk)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"29.3 ms ± 937 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%timeit order(dsk) # Master"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"24.8 ms ± 691 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%timeit order(dsk) # This PR"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment