Skip to content

Instantly share code, notes, and snippets.

@ivirshup
Created August 14, 2024 20:58
Show Gist options
  • Save ivirshup/018bb8ae1ea7746db768c3672b8a007b to your computer and use it in GitHub Desktop.
Save ivirshup/018bb8ae1ea7746db768c3672b8a007b to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ubuntu/miniforge3/envs/cellxgene-census-dev/lib/python3.11/site-packages/tiledb/cloud/config.py:96: UserWarning: You must first login before you can run commands. Please run tiledb.cloud.login.\n",
" warnings.warn(\n"
]
}
],
"source": [
"import multiprocessing\n",
"multiprocessing.set_start_method(\"spawn\")\n",
"\n",
"import dask.array as da\n",
"import tiledb\n",
"import numpy as np\n",
"from scipy import sparse\n",
"\n",
"import dask.distributed as dd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
" <div style=\"width: 24px; height: 24px; background-color: #e1e1e1; border: 3px solid #9D9D9D; border-radius: 5px; position: absolute;\"> </div>\n",
" <div style=\"margin-left: 48px;\">\n",
" <h3 style=\"margin-bottom: 0px;\">Client</h3>\n",
" <p style=\"color: #9D9D9D; margin-bottom: 0px;\">Client-51d833f8-5a7f-11ef-82a2-023ca6c22285</p>\n",
" <table style=\"width: 100%; text-align: left;\">\n",
"\n",
" <tr>\n",
" \n",
" <td style=\"text-align: left;\"><strong>Connection method:</strong> Cluster object</td>\n",
" <td style=\"text-align: left;\"><strong>Cluster type:</strong> distributed.LocalCluster</td>\n",
" \n",
" </tr>\n",
"\n",
" \n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:8787/status\" target=\"_blank\">http://127.0.0.1:8787/status</a>\n",
" </td>\n",
" <td style=\"text-align: left;\"></td>\n",
" </tr>\n",
" \n",
"\n",
" </table>\n",
"\n",
" \n",
"\n",
" \n",
" <details>\n",
" <summary style=\"margin-bottom: 20px;\"><h3 style=\"display: inline;\">Cluster Info</h3></summary>\n",
" <div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-mod-trusted jp-OutputArea-output\">\n",
" <div style=\"width: 24px; height: 24px; background-color: #e1e1e1; border: 3px solid #9D9D9D; border-radius: 5px; position: absolute;\">\n",
" </div>\n",
" <div style=\"margin-left: 48px;\">\n",
" <h3 style=\"margin-bottom: 0px; margin-top: 0px;\">LocalCluster</h3>\n",
" <p style=\"color: #9D9D9D; margin-bottom: 0px;\">fbd898ae</p>\n",
" <table style=\"width: 100%; text-align: left;\">\n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Dashboard:</strong> <a href=\"http://127.0.0.1:8787/status\" target=\"_blank\">http://127.0.0.1:8787/status</a>\n",
" </td>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Workers:</strong> 4\n",
" </td>\n",
" </tr>\n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Total threads:</strong> 4\n",
" </td>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Total memory:</strong> 123.85 GiB\n",
" </td>\n",
" </tr>\n",
" \n",
" <tr>\n",
" <td style=\"text-align: left;\"><strong>Status:</strong> running</td>\n",
" <td style=\"text-align: left;\"><strong>Using processes:</strong> True</td>\n",
"</tr>\n",
"\n",
" \n",
" </table>\n",
"\n",
" <details>\n",
" <summary style=\"margin-bottom: 20px;\">\n",
" <h3 style=\"display: inline;\">Scheduler Info</h3>\n",
" </summary>\n",
"\n",
" <div style=\"\">\n",
" <div>\n",
" <div style=\"width: 24px; height: 24px; background-color: #FFF7E5; border: 3px solid #FF6132; border-radius: 5px; position: absolute;\"> </div>\n",
" <div style=\"margin-left: 48px;\">\n",
" <h3 style=\"margin-bottom: 0px;\">Scheduler</h3>\n",
" <p style=\"color: #9D9D9D; margin-bottom: 0px;\">Scheduler-d772a300-8a73-47cb-8d51-b233ae112baa</p>\n",
" <table style=\"width: 100%; text-align: left;\">\n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Comm:</strong> tcp://127.0.0.1:45619\n",
" </td>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Workers:</strong> 4\n",
" </td>\n",
" </tr>\n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Dashboard:</strong> <a href=\"http://127.0.0.1:8787/status\" target=\"_blank\">http://127.0.0.1:8787/status</a>\n",
" </td>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Total threads:</strong> 4\n",
" </td>\n",
" </tr>\n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Started:</strong> Just now\n",
" </td>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Total memory:</strong> 123.85 GiB\n",
" </td>\n",
" </tr>\n",
" </table>\n",
" </div>\n",
" </div>\n",
"\n",
" <details style=\"margin-left: 48px;\">\n",
" <summary style=\"margin-bottom: 20px;\">\n",
" <h3 style=\"display: inline;\">Workers</h3>\n",
" </summary>\n",
"\n",
" \n",
" <div style=\"margin-bottom: 20px;\">\n",
" <div style=\"width: 24px; height: 24px; background-color: #DBF5FF; border: 3px solid #4CC9FF; border-radius: 5px; position: absolute;\"> </div>\n",
" <div style=\"margin-left: 48px;\">\n",
" <details>\n",
" <summary>\n",
" <h4 style=\"margin-bottom: 0px; display: inline;\">Worker: 0</h4>\n",
" </summary>\n",
" <table style=\"width: 100%; text-align: left;\">\n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Comm: </strong> tcp://127.0.0.1:39161\n",
" </td>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Total threads: </strong> 1\n",
" </td>\n",
" </tr>\n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:36119/status\" target=\"_blank\">http://127.0.0.1:36119/status</a>\n",
" </td>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Memory: </strong> 30.96 GiB\n",
" </td>\n",
" </tr>\n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Nanny: </strong> tcp://127.0.0.1:43707\n",
" </td>\n",
" <td style=\"text-align: left;\"></td>\n",
" </tr>\n",
" <tr>\n",
" <td colspan=\"2\" style=\"text-align: left;\">\n",
" <strong>Local directory: </strong> /tmp/dask-scratch-space/worker-y140q1k2\n",
" </td>\n",
" </tr>\n",
"\n",
" \n",
"\n",
" \n",
"\n",
" </table>\n",
" </details>\n",
" </div>\n",
" </div>\n",
" \n",
" <div style=\"margin-bottom: 20px;\">\n",
" <div style=\"width: 24px; height: 24px; background-color: #DBF5FF; border: 3px solid #4CC9FF; border-radius: 5px; position: absolute;\"> </div>\n",
" <div style=\"margin-left: 48px;\">\n",
" <details>\n",
" <summary>\n",
" <h4 style=\"margin-bottom: 0px; display: inline;\">Worker: 1</h4>\n",
" </summary>\n",
" <table style=\"width: 100%; text-align: left;\">\n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Comm: </strong> tcp://127.0.0.1:34127\n",
" </td>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Total threads: </strong> 1\n",
" </td>\n",
" </tr>\n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:40553/status\" target=\"_blank\">http://127.0.0.1:40553/status</a>\n",
" </td>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Memory: </strong> 30.96 GiB\n",
" </td>\n",
" </tr>\n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Nanny: </strong> tcp://127.0.0.1:45867\n",
" </td>\n",
" <td style=\"text-align: left;\"></td>\n",
" </tr>\n",
" <tr>\n",
" <td colspan=\"2\" style=\"text-align: left;\">\n",
" <strong>Local directory: </strong> /tmp/dask-scratch-space/worker-qoyj7d6g\n",
" </td>\n",
" </tr>\n",
"\n",
" \n",
"\n",
" \n",
"\n",
" </table>\n",
" </details>\n",
" </div>\n",
" </div>\n",
" \n",
" <div style=\"margin-bottom: 20px;\">\n",
" <div style=\"width: 24px; height: 24px; background-color: #DBF5FF; border: 3px solid #4CC9FF; border-radius: 5px; position: absolute;\"> </div>\n",
" <div style=\"margin-left: 48px;\">\n",
" <details>\n",
" <summary>\n",
" <h4 style=\"margin-bottom: 0px; display: inline;\">Worker: 2</h4>\n",
" </summary>\n",
" <table style=\"width: 100%; text-align: left;\">\n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Comm: </strong> tcp://127.0.0.1:37497\n",
" </td>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Total threads: </strong> 1\n",
" </td>\n",
" </tr>\n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:45205/status\" target=\"_blank\">http://127.0.0.1:45205/status</a>\n",
" </td>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Memory: </strong> 30.96 GiB\n",
" </td>\n",
" </tr>\n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Nanny: </strong> tcp://127.0.0.1:45659\n",
" </td>\n",
" <td style=\"text-align: left;\"></td>\n",
" </tr>\n",
" <tr>\n",
" <td colspan=\"2\" style=\"text-align: left;\">\n",
" <strong>Local directory: </strong> /tmp/dask-scratch-space/worker-bzkel24l\n",
" </td>\n",
" </tr>\n",
"\n",
" \n",
"\n",
" \n",
"\n",
" </table>\n",
" </details>\n",
" </div>\n",
" </div>\n",
" \n",
" <div style=\"margin-bottom: 20px;\">\n",
" <div style=\"width: 24px; height: 24px; background-color: #DBF5FF; border: 3px solid #4CC9FF; border-radius: 5px; position: absolute;\"> </div>\n",
" <div style=\"margin-left: 48px;\">\n",
" <details>\n",
" <summary>\n",
" <h4 style=\"margin-bottom: 0px; display: inline;\">Worker: 3</h4>\n",
" </summary>\n",
" <table style=\"width: 100%; text-align: left;\">\n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Comm: </strong> tcp://127.0.0.1:36239\n",
" </td>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Total threads: </strong> 1\n",
" </td>\n",
" </tr>\n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:41359/status\" target=\"_blank\">http://127.0.0.1:41359/status</a>\n",
" </td>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Memory: </strong> 30.96 GiB\n",
" </td>\n",
" </tr>\n",
" <tr>\n",
" <td style=\"text-align: left;\">\n",
" <strong>Nanny: </strong> tcp://127.0.0.1:40621\n",
" </td>\n",
" <td style=\"text-align: left;\"></td>\n",
" </tr>\n",
" <tr>\n",
" <td colspan=\"2\" style=\"text-align: left;\">\n",
" <strong>Local directory: </strong> /tmp/dask-scratch-space/worker-qxm06rhx\n",
" </td>\n",
" </tr>\n",
"\n",
" \n",
"\n",
" \n",
"\n",
" </table>\n",
" </details>\n",
" </div>\n",
" </div>\n",
" \n",
"\n",
" </details>\n",
"</div>\n",
"\n",
" </details>\n",
" </div>\n",
"</div>\n",
" </details>\n",
" \n",
"\n",
" </div>\n",
"</div>"
],
"text/plain": [
"<Client: 'tcp://127.0.0.1:45619' processes=4 threads=4, memory=123.85 GiB>"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cluster = dd.LocalCluster(n_workers=4, threads_per_worker=1)\n",
"client = dd.Client(cluster)\n",
"client"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"class TileDBSparseArrayWriteWrapper():\n",
" def __init__(self, uri):\n",
" self.uri = uri\n",
" # self.tiledb_array = tiledb_array\n",
" \n",
" # @property\n",
" # def tiledb_array(self):\n",
" # return tiledb.open(self.uri, mode=\"w\")\n",
"\n",
" def __setitem__(self, k: tuple[slice, ...], v: sparse.spmatrix):\n",
" assert all(isinstance(k_i, slice) for k_i in k)\n",
" row_slice, col_slice = k\n",
" row_offset = row_slice.start if row_slice.start is not None else 0\n",
" col_offset = col_slice.start if col_slice.start is not None else 0\n",
" v_coo = v.tocoo()\n",
" tiledb_array = tiledb.open(self.uri, mode=\"w\")\n",
" tiledb_array[v_coo.row + row_offset, v_coo.col + col_offset] = v.data\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def write_dask_array_as_tiledb(output_path, X, *, filters=tiledb.FilterList([])):\n",
" # Create output array\n",
" attrs = [tiledb.Attr(dtype=X.dtype, filters=filters)]\n",
" domain = tiledb.Domain(\n",
" tiledb.Dim(\n",
" name=\"obs\",\n",
" domain=(0, X.shape[0] - 1),\n",
" tile=min(X.shape[0], 256),\n",
" dtype=np.uint32,\n",
" filters=filters,\n",
" ),\n",
" tiledb.Dim(\n",
" name=\"var\",\n",
" domain=(0, X.shape[1] - 1),\n",
" tile=min(X.shape[1], 2048),\n",
" dtype=np.uint32,\n",
" filters=filters,\n",
" ),\n",
" )\n",
" schema = tiledb.ArraySchema(\n",
" domain=domain,\n",
" sparse=True,\n",
" allows_duplicates=True,\n",
" attrs=attrs,\n",
" cell_order=\"row-major\",\n",
" tile_order=\"row-major\",\n",
" capacity=1024000,\n",
" )\n",
" tiledb.Array.create(output_path, schema)\n",
" # X_write = tiledb.open(\"X\", mode=\"w\")\n",
"\n",
" # Write\n",
" X_write = TileDBSparseArrayWriteWrapper(output_path)\n",
" X.store(X_write, lock=False, compute=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def to_sparse_matrix(tdb_array: tiledb.SparseArray) -> sparse.spmatrix:\n",
" data, row, col = tdb_array[:].values()\n",
" return sparse.coo_matrix((data, (row, col)), shape=tdb_array.shape)\n",
" "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Proof of concept with generated data:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <td>\n",
" <table style=\"border-collapse: collapse;\">\n",
" <thead>\n",
" <tr>\n",
" <td> </td>\n",
" <th> Array </th>\n",
" <th> Chunk </th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" \n",
" <tr>\n",
" <th> Shape </th>\n",
" <td> (10000, 10000) </td>\n",
" <td> (1000, 1000) </td>\n",
" </tr>\n",
" <tr>\n",
" <th> Dask graph </th>\n",
" <td colspan=\"2\"> 100 chunks in 2 graph layers </td>\n",
" </tr>\n",
" <tr>\n",
" <th> Data type </th>\n",
" <td colspan=\"2\"> int64 scipy.sparse._csr.csr_matrix </td>\n",
" </tr>\n",
" </tbody>\n",
" </table>\n",
" </td>\n",
" <td>\n",
" <svg width=\"170\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"120\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"12\" x2=\"120\" y2=\"12\" />\n",
" <line x1=\"0\" y1=\"24\" x2=\"120\" y2=\"24\" />\n",
" <line x1=\"0\" y1=\"36\" x2=\"120\" y2=\"36\" />\n",
" <line x1=\"0\" y1=\"48\" x2=\"120\" y2=\"48\" />\n",
" <line x1=\"0\" y1=\"60\" x2=\"120\" y2=\"60\" />\n",
" <line x1=\"0\" y1=\"72\" x2=\"120\" y2=\"72\" />\n",
" <line x1=\"0\" y1=\"84\" x2=\"120\" y2=\"84\" />\n",
" <line x1=\"0\" y1=\"96\" x2=\"120\" y2=\"96\" />\n",
" <line x1=\"0\" y1=\"108\" x2=\"120\" y2=\"108\" />\n",
" <line x1=\"0\" y1=\"120\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
" <line x1=\"12\" y1=\"0\" x2=\"12\" y2=\"120\" />\n",
" <line x1=\"24\" y1=\"0\" x2=\"24\" y2=\"120\" />\n",
" <line x1=\"36\" y1=\"0\" x2=\"36\" y2=\"120\" />\n",
" <line x1=\"48\" y1=\"0\" x2=\"48\" y2=\"120\" />\n",
" <line x1=\"60\" y1=\"0\" x2=\"60\" y2=\"120\" />\n",
" <line x1=\"72\" y1=\"0\" x2=\"72\" y2=\"120\" />\n",
" <line x1=\"84\" y1=\"0\" x2=\"84\" y2=\"120\" />\n",
" <line x1=\"96\" y1=\"0\" x2=\"96\" y2=\"120\" />\n",
" <line x1=\"108\" y1=\"0\" x2=\"108\" y2=\"120\" />\n",
" <line x1=\"120\" y1=\"0\" x2=\"120\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.0,0.0 120.0,0.0 120.0,120.0 0.0,120.0\" style=\"fill:#ECB172A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"60.000000\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >10000</text>\n",
" <text x=\"140.000000\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,140.000000,60.000000)\">10000</text>\n",
"</svg>\n",
" </td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"dask.array<csr_matrix, shape=(10000, 10000), dtype=int64, chunksize=(1000, 1000), chunktype=scipy.csr_matrix>"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X = da.random.poisson(.01, size=(10_000, 10_000), chunks=(1000,1000))\n",
"X = X.map_blocks(sparse.csr_matrix, meta=sparse.csr_matrix((0, 0), dtype=X.dtype))\n",
"X"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.009800000000000001"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Checking level of sparsity\n",
"tmp = X[:1000, :1000].compute()\n",
"(tmp != 0).mean()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"!rm -r X_poisson"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ubuntu/miniforge3/envs/cellxgene-census-dev/lib/python3.11/site-packages/tiledb/cloud/config.py:96: UserWarning: You must first login before you can run commands. Please run tiledb.cloud.login.\n",
" warnings.warn(\n",
"/home/ubuntu/miniforge3/envs/cellxgene-census-dev/lib/python3.11/site-packages/tiledb/cloud/config.py:96: UserWarning: You must first login before you can run commands. Please run tiledb.cloud.login.\n",
" warnings.warn(\n",
"/home/ubuntu/miniforge3/envs/cellxgene-census-dev/lib/python3.11/site-packages/tiledb/cloud/config.py:96: UserWarning: You must first login before you can run commands. Please run tiledb.cloud.login.\n",
" warnings.warn(\n",
"/home/ubuntu/miniforge3/envs/cellxgene-census-dev/lib/python3.11/site-packages/tiledb/cloud/config.py:96: UserWarning: You must first login before you can run commands. Please run tiledb.cloud.login.\n",
" warnings.warn(\n"
]
}
],
"source": [
"write_dask_array_as_tiledb(\"X_poisson\", X)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Round trip test\n",
"\n",
"from_disk = to_sparse_matrix(tiledb.open(\"X_poisson\"))\n",
"from_dask = X.compute()\n",
"\n",
"assert (from_disk != from_dask).nnz == 0"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test with real data"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"import h5py\n",
"from anndata.experimental import read_elem_as_dask # This will be in anndata 0.11, which is currently unreleased"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"f = h5py.File(\"/home/ubuntu/github/cellxgene-census/b58c5c69-cb34-443d-a273-e440e130649a.h5ad\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
" <tr>\n",
" <td>\n",
" <table style=\"border-collapse: collapse;\">\n",
" <thead>\n",
" <tr>\n",
" <td> </td>\n",
" <th> Array </th>\n",
" <th> Chunk </th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" \n",
" <tr>\n",
" <th> Shape </th>\n",
" <td> (1033409, 45854) </td>\n",
" <td> (1000, 45854) </td>\n",
" </tr>\n",
" <tr>\n",
" <th> Dask graph </th>\n",
" <td colspan=\"2\"> 1034 chunks in 2 graph layers </td>\n",
" </tr>\n",
" <tr>\n",
" <th> Data type </th>\n",
" <td colspan=\"2\"> float32 scipy.sparse._csr.csr_matrix </td>\n",
" </tr>\n",
" </tbody>\n",
" </table>\n",
" </td>\n",
" <td>\n",
" <svg width=\"83\" height=\"170\" style=\"stroke:rgb(0,0,0);stroke-width:1\" >\n",
"\n",
" <!-- Horizontal lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"33\" y2=\"0\" style=\"stroke-width:2\" />\n",
" <line x1=\"0\" y1=\"6\" x2=\"33\" y2=\"6\" />\n",
" <line x1=\"0\" y1=\"12\" x2=\"33\" y2=\"12\" />\n",
" <line x1=\"0\" y1=\"18\" x2=\"33\" y2=\"18\" />\n",
" <line x1=\"0\" y1=\"25\" x2=\"33\" y2=\"25\" />\n",
" <line x1=\"0\" y1=\"31\" x2=\"33\" y2=\"31\" />\n",
" <line x1=\"0\" y1=\"37\" x2=\"33\" y2=\"37\" />\n",
" <line x1=\"0\" y1=\"44\" x2=\"33\" y2=\"44\" />\n",
" <line x1=\"0\" y1=\"50\" x2=\"33\" y2=\"50\" />\n",
" <line x1=\"0\" y1=\"56\" x2=\"33\" y2=\"56\" />\n",
" <line x1=\"0\" y1=\"63\" x2=\"33\" y2=\"63\" />\n",
" <line x1=\"0\" y1=\"69\" x2=\"33\" y2=\"69\" />\n",
" <line x1=\"0\" y1=\"75\" x2=\"33\" y2=\"75\" />\n",
" <line x1=\"0\" y1=\"82\" x2=\"33\" y2=\"82\" />\n",
" <line x1=\"0\" y1=\"88\" x2=\"33\" y2=\"88\" />\n",
" <line x1=\"0\" y1=\"94\" x2=\"33\" y2=\"94\" />\n",
" <line x1=\"0\" y1=\"101\" x2=\"33\" y2=\"101\" />\n",
" <line x1=\"0\" y1=\"107\" x2=\"33\" y2=\"107\" />\n",
" <line x1=\"0\" y1=\"113\" x2=\"33\" y2=\"113\" />\n",
" <line x1=\"0\" y1=\"120\" x2=\"33\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Vertical lines -->\n",
" <line x1=\"0\" y1=\"0\" x2=\"0\" y2=\"120\" style=\"stroke-width:2\" />\n",
" <line x1=\"33\" y1=\"0\" x2=\"33\" y2=\"120\" style=\"stroke-width:2\" />\n",
"\n",
" <!-- Colored Rectangle -->\n",
" <polygon points=\"0.0,0.0 33.76909709939219,0.0 33.76909709939219,120.0 0.0,120.0\" style=\"fill:#8B4903A0;stroke-width:0\"/>\n",
"\n",
" <!-- Text -->\n",
" <text x=\"16.884549\" y=\"140.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" >45854</text>\n",
" <text x=\"53.769097\" y=\"60.000000\" font-size=\"1.0rem\" font-weight=\"100\" text-anchor=\"middle\" transform=\"rotate(-90,53.769097,60.000000)\">1033409</text>\n",
"</svg>\n",
" </td>\n",
" </tr>\n",
"</table>"
],
"text/plain": [
"dask.array<make_dask_chunk, shape=(1033409, 45854), dtype=float32, chunksize=(1000, 45854), chunktype=scipy.csr_matrix>"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X = read_elem_as_dask(f[\"X\"])\n",
"X"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"!rm -r X"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 18.1 s, sys: 4.04 s, total: 22.1 s\n",
"Wall time: 3min 54s\n"
]
}
],
"source": [
"%%time\n",
"write_dask_array_as_tiledb(\"X\", X, filters=tiledb.FilterList([tiledb.ZstdFilter(level=5)]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "cellxgene-census-dev",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment