Created
January 28, 2025 03:13
-
-
Save ivirshup/4c09193e30534bdcfe9710efed4f2b16 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import tiledbsoma\n", | |
"import pandas as pd, numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Fetching from census" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# S3 prefix of the census build under test; the SOMA store (\"/soma\") and the\n", | |
"# source h5ads (\"/h5ads/<dataset_id>.h5ad\") are both read from below it.\n", | |
"BUILD_URI = \"s3://ivirshup-tmp/census-builds/presencemtx-2024-12-04T22:27:58Z\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Open the SOMA store of the build; the S3 region is set explicitly through the TileDB config.\n", | |
"census = tiledbsoma.open(\n", | |
" f\"{BUILD_URI}/soma\",\n", | |
" context=tiledbsoma.SOMATileDBContext(tiledb_config={\"vfs.s3.region\": \"us-west-2\"})\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"census_info = census[\"census_info\"][\"datasets\"].read().concat().to_pandas()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"hs_var = census[\"census_data\"][\"homo_sapiens\"][\"ms\"][\"RNA\"][\"var\"].read().concat().to_pandas()\n", | |
"mm_var = census[\"census_data\"][\"mus_musculus\"][\"ms\"][\"RNA\"][\"var\"].read().concat().to_pandas()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"hs_presence = census[\"census_data\"][\"homo_sapiens\"][\"ms\"][\"RNA\"][\"feature_dataset_presence_matrix\"].read().coos().concat().to_scipy().tocsr().astype(bool).toarray()\n", | |
"mm_presence = census[\"census_data\"][\"mus_musculus\"][\"ms\"][\"RNA\"][\"feature_dataset_presence_matrix\"].read().coos().concat().to_scipy().tocsr().astype(bool).toarray()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Fetching \"ground truth\"\n", | |
"\n", | |
"Build a dict mapping each `dataset_id` to the `var_names` read from its original h5ad." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/ubuntu/miniforge3/envs/census-api-dev/lib/python3.11/site-packages/anndata/utils.py:429: FutureWarning: Importing read_elem from `anndata.experimental` is deprecated. Import anndata.io.read_elem instead.\n", | |
" warnings.warn(msg, FutureWarning)\n" | |
] | |
} | |
], | |
"source": [ | |
"import h5py\n", | |
"import fsspec\n", | |
"import dask\n", | |
"from anndata.experimental import read_elem\n", | |
"# or from anndata.io import read_elem for 0.11\n", | |
"\n", | |
"fs = fsspec.filesystem('s3')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def read_var_names(uri, fs=fs):\n", | |
" with h5py.File(fs.open(uri)) as f:\n", | |
" index_key = f[\"var\"].attrs[\"_index\"]\n", | |
" return read_elem(f[\"var\"][index_key])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"h5ad_uris = [f\"{BUILD_URI}/h5ads/{dataset_id}.h5ad\" for dataset_id in census_info[\"dataset_id\"]]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
" <div style=\"width: 24px; height: 24px; background-color: #e1e1e1; border: 3px solid #9D9D9D; border-radius: 5px; position: absolute;\"> </div>\n", | |
" <div style=\"margin-left: 48px;\">\n", | |
" <h3 style=\"margin-bottom: 0px;\">Client</h3>\n", | |
" <p style=\"color: #9D9D9D; margin-bottom: 0px;\">Client-7fc39a61-dd25-11ef-9c4e-023ca6c22285</p>\n", | |
" <table style=\"width: 100%; text-align: left;\">\n", | |
"\n", | |
" <tr>\n", | |
" \n", | |
" <td style=\"text-align: left;\"><strong>Connection method:</strong> Cluster object</td>\n", | |
" <td style=\"text-align: left;\"><strong>Cluster type:</strong> distributed.LocalCluster</td>\n", | |
" \n", | |
" </tr>\n", | |
"\n", | |
" \n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:8787/status\" target=\"_blank\">http://127.0.0.1:8787/status</a>\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\"></td>\n", | |
" </tr>\n", | |
" \n", | |
"\n", | |
" </table>\n", | |
"\n", | |
" \n", | |
"\n", | |
" \n", | |
" <details>\n", | |
" <summary style=\"margin-bottom: 20px;\"><h3 style=\"display: inline;\">Cluster Info</h3></summary>\n", | |
" <div class=\"jp-RenderedHTMLCommon jp-RenderedHTML jp-mod-trusted jp-OutputArea-output\">\n", | |
" <div style=\"width: 24px; height: 24px; background-color: #e1e1e1; border: 3px solid #9D9D9D; border-radius: 5px; position: absolute;\">\n", | |
" </div>\n", | |
" <div style=\"margin-left: 48px;\">\n", | |
" <h3 style=\"margin-bottom: 0px; margin-top: 0px;\">LocalCluster</h3>\n", | |
" <p style=\"color: #9D9D9D; margin-bottom: 0px;\">cb5b5520</p>\n", | |
" <table style=\"width: 100%; text-align: left;\">\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Dashboard:</strong> <a href=\"http://127.0.0.1:8787/status\" target=\"_blank\">http://127.0.0.1:8787/status</a>\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Workers:</strong> 8\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Total threads:</strong> 32\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Total memory:</strong> 123.85 GiB\n", | |
" </td>\n", | |
" </tr>\n", | |
" \n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\"><strong>Status:</strong> running</td>\n", | |
" <td style=\"text-align: left;\"><strong>Using processes:</strong> True</td>\n", | |
"</tr>\n", | |
"\n", | |
" \n", | |
" </table>\n", | |
"\n", | |
" <details>\n", | |
" <summary style=\"margin-bottom: 20px;\">\n", | |
" <h3 style=\"display: inline;\">Scheduler Info</h3>\n", | |
" </summary>\n", | |
"\n", | |
" <div style=\"\">\n", | |
" <div>\n", | |
" <div style=\"width: 24px; height: 24px; background-color: #FFF7E5; border: 3px solid #FF6132; border-radius: 5px; position: absolute;\"> </div>\n", | |
" <div style=\"margin-left: 48px;\">\n", | |
" <h3 style=\"margin-bottom: 0px;\">Scheduler</h3>\n", | |
" <p style=\"color: #9D9D9D; margin-bottom: 0px;\">Scheduler-a0f1e895-c786-460c-8a1c-33c2205aeb1d</p>\n", | |
" <table style=\"width: 100%; text-align: left;\">\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Comm:</strong> tcp://127.0.0.1:38885\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Workers:</strong> 8\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Dashboard:</strong> <a href=\"http://127.0.0.1:8787/status\" target=\"_blank\">http://127.0.0.1:8787/status</a>\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Total threads:</strong> 32\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Started:</strong> Just now\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Total memory:</strong> 123.85 GiB\n", | |
" </td>\n", | |
" </tr>\n", | |
" </table>\n", | |
" </div>\n", | |
" </div>\n", | |
"\n", | |
" <details style=\"margin-left: 48px;\">\n", | |
" <summary style=\"margin-bottom: 20px;\">\n", | |
" <h3 style=\"display: inline;\">Workers</h3>\n", | |
" </summary>\n", | |
"\n", | |
" \n", | |
" <div style=\"margin-bottom: 20px;\">\n", | |
" <div style=\"width: 24px; height: 24px; background-color: #DBF5FF; border: 3px solid #4CC9FF; border-radius: 5px; position: absolute;\"> </div>\n", | |
" <div style=\"margin-left: 48px;\">\n", | |
" <details>\n", | |
" <summary>\n", | |
" <h4 style=\"margin-bottom: 0px; display: inline;\">Worker: 0</h4>\n", | |
" </summary>\n", | |
" <table style=\"width: 100%; text-align: left;\">\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Comm: </strong> tcp://127.0.0.1:40785\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Total threads: </strong> 4\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:40389/status\" target=\"_blank\">http://127.0.0.1:40389/status</a>\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Memory: </strong> 15.48 GiB\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Nanny: </strong> tcp://127.0.0.1:45935\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\"></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td colspan=\"2\" style=\"text-align: left;\">\n", | |
" <strong>Local directory: </strong> /tmp/dask-scratch-space/worker-19rn_gm0\n", | |
" </td>\n", | |
" </tr>\n", | |
"\n", | |
" \n", | |
"\n", | |
" \n", | |
"\n", | |
" </table>\n", | |
" </details>\n", | |
" </div>\n", | |
" </div>\n", | |
" \n", | |
" <div style=\"margin-bottom: 20px;\">\n", | |
" <div style=\"width: 24px; height: 24px; background-color: #DBF5FF; border: 3px solid #4CC9FF; border-radius: 5px; position: absolute;\"> </div>\n", | |
" <div style=\"margin-left: 48px;\">\n", | |
" <details>\n", | |
" <summary>\n", | |
" <h4 style=\"margin-bottom: 0px; display: inline;\">Worker: 1</h4>\n", | |
" </summary>\n", | |
" <table style=\"width: 100%; text-align: left;\">\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Comm: </strong> tcp://127.0.0.1:42095\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Total threads: </strong> 4\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:42533/status\" target=\"_blank\">http://127.0.0.1:42533/status</a>\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Memory: </strong> 15.48 GiB\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Nanny: </strong> tcp://127.0.0.1:38721\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\"></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td colspan=\"2\" style=\"text-align: left;\">\n", | |
" <strong>Local directory: </strong> /tmp/dask-scratch-space/worker-y_h3uv3b\n", | |
" </td>\n", | |
" </tr>\n", | |
"\n", | |
" \n", | |
"\n", | |
" \n", | |
"\n", | |
" </table>\n", | |
" </details>\n", | |
" </div>\n", | |
" </div>\n", | |
" \n", | |
" <div style=\"margin-bottom: 20px;\">\n", | |
" <div style=\"width: 24px; height: 24px; background-color: #DBF5FF; border: 3px solid #4CC9FF; border-radius: 5px; position: absolute;\"> </div>\n", | |
" <div style=\"margin-left: 48px;\">\n", | |
" <details>\n", | |
" <summary>\n", | |
" <h4 style=\"margin-bottom: 0px; display: inline;\">Worker: 2</h4>\n", | |
" </summary>\n", | |
" <table style=\"width: 100%; text-align: left;\">\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Comm: </strong> tcp://127.0.0.1:42919\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Total threads: </strong> 4\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:35645/status\" target=\"_blank\">http://127.0.0.1:35645/status</a>\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Memory: </strong> 15.48 GiB\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Nanny: </strong> tcp://127.0.0.1:36869\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\"></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td colspan=\"2\" style=\"text-align: left;\">\n", | |
" <strong>Local directory: </strong> /tmp/dask-scratch-space/worker-p7wx8584\n", | |
" </td>\n", | |
" </tr>\n", | |
"\n", | |
" \n", | |
"\n", | |
" \n", | |
"\n", | |
" </table>\n", | |
" </details>\n", | |
" </div>\n", | |
" </div>\n", | |
" \n", | |
" <div style=\"margin-bottom: 20px;\">\n", | |
" <div style=\"width: 24px; height: 24px; background-color: #DBF5FF; border: 3px solid #4CC9FF; border-radius: 5px; position: absolute;\"> </div>\n", | |
" <div style=\"margin-left: 48px;\">\n", | |
" <details>\n", | |
" <summary>\n", | |
" <h4 style=\"margin-bottom: 0px; display: inline;\">Worker: 3</h4>\n", | |
" </summary>\n", | |
" <table style=\"width: 100%; text-align: left;\">\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Comm: </strong> tcp://127.0.0.1:42635\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Total threads: </strong> 4\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:44985/status\" target=\"_blank\">http://127.0.0.1:44985/status</a>\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Memory: </strong> 15.48 GiB\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Nanny: </strong> tcp://127.0.0.1:43843\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\"></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td colspan=\"2\" style=\"text-align: left;\">\n", | |
" <strong>Local directory: </strong> /tmp/dask-scratch-space/worker-xg2obcp3\n", | |
" </td>\n", | |
" </tr>\n", | |
"\n", | |
" \n", | |
"\n", | |
" \n", | |
"\n", | |
" </table>\n", | |
" </details>\n", | |
" </div>\n", | |
" </div>\n", | |
" \n", | |
" <div style=\"margin-bottom: 20px;\">\n", | |
" <div style=\"width: 24px; height: 24px; background-color: #DBF5FF; border: 3px solid #4CC9FF; border-radius: 5px; position: absolute;\"> </div>\n", | |
" <div style=\"margin-left: 48px;\">\n", | |
" <details>\n", | |
" <summary>\n", | |
" <h4 style=\"margin-bottom: 0px; display: inline;\">Worker: 4</h4>\n", | |
" </summary>\n", | |
" <table style=\"width: 100%; text-align: left;\">\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Comm: </strong> tcp://127.0.0.1:43009\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Total threads: </strong> 4\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:34861/status\" target=\"_blank\">http://127.0.0.1:34861/status</a>\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Memory: </strong> 15.48 GiB\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Nanny: </strong> tcp://127.0.0.1:38079\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\"></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td colspan=\"2\" style=\"text-align: left;\">\n", | |
" <strong>Local directory: </strong> /tmp/dask-scratch-space/worker-iwyuh458\n", | |
" </td>\n", | |
" </tr>\n", | |
"\n", | |
" \n", | |
"\n", | |
" \n", | |
"\n", | |
" </table>\n", | |
" </details>\n", | |
" </div>\n", | |
" </div>\n", | |
" \n", | |
" <div style=\"margin-bottom: 20px;\">\n", | |
" <div style=\"width: 24px; height: 24px; background-color: #DBF5FF; border: 3px solid #4CC9FF; border-radius: 5px; position: absolute;\"> </div>\n", | |
" <div style=\"margin-left: 48px;\">\n", | |
" <details>\n", | |
" <summary>\n", | |
" <h4 style=\"margin-bottom: 0px; display: inline;\">Worker: 5</h4>\n", | |
" </summary>\n", | |
" <table style=\"width: 100%; text-align: left;\">\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Comm: </strong> tcp://127.0.0.1:37681\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Total threads: </strong> 4\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:45525/status\" target=\"_blank\">http://127.0.0.1:45525/status</a>\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Memory: </strong> 15.48 GiB\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Nanny: </strong> tcp://127.0.0.1:44711\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\"></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td colspan=\"2\" style=\"text-align: left;\">\n", | |
" <strong>Local directory: </strong> /tmp/dask-scratch-space/worker-zduyjxqz\n", | |
" </td>\n", | |
" </tr>\n", | |
"\n", | |
" \n", | |
"\n", | |
" \n", | |
"\n", | |
" </table>\n", | |
" </details>\n", | |
" </div>\n", | |
" </div>\n", | |
" \n", | |
" <div style=\"margin-bottom: 20px;\">\n", | |
" <div style=\"width: 24px; height: 24px; background-color: #DBF5FF; border: 3px solid #4CC9FF; border-radius: 5px; position: absolute;\"> </div>\n", | |
" <div style=\"margin-left: 48px;\">\n", | |
" <details>\n", | |
" <summary>\n", | |
" <h4 style=\"margin-bottom: 0px; display: inline;\">Worker: 6</h4>\n", | |
" </summary>\n", | |
" <table style=\"width: 100%; text-align: left;\">\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Comm: </strong> tcp://127.0.0.1:46225\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Total threads: </strong> 4\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:37113/status\" target=\"_blank\">http://127.0.0.1:37113/status</a>\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Memory: </strong> 15.48 GiB\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Nanny: </strong> tcp://127.0.0.1:44969\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\"></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td colspan=\"2\" style=\"text-align: left;\">\n", | |
" <strong>Local directory: </strong> /tmp/dask-scratch-space/worker-7eumurfa\n", | |
" </td>\n", | |
" </tr>\n", | |
"\n", | |
" \n", | |
"\n", | |
" \n", | |
"\n", | |
" </table>\n", | |
" </details>\n", | |
" </div>\n", | |
" </div>\n", | |
" \n", | |
" <div style=\"margin-bottom: 20px;\">\n", | |
" <div style=\"width: 24px; height: 24px; background-color: #DBF5FF; border: 3px solid #4CC9FF; border-radius: 5px; position: absolute;\"> </div>\n", | |
" <div style=\"margin-left: 48px;\">\n", | |
" <details>\n", | |
" <summary>\n", | |
" <h4 style=\"margin-bottom: 0px; display: inline;\">Worker: 7</h4>\n", | |
" </summary>\n", | |
" <table style=\"width: 100%; text-align: left;\">\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Comm: </strong> tcp://127.0.0.1:41581\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Total threads: </strong> 4\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Dashboard: </strong> <a href=\"http://127.0.0.1:38115/status\" target=\"_blank\">http://127.0.0.1:38115/status</a>\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Memory: </strong> 15.48 GiB\n", | |
" </td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td style=\"text-align: left;\">\n", | |
" <strong>Nanny: </strong> tcp://127.0.0.1:43573\n", | |
" </td>\n", | |
" <td style=\"text-align: left;\"></td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td colspan=\"2\" style=\"text-align: left;\">\n", | |
" <strong>Local directory: </strong> /tmp/dask-scratch-space/worker-piimz93m\n", | |
" </td>\n", | |
" </tr>\n", | |
"\n", | |
" \n", | |
"\n", | |
" \n", | |
"\n", | |
" </table>\n", | |
" </details>\n", | |
" </div>\n", | |
" </div>\n", | |
" \n", | |
"\n", | |
" </details>\n", | |
"</div>\n", | |
"\n", | |
" </details>\n", | |
" </div>\n", | |
"</div>\n", | |
" </details>\n", | |
" \n", | |
"\n", | |
" </div>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
"<Client: 'tcp://127.0.0.1:38885' processes=8 threads=32, memory=123.85 GiB>" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from dask.distributed import Client\n", | |
"# Client() is created with no arguments; the recorded output of this cell\n", | |
"# shows it backed by a distributed.LocalCluster.\n", | |
"client = Client()\n", | |
"# bare last expression so the notebook renders the client summary\n", | |
"client" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"tasks = [dask.delayed(read_var_names)(uri) for uri in h5ad_uris]\n", | |
"names = dict(zip(census_info[\"dataset_id\"], dask.compute(tasks)[0]))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Checking the presence matrices against the h5ad var names" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"results = []\n", | |
"for i, row in enumerate(hs_presence):\n", | |
" if row.sum() == 0:\n", | |
" results.append(True)\n", | |
" continue\n", | |
" dataset_id = census_info.loc[i, \"dataset_id\"]\n", | |
" if hs_var.loc[row, \"feature_id\"].isin(names[dataset_id]).all():\n", | |
" results.append(True)\n", | |
" else:\n", | |
" results.append(False)\n", | |
"assert all(results)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"results = []\n", | |
"for i, row in enumerate(mm_presence):\n", | |
" if row.sum() == 0:\n", | |
" results.append(True)\n", | |
" continue\n", | |
" dataset_id = census_info.loc[i, \"dataset_id\"]\n", | |
" if mm_var.loc[row, \"feature_id\"].isin(names[dataset_id]).all():\n", | |
" results.append(True)\n", | |
" else:\n", | |
" results.append(False)\n", | |
"assert all(results)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "census-api-dev", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.11" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment