tonyfast · January 25, 2025 07:16
diff --git a/2025-01-24-cold-docs.ipynb b/2025-01-24-cold-docs.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "bc87bb4d-16f9-4ed6-875b-5b786bce9402",
   "metadata": {},
   "outputs": [],
   "source": [
    "import polars\n",
    "from nbformat import v4\n",
    "\n",
    "input = list(map(anyio.Path, glob.glob(str(pathlib.Path(\"~/Documents/syllabus/docs/*.ipynb\").expanduser()))))\n",
    "files = polars.Series(\"path\", input).to_frame()\n",
    "files = files.with_columns(\n",
    "    file=files[\"path\"].map_elements(lambda x: str(x._path), polars.String)\n",
    ")\n",
    "\n",
    "def enumerate_iterable(series, name=\"id\", start=0):\n",
    "    target_type = series.dtype.base_type()(polars.Struct(series.dtype.inner.fields + [polars.Field(name, polars.Int64)]))\n",
    "    return series.map_elements(lambda x: [{**body, name: i} for (i, body) in enumerate(x, start)], target_type)\n",
    "\n",
    "# https://github.com/jupyter/nbconvert/blob/5f508ebad9471876f53a59c737bd5f47b2b4c163/share/templates/base/display_priority.j2\n",
    "display_priority = \"\"\"text/html text/markdown image/svg+xml image/png image/jpeg text/plain application/pdf\n",
    "text/latex text/vnd.mermaid application/javascript application/vnd.jupyter.widget-view+json\"\"\".strip().split()\n",
    "\n",
    "async def read_text(path):\n",
    "    if isinstance(path, (anyio.Path, pathlib.Path)):\n",
    "        if path.suffix == \".ipynb\":\n",
    "            return await path.read_text()\n",
    "        elif path.suffix == \".md\":\n",
    "            return json.dumps(\n",
    "                v4.new_notebook(cells=[v4.new_markdown_cell((await  path.read_text()).splitlines(True))])\n",
    "            )\n",
    "        elif path.suffix == \".py\":\n",
    "            return json.dumps(v4.new_notebook(cells=[v4.new_code_cell((await path.read_text()).splitlines(True))]))\n",
    "    return json.dumps(v4.new_notebook())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "d263521e-7f72-447e-bb12-e7cfc8ecaa87",
   "metadata": {},
   "outputs": [],
   "source": [
    "import bs4\n",
    "Soup = partial(bs4.BeautifulSoup, features=\"lxml\")\n",
    "TEMPLATE = Soup(Path(\"~/Documents/refnb/packages/refnb-core/refnb-core/index.html\").expanduser().read_text())\n",
    "CELLS = TEMPLATE.select_one(\"template.cells\").select_one(\"tbody\")\n",
    "CELL = TEMPLATE.select_one(\"template.cell\").select_one(\"tr\")\n",
    "OUTPUTS = TEMPLATE.select_one(\"template.outputs\").select_one(\"details\")\n",
    "OUTPUT = TEMPLATE.select_one(\"template.output\").select_one(\"tr\")\n",
    "assert all((CELLS, CELL, OUTPUTS, OUTPUT)), \"bad selector\"\n",
    "\n",
    "def clone(el):\n",
    "    from bs4 import Tag, NavigableString\n",
    "    if isinstance(el, NavigableString): return type(el)(el)\n",
    "    copy = Tag(None, el.builder, el.name, el.namespace, el.nsprefix)\n",
    "    copy.attrs = dict(el.attrs)\n",
    "    for k, v in copy.attrs.items():\n",
    "        if isinstance(v, list):\n",
    "            copy[k] = [*v]\n",
    "    for attr in (\"can_be_empty_element\", \"hidden\"): setattr(copy, attr, getattr(el, attr))\n",
    "    for child in el.contents: copy.append(clone(child))\n",
    "    return copy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "3e2d9459-5e40-4acf-8c6b-d04ef8699cef",
   "metadata": {},
   "outputs": [],
   "source": [
    "def display_dispatch(t, v, metadata):\n",
    "    if t == \"text/plain\":\n",
    "        yield \"\".join(v)\n",
    "    elif t == \"text/markdown\":\n",
    "        yield from Soup(get_markdown().render(\"\".join(v))).body.children\n",
    "    elif t == \"text/html\":\n",
    "        yield from Soup(\"\".join(v)).body.children\n",
    "    elif t.startswith(\"text\"):\n",
    "        # highlight form mimetype\n",
    "        yield highlight(\"\".join(v))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "eac11e7e-6ec9-4097-8f8f-ed3f93c81d9f",
   "metadata": {},
   "outputs": [],
   "source": [
    "IDREFS = (\"aria-labelledby aria-describedby aria-owns aria-controls for form id\".split())\n",
    "def populate(nb):\n",
    "    tpl = clone(TEMPLATE)\n",
    "    table = tpl.select_one(\"main.notebook table.cells\")\n",
    "    footer = table.select_one(\"tfoot\")\n",
    "    cells = clone(CELLS)\n",
    "\n",
    "    for cell in nb[\"cells\"]:\n",
    "        id = cell[\"id\"]\n",
    "        if cell[\"metadata\"].get(\"name\"):\n",
    "            id = cell[\"metadata\"][\"name\"]\n",
    "        row = clone(CELL)\n",
    "        row[\"class\"].append(cell[\"cell_type\"])\n",
    "        # link back to the document\n",
    "        row.select_one(\"th.doc a\").append(str(nb[\"file\"]))\n",
    "        row.select_one(\"th.cell a\").append(str(cell[\"cell\"]))\n",
    "        row.select_one(\"th.id input\").attrs[\"value\"] = id\n",
    "        row.select_one(\"td.execution_count output\").append(str(cell[\"execution_count\"] or \"\") )\n",
    "        row.select_one(F\"td.cell_type option[value={cell['cell_type']}]\").attrs[\"selected\"] = True\n",
    "        source = \"\".join(cell[\"source\"])\n",
    "        if cell[\"cell_type\"] == \"markdown\":\n",
    "            cell[\"outputs\"] = [dict(data={\"text/markdown\": source}, output_type=\"display_data\")]\n",
    "        row.select_one(\"td.source textarea\").append(source)\n",
    "        row.select_one(\"td.source section.highlight\").append(source)\n",
    "        row.select_one(\"td.form form\")\n",
    "        row.select_one(\"td.metadata\")\n",
    "        if cell.get(\"outputs\"):\n",
    "            details = clone(OUTPUTS)\n",
    "            outputs = details.select_one(\"table\")\n",
    "            for output in cell[\"outputs\"]:\n",
    "                if output[\"output_type\"] in {\"display_data\", \"execute_result\"}:\n",
    "                    body = TEMPLATE.new_tag(\"tbody\")\n",
    "                    body.attrs.setdefault(\"class\", []).append(output['output_type'])\n",
    "                    for t in itertools.chain(\n",
    "                        filter(output[\"data\"].__contains__, display_priority),\n",
    "                        filter(lambda x: x not in display_priority, output[\"data\"])\n",
    "                    ):\n",
    "                        v = output[\"data\"][t] or \"<body></body>\"\n",
    "                        # we can include ALL the bundles OR the preferred one\n",
    "\n",
    "                        entry = clone(OUTPUT)\n",
    "                        entry.select_one(\"td.execution_count output\").append(str(cell[\"execution_count\"] or \"\") )\n",
    "                        entry.select_one(\"td.output_type label\").append(t)\n",
    "                        if t not in entry[\"class\"]: \n",
    "                            entry[\"class\"].append(t)\n",
    "                        \n",
    "                        body.append(entry)\n",
    "                        try:\n",
    "                            entry.select_one(\"td.data\").extend(display_dispatch(t, v, output.get(\"metadata\", {})))\n",
    "                        except Exception as e: \n",
    "                            raise e\n",
    "                        entry.select_one(\"td.metadata\")\n",
    "                        body.append(entry)\n",
    "                    outputs.append(body)\n",
    "                elif output[\"output_type\"] == \"stream\":\n",
    "                    # stdout/stderr\n",
    "                    entry = clone(OUTPUT)\n",
    "                    entry[\"class\"] += F\" {output['output_type']}\"\n",
    "                    entry.select_one(\"td.name\").append(output[\"name\"])\n",
    "                    entry.select_one(\"td.text samp\").append(\"\".join(output[\"text\"]))\n",
    "                    outputs.append(entry)\n",
    "                elif output[\"output_type\"] == \"error\":\n",
    "                    entry = clone(OUTPUT)\n",
    "                    entry[\"class\"] += F\" {output['output_type']}\"\n",
    "                    entry.select_one(\"td.ename\").append(output[\"ename\"])\n",
    "                    entry.select_one(\"td.evalue samp\").append(\"\".join(output[\"evalue\"]))\n",
    "                    entry.select_one(\"td.traceback samp\").append(\"\".join(output[\"traceback\"]))\n",
    "                    entry.select_one(\"td.execution_count output\").append(str(cell[\"execution_count\"] or \"\") )\n",
    "                    outputs.append(entry)\n",
    "            row.select_one(\"td.outputs\").append(outputs)\n",
    "            \n",
    "\n",
    "        if \"slide_type\" in cell[\"metadata\"]:\n",
    "            row[\"class\"].append(cell[\"metadata\"][\"slide_type\"])\n",
    "        if \"execution\" in cell[\"metadata\"]:\n",
    "            row.select_one(\"td.started_at time\").append(cell[\"metadata\"][\"execution\"][\"iopub.execute_input\"])\n",
    "            row.select_one(\"td.completed_at time\").append(cell[\"metadata\"][\"execution\"][\"iopub.execute_reply\"])\n",
    "            # do the math for the time\n",
    "            row.select_one(\"td.elapsed output time\")\n",
    "        if cell[\"metadata\"].get(\"collapsed\"):\n",
    "            row.select_one(\"td.outputs\")[\"class\"].append(\"collapsed\")\n",
    "        if cell[\"metadata\"].get(\"scrolled\"):\n",
    "            row.select_one(\"td.outputs\")[\"class\"].append(\"scrolled\")\n",
    "        if cell[\"metadata\"].get(\"jupyter\"):\n",
    "            if cell[\"metadata\"][\"jupyter\"].get(\"source_hidden\"):\n",
    "                row.select_one(\"td.source\")[\"hidden\"] = \"\"\n",
    "            if cell[\"metadata\"][\"jupyter\"].get(\"outputs_hidden\"):\n",
    "                row.select_one(\"td.outputs\")[\"hidden\"] = \"\"\n",
    "        row[\"class\"].extend(map(slugify.slugify, cell[\"metadata\"].get(\"tags\", \"\")))\n",
    "        \n",
    "        set_ids(row, id)\n",
    "        cells.append(row)\n",
    "    footer.insert_before(cells)\n",
    "    inject_toc(tpl)\n",
    "    return tpl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "25ce0453-de81-4a43-a7d4-4407108c9f1f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import slugify"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "e8366ea6-bacd-4bc6-b698-62b5a0b04058",
   "metadata": {},
   "outputs": [],
   "source": [
    "def inject_toc(document):\n",
    "    toc = table = document.select_one(\"table.toc.headings\")\n",
    "    ROW = table.select_one(\"template tr\")\n",
    "    tbody = TEMPLATE.new_tag(\"tbody\")\n",
    "    for h in document.select(\"table.cells h1,h2,h3,h4,h5,h6\"):\n",
    "        row = clone(ROW)        \n",
    "        a = row.select_one(\"td.heading>a\")\n",
    "        heading = h.get_text()\n",
    "        if \"id\" not in h: h[\"id\"] = slugify.slugify(heading)\n",
    "        a.append(heading)\n",
    "        a[\"href\"] = \"#\" + h[\"id\"]\n",
    "        row.select_one(\"th.level\").append(h.name[1])\n",
    "        row.select_one(\"td.description>p\")\n",
    "        tbody.append(row)\n",
    "    table.append(tbody)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "b95ae161-508d-402f-bb5c-dd6b3a75de2d",
   "metadata": {},
   "outputs": [],
   "source": [
    "idref_selection = \",\".join(map(\"[{}]\".format, IDREFS))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "2a5eb1ec-7785-421b-bd7a-101b07eea936",
   "metadata": {},
   "outputs": [],
   "source": [
    "def set_ids(selection, id=\"\"):\n",
    "    for s in [selection] + selection.select(\",\".join(map(\"[{}]\".format, IDREFS))):\n",
    "        for idref in IDREFS:\n",
    "            if idref in s.attrs:\n",
    "                value = s.attrs[idref]\n",
    "                if value == \":\":\n",
    "                    s[idref] = id\n",
    "                elif isinstance(value, str):\n",
    "                    s[idref] = \" \".join((F\"{id}-{x[1:]}\" if x.startswith(\":\") else x) for x in value.split())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "6ffa0606-3682-4fb2-95a5-d54a065d0c4b",
   "metadata": {},
   "outputs": [],
   "source": [
    "@functools.lru_cache(1)\n",
    "def get_markdown():\n",
    "    from markdown_it import MarkdownIt\n",
    "    return MarkdownIt()\n",
    "\n",
    "def highlight(source, lang=\"python\", attrs=None):\n",
    "    import pygments\n",
    "    try:\n",
    "        return str(pygments.highlight(\n",
    "            source,\n",
    "            pygments.lexers.get_lexer_by_name(lang),\n",
    "            pygments.formatters.get_formatter_by_name(\"html5\")\n",
    "        )).pre\n",
    "    except:\n",
    "        return Soup(f\"\"\"<pre><code class=\"{lang}\">{html.escape(source)}</code></pre>\"\"\").pre"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "74ffa475-94ca-4c5c-a4b1-7835fb264786",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "contents = await asyncio.gather(*map(read_text, files[\"path\"]))\n",
    "contents = polars.Series(\n",
    "    \"contents\", contents, strict=False\n",
    ").str.json_decode().struct.unnest().with_columns(\n",
    "    file=files[\"file\"]\n",
    "    # , path=files[\"path\"] # causes a panic cause its a python object\n",
    ")\n",
    "contents = contents.with_columns(cells=enumerate_iterable(contents[\"cells\"], \"cell\", 1))\n",
    "CONTENTS_COLUMNS = [*contents.columns]\n",
    "contents = contents.with_columns(\n",
    "    contents.map_rows(lambda x: (populate(dict(zip(CONTENTS_COLUMNS, x))),)).rename({\"column_0\": \"html\"})\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "558a6247-bd4c-4205-ad39-ff07ddc8e67d",
   "metadata": {},
   "outputs": [],
   "source": [
    "cells = contents[[\"file\", \"cells\"]].explode(\"cells\").unnest(\"cells\")\n",
    "cells = cells.with_columns(source=cells[\"source\"].map_elements(\"\".join, polars.String))\n",
    "outputs = cells[[\"file\", \"id\", \"outputs\"]].explode(\"outputs\").unnest(\"outputs\")\n",
    "displays = outputs[[\"file\", \"id\", \"data\"]].drop_nulls().unnest(\"data\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "bdc6206b-1e66-4cc2-a2b3-3fb2842412a1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "        <iframe\n",
       "            width=\"100%\"\n",
       "            height=\"600\"\n",
       "            src=\"test.html\"\n",
       "            frameborder=\"0\"\n",
       "            allowfullscreen\n",
       "            \n",
       "        ></iframe>\n",
       "        "
      ],
      "text/plain": [
       "<IPython.lib.display.IFrame at 0x781ba2539190>"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test = pathlib.Path(\"test.html\")\n",
    "test.write_text(contents[\"html\"][0].body.prettify())\n",
    "IFrame(\"test.html\", width=\"100%\", height=600)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4c979859-e516-40e4-9247-32749879efb5",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33e17f3f-a1fa-4963-aa62-d8902ed596ca",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "599eafd7-8410-4a7d-84d7-7797efedf724",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6ee2397-0d24-48fd-82c1-e12a7b867a8d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f3c50ca-6ab2-476c-83d3-8a554b8c62d6",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "802e5732-024f-4ec1-a467-8ed38ec02738",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "57e8e84d-6a69-4f46-b4b4-69a3b0eb1c2a",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0950074d-be87-4b44-ab15-54ea19c0e521",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:p311] *",
   "language": "python",
   "name": "conda-env-p311-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"id": "bc87bb4d-16f9-4ed6-875b-5b786bce9402",
	"metadata": {},
	"outputs": [],
	"source": [
	"import polars\n",
	"from nbformat import v4\n",
	"\n",
	"input = list(map(anyio.Path, glob.glob(str(pathlib.Path(\"~/Documents/syllabus/docs/*.ipynb\").expanduser()))))\n",
	"files = polars.Series(\"path\", input).to_frame()\n",
	"files = files.with_columns(\n",
	" file=files[\"path\"].map_elements(lambda x: str(x._path), polars.String)\n",
	")\n",
	"\n",
	"def enumerate_iterable(series, name=\"id\", start=0):\n",
	" target_type = series.dtype.base_type()(polars.Struct(series.dtype.inner.fields + [polars.Field(name, polars.Int64)]))\n",
	" return series.map_elements(lambda x: [{**body, name: i} for (i, body) in enumerate(x, start)], target_type)\n",
	"\n",
	"# https://github.com/jupyter/nbconvert/blob/5f508ebad9471876f53a59c737bd5f47b2b4c163/share/templates/base/display_priority.j2\n",
	"display_priority = \"\"\"text/html text/markdown image/svg+xml image/png image/jpeg text/plain application/pdf\n",
	"text/latex text/vnd.mermaid application/javascript application/vnd.jupyter.widget-view+json\"\"\".strip().split()\n",
	"\n",
	"async def read_text(path):\n",
	" if isinstance(path, (anyio.Path, pathlib.Path)):\n",
	" if path.suffix == \".ipynb\":\n",
	" return await path.read_text()\n",
	" elif path.suffix == \".md\":\n",
	" return json.dumps(\n",
	" v4.new_notebook(cells=[v4.new_markdown_cell((await path.read_text()).splitlines(True))])\n",
	" )\n",
	" elif path.suffix == \".py\":\n",
	" return json.dumps(v4.new_notebook(cells=[v4.new_code_cell((await path.read_text()).splitlines(True))]))\n",
	" return json.dumps(v4.new_notebook())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"id": "d263521e-7f72-447e-bb12-e7cfc8ecaa87",
	"metadata": {},
	"outputs": [],
	"source": [
	"import bs4\n",
	"Soup = partial(bs4.BeautifulSoup, features=\"lxml\")\n",
	"TEMPLATE = Soup(Path(\"~/Documents/refnb/packages/refnb-core/refnb-core/index.html\").expanduser().read_text())\n",
	"CELLS = TEMPLATE.select_one(\"template.cells\").select_one(\"tbody\")\n",
	"CELL = TEMPLATE.select_one(\"template.cell\").select_one(\"tr\")\n",
	"OUTPUTS = TEMPLATE.select_one(\"template.outputs\").select_one(\"details\")\n",
	"OUTPUT = TEMPLATE.select_one(\"template.output\").select_one(\"tr\")\n",
	"assert all((CELLS, CELL, OUTPUTS, OUTPUT)), \"bad selector\"\n",
	"\n",
	"def clone(el):\n",
	" from bs4 import Tag, NavigableString\n",
	" if isinstance(el, NavigableString): return type(el)(el)\n",
	" copy = Tag(None, el.builder, el.name, el.namespace, el.nsprefix)\n",
	" copy.attrs = dict(el.attrs)\n",
	" for k, v in copy.attrs.items():\n",
	" if isinstance(v, list):\n",
	" copy[k] = [*v]\n",
	" for attr in (\"can_be_empty_element\", \"hidden\"): setattr(copy, attr, getattr(el, attr))\n",
	" for child in el.contents: copy.append(clone(child))\n",
	" return copy"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"id": "3e2d9459-5e40-4acf-8c6b-d04ef8699cef",
	"metadata": {},
	"outputs": [],
	"source": [
	"def display_dispatch(t, v, metadata):\n",
	" if t == \"text/plain\":\n",
	" yield \"\".join(v)\n",
	" elif t == \"text/markdown\":\n",
	" yield from Soup(get_markdown().render(\"\".join(v))).body.children\n",
	" elif t == \"text/html\":\n",
	" yield from Soup(\"\".join(v)).body.children\n",
	" elif t.startswith(\"text\"):\n",
	" # highlight form mimetype\n",
	" yield highlight(\"\".join(v))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"id": "eac11e7e-6ec9-4097-8f8f-ed3f93c81d9f",
	"metadata": {},
	"outputs": [],
	"source": [
	"IDREFS = (\"aria-labelledby aria-describedby aria-owns aria-controls for form id\".split())\n",
	"def populate(nb):\n",
	" tpl = clone(TEMPLATE)\n",
	" table = tpl.select_one(\"main.notebook table.cells\")\n",
	" footer = table.select_one(\"tfoot\")\n",
	" cells = clone(CELLS)\n",
	"\n",
	" for cell in nb[\"cells\"]:\n",
	" id = cell[\"id\"]\n",
	" if cell[\"metadata\"].get(\"name\"):\n",
	" id = cell[\"metadata\"][\"name\"]\n",
	" row = clone(CELL)\n",
	" row[\"class\"].append(cell[\"cell_type\"])\n",
	" # link back to the document\n",
	" row.select_one(\"th.doc a\").append(str(nb[\"file\"]))\n",
	" row.select_one(\"th.cell a\").append(str(cell[\"cell\"]))\n",
	" row.select_one(\"th.id input\").attrs[\"value\"] = id\n",
	" row.select_one(\"td.execution_count output\").append(str(cell[\"execution_count\"] or \"\") )\n",
	" row.select_one(F\"td.cell_type option[value={cell['cell_type']}]\").attrs[\"selected\"] = True\n",
	" source = \"\".join(cell[\"source\"])\n",
	" if cell[\"cell_type\"] == \"markdown\":\n",
	" cell[\"outputs\"] = [dict(data={\"text/markdown\": source}, output_type=\"display_data\")]\n",
	" row.select_one(\"td.source textarea\").append(source)\n",
	" row.select_one(\"td.source section.highlight\").append(source)\n",
	" row.select_one(\"td.form form\")\n",
	" row.select_one(\"td.metadata\")\n",
	" if cell.get(\"outputs\"):\n",
	" details = clone(OUTPUTS)\n",
	" outputs = details.select_one(\"table\")\n",
	" for output in cell[\"outputs\"]:\n",
	" if output[\"output_type\"] in {\"display_data\", \"execute_result\"}:\n",
	" body = TEMPLATE.new_tag(\"tbody\")\n",
	" body.attrs.setdefault(\"class\", []).append(output['output_type'])\n",
	" for t in itertools.chain(\n",
	" filter(output[\"data\"].__contains__, display_priority),\n",
	" filter(lambda x: x not in display_priority, output[\"data\"])\n",
	" ):\n",
	" v = output[\"data\"][t] or \"<body></body>\"\n",
	" # we can include ALL the bundles OR the preferred one\n",
	"\n",
	" entry = clone(OUTPUT)\n",
	" entry.select_one(\"td.execution_count output\").append(str(cell[\"execution_count\"] or \"\") )\n",
	" entry.select_one(\"td.output_type label\").append(t)\n",
	" if t not in entry[\"class\"]: \n",
	" entry[\"class\"].append(t)\n",
	" \n",
	" body.append(entry)\n",
	" try:\n",
	" entry.select_one(\"td.data\").extend(display_dispatch(t, v, output.get(\"metadata\", {})))\n",
	" except Exception as e: \n",
	" raise e\n",
	" entry.select_one(\"td.metadata\")\n",
	" body.append(entry)\n",
	" outputs.append(body)\n",
	" elif output[\"output_type\"] == \"stream\":\n",
	" # stdout/stderr\n",
	" entry = clone(OUTPUT)\n",
	" entry[\"class\"] += F\" {output['output_type']}\"\n",
	" entry.select_one(\"td.name\").append(output[\"name\"])\n",
	" entry.select_one(\"td.text samp\").append(\"\".join(output[\"text\"]))\n",
	" outputs.append(entry)\n",
	" elif output[\"output_type\"] == \"error\":\n",
	" entry = clone(OUTPUT)\n",
	" entry[\"class\"] += F\" {output['output_type']}\"\n",
	" entry.select_one(\"td.ename\").append(output[\"ename\"])\n",
	" entry.select_one(\"td.evalue samp\").append(\"\".join(output[\"evalue\"]))\n",
	" entry.select_one(\"td.traceback samp\").append(\"\".join(output[\"traceback\"]))\n",
	" entry.select_one(\"td.execution_count output\").append(str(cell[\"execution_count\"] or \"\") )\n",
	" outputs.append(entry)\n",
	" row.select_one(\"td.outputs\").append(outputs)\n",
	" \n",
	"\n",
	" if \"slide_type\" in cell[\"metadata\"]:\n",
	" row[\"class\"].append(cell[\"metadata\"][\"slide_type\"])\n",
	" if \"execution\" in cell[\"metadata\"]:\n",
	" row.select_one(\"td.started_at time\").append(cell[\"metadata\"][\"execution\"][\"iopub.execute_input\"])\n",
	" row.select_one(\"td.completed_at time\").append(cell[\"metadata\"][\"execution\"][\"iopub.execute_reply\"])\n",
	" # do the math for the time\n",
	" row.select_one(\"td.elapsed output time\")\n",
	" if cell[\"metadata\"].get(\"collapsed\"):\n",
	" row.select_one(\"td.outputs\")[\"class\"].append(\"collapsed\")\n",
	" if cell[\"metadata\"].get(\"scrolled\"):\n",
	" row.select_one(\"td.outputs\")[\"class\"].append(\"scrolled\")\n",
	" if cell[\"metadata\"].get(\"jupyter\"):\n",
	" if cell[\"metadata\"][\"jupyter\"].get(\"source_hidden\"):\n",
	" row.select_one(\"td.source\")[\"hidden\"] = \"\"\n",
	" if cell[\"metadata\"][\"jupyter\"].get(\"outputs_hidden\"):\n",
	" row.select_one(\"td.outputs\")[\"hidden\"] = \"\"\n",
	" row[\"class\"].extend(map(slugify.slugify, cell[\"metadata\"].get(\"tags\", \"\")))\n",
	" \n",
	" set_ids(row, id)\n",
	" cells.append(row)\n",
	" footer.insert_before(cells)\n",
	" inject_toc(tpl)\n",
	" return tpl"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"id": "25ce0453-de81-4a43-a7d4-4407108c9f1f",
	"metadata": {},
	"outputs": [],
	"source": [
	"import slugify"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"id": "e8366ea6-bacd-4bc6-b698-62b5a0b04058",
	"metadata": {},
	"outputs": [],
	"source": [
	"def inject_toc(document):\n",
	" toc = table = document.select_one(\"table.toc.headings\")\n",
	" ROW = table.select_one(\"template tr\")\n",
	" tbody = TEMPLATE.new_tag(\"tbody\")\n",
	" for h in document.select(\"table.cells h1,h2,h3,h4,h5,h6\"):\n",
	" row = clone(ROW) \n",
	" a = row.select_one(\"td.heading>a\")\n",
	" heading = h.get_text()\n",
	" if \"id\" not in h: h[\"id\"] = slugify.slugify(heading)\n",
	" a.append(heading)\n",
	" a[\"href\"] = \"#\" + h[\"id\"]\n",
	" row.select_one(\"th.level\").append(h.name[1])\n",
	" row.select_one(\"td.description>p\")\n",
	" tbody.append(row)\n",
	" table.append(tbody)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"id": "b95ae161-508d-402f-bb5c-dd6b3a75de2d",
	"metadata": {},
	"outputs": [],
	"source": [
	"idref_selection = \",\".join(map(\"[{}]\".format, IDREFS))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"id": "2a5eb1ec-7785-421b-bd7a-101b07eea936",
	"metadata": {},
	"outputs": [],
	"source": [
	"def set_ids(selection, id=\"\"):\n",
	" for s in [selection] + selection.select(\",\".join(map(\"[{}]\".format, IDREFS))):\n",
	" for idref in IDREFS:\n",
	" if idref in s.attrs:\n",
	" value = s.attrs[idref]\n",
	" if value == \":\":\n",
	" s[idref] = id\n",
	" elif isinstance(value, str):\n",
	" s[idref] = \" \".join((F\"{id}-{x[1:]}\" if x.startswith(\":\") else x) for x in value.split())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 21,
	"id": "6ffa0606-3682-4fb2-95a5-d54a065d0c4b",
	"metadata": {},
	"outputs": [],
	"source": [
	"@functools.lru_cache(1)\n",
	"def get_markdown():\n",
	" from markdown_it import MarkdownIt\n",
	" return MarkdownIt()\n",
	"\n",
	"def highlight(source, lang=\"python\", attrs=None):\n",
	" import pygments\n",
	" try:\n",
	" return str(pygments.highlight(\n",
	" source,\n",
	" pygments.lexers.get_lexer_by_name(lang),\n",
	" pygments.formatters.get_formatter_by_name(\"html5\")\n",
	" )).pre\n",
	" except:\n",
	" return Soup(f\"\"\"<pre><code class=\"{lang}\">{html.escape(source)}</code></pre>\"\"\").pre"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"id": "74ffa475-94ca-4c5c-a4b1-7835fb264786",
	"metadata": {
	"scrolled": true
	},
	"outputs": [],
	"source": [
	"contents = await asyncio.gather(*map(read_text, files[\"path\"]))\n",
	"contents = polars.Series(\n",
	" \"contents\", contents, strict=False\n",
	").str.json_decode().struct.unnest().with_columns(\n",
	" file=files[\"file\"]\n",
	" # , path=files[\"path\"] # causes a panic cause its a python object\n",
	")\n",
	"contents = contents.with_columns(cells=enumerate_iterable(contents[\"cells\"], \"cell\", 1))\n",
	"CONTENTS_COLUMNS = [*contents.columns]\n",
	"contents = contents.with_columns(\n",
	" contents.map_rows(lambda x: (populate(dict(zip(CONTENTS_COLUMNS, x))),)).rename({\"column_0\": \"html\"})\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 23,
	"id": "558a6247-bd4c-4205-ad39-ff07ddc8e67d",
	"metadata": {},
	"outputs": [],
	"source": [
	"cells = contents[[\"file\", \"cells\"]].explode(\"cells\").unnest(\"cells\")\n",
	"cells = cells.with_columns(source=cells[\"source\"].map_elements(\"\".join, polars.String))\n",
	"outputs = cells[[\"file\", \"id\", \"outputs\"]].explode(\"outputs\").unnest(\"outputs\")\n",
	"displays = outputs[[\"file\", \"id\", \"data\"]].drop_nulls().unnest(\"data\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 24,
	"id": "bdc6206b-1e66-4cc2-a2b3-3fb2842412a1",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"\n",
	" <iframe\n",
	" width=\"100%\"\n",
	" height=\"600\"\n",
	" src=\"test.html\"\n",
	" frameborder=\"0\"\n",
	" allowfullscreen\n",
	" \n",
	" ></iframe>\n",
	" "
	],
	"text/plain": [
	"<IPython.lib.display.IFrame at 0x781ba2539190>"
	]
	},
	"execution_count": 24,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"test = pathlib.Path(\"test.html\")\n",
	"test.write_text(contents[\"html\"][0].body.prettify())\n",
	"IFrame(\"test.html\", width=\"100%\", height=600)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "4c979859-e516-40e4-9247-32749879efb5",
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "33e17f3f-a1fa-4963-aa62-d8902ed596ca",
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "599eafd7-8410-4a7d-84d7-7797efedf724",
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "e6ee2397-0d24-48fd-82c1-e12a7b867a8d",
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "3f3c50ca-6ab2-476c-83d3-8a554b8c62d6",
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "802e5732-024f-4ec1-a467-8ed38ec02738",
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "57e8e84d-6a69-4f46-b4b4-69a3b0eb1c2a",
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "0950074d-be87-4b44-ab15-54ea19c0e521",
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python [conda env:p311] *",
	"language": "python",
	"name": "conda-env-p311-py"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.11.6"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 5
	}