Skip to content

Instantly share code, notes, and snippets.

@mrocklin
Created February 28, 2024 13:56
Show Gist options
  • Save mrocklin/7008801e2b5804584731675c6f52edb4 to your computer and use it in GitHub Desktop.
Save mrocklin/7008801e2b5804584731675c6f52edb4 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "49d8b433-5523-40d4-ab8d-dadd2cb17819",
"metadata": {},
"outputs": [],
"source": [
"from dask.distributed import LocalCluster\n",
"# Create a local Dask cluster and obtain a client connected to it.\n",
"cluster = LocalCluster()\n",
"client = cluster.get_client()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "f999d075-2f06-440a-aa40-d73601019409",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"({'id': '36053905400',\n",
" 'type': 'PushEvent',\n",
" 'actor': {'id': 146008414,\n",
" 'login': 'NikolaPejchinovski',\n",
" 'display_login': 'NikolaPejchinovski',\n",
" 'gravatar_id': '',\n",
" 'url': 'https://api.github.com/users/NikolaPejchinovski',\n",
" 'avatar_url': 'https://avatars.githubusercontent.com/u/146008414?'},\n",
" 'repo': {'id': 764096732,\n",
" 'name': 'NikolaPejchinovski/vivid-bits',\n",
" 'url': 'https://api.github.com/repos/NikolaPejchinovski/vivid-bits'},\n",
" 'payload': {'repository_id': 764096732,\n",
" 'push_id': 17291492339,\n",
" 'size': 1,\n",
" 'distinct_size': 1,\n",
" 'ref': 'refs/heads/master',\n",
" 'head': '4d4d01ef7c202aa28bd2a2b13a5bb03094b9c684',\n",
" 'before': '5b795756f44a92f8a2b0e0486d9c40f446798a13',\n",
" 'commits': [{'sha': '4d4d01ef7c202aa28bd2a2b13a5bb03094b9c684',\n",
" 'author': {'email': '[email protected]',\n",
" 'name': 'NikolaPejchinovski'},\n",
" 'message': 'Responsivness',\n",
" 'distinct': True,\n",
" 'url': 'https://api.github.com/repos/NikolaPejchinovski/vivid-bits/commits/4d4d01ef7c202aa28bd2a2b13a5bb03094b9c684'}]},\n",
" 'public': True,\n",
" 'created_at': '2024-02-27T15:00:00Z'},\n",
" {'id': '36053905402',\n",
" 'type': 'CreateEvent',\n",
" 'actor': {'id': 73640657,\n",
" 'login': 'Director-of-G',\n",
" 'display_login': 'Director-of-G',\n",
" 'gravatar_id': '',\n",
" 'url': 'https://api.github.com/users/Director-of-G',\n",
" 'avatar_url': 'https://avatars.githubusercontent.com/u/73640657?'},\n",
" 'repo': {'id': 764144270,\n",
" 'name': 'Director-of-G/jump-thu.github.io',\n",
" 'url': 'https://api.github.com/repos/Director-of-G/jump-thu.github.io'},\n",
" 'payload': {'ref': 'master',\n",
" 'ref_type': 'branch',\n",
" 'master_branch': 'master',\n",
" 'description': 'HomePage of Yongpeng JIANG / 姜永鹏的个人主页',\n",
" 'pusher_type': 'user'},\n",
" 'public': True,\n",
" 'created_at': '2024-02-27T15:00:00Z'})"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import json\n",
"import dask.bag as db\n",
"\n",
"# Read the event dump as text (blocksize of 50 MiB) and parse each element\n",
"# with json.loads, so `b` is a bag of event dicts like the ones shown below.\n",
"b = db.read_text(\"2024-02-27-15.json\", blocksize=\"50 MiB\").map(json.loads)\n",
"# Peek at the first two events to inspect their structure.\n",
"b.take(2)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "4d12f3fb-cfd3-483e-9faa-981e3ffdc5fd",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('PushEvent', 134977),\n",
" ('CreateEvent', 40357),\n",
" ('PullRequestEvent', 18526),\n",
" ('DeleteEvent', 13755),\n",
" ('IssueCommentEvent', 13095),\n",
" ('WatchEvent', 10720),\n",
" ('PullRequestReviewEvent', 8872),\n",
" ('PullRequestReviewCommentEvent', 5262),\n",
" ('IssuesEvent', 4856),\n",
" ('ForkEvent', 2758),\n",
" ('ReleaseEvent', 1441),\n",
" ('PublicEvent', 1030),\n",
" ('MemberEvent', 914),\n",
" ('GollumEvent', 363),\n",
" ('CommitCommentEvent', 296)]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count events per type; the recorded output lists them from most to least frequent.\n",
"b.pluck(\"type\").frequencies(sort=True).compute()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "512840de-89c2-44f3-9716-afe9c9faeb61",
"metadata": {},
"outputs": [],
"source": [
"def handle_PushEvent(d):\n",
"    \"\"\"Yield one flat record per commit carried by a PushEvent.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    d : dict\n",
"        A parsed event providing ``actor.login``, ``repo.name``, ``created_at``\n",
"        and a ``payload.commits`` list whose items have ``sha`` and ``message``.\n",
"\n",
"    Yields\n",
"    ------\n",
"    dict\n",
"        Keys ``username``, ``repo``, ``sha``, ``message`` and ``created_at``.\n",
"        Nothing is yielded when the commit list is empty.\n",
"    \"\"\"\n",
"    for commit in d[\"payload\"][\"commits\"]:\n",
"        # Event-level fields are repeated on every commit record so that each\n",
"        # record is self-contained after the bag is flattened.\n",
"        yield {\n",
"            \"username\": d[\"actor\"][\"login\"],\n",
"            \"repo\": d[\"repo\"][\"name\"],\n",
"            \"sha\": commit[\"sha\"],\n",
"            \"message\": commit[\"message\"],\n",
"            \"created_at\": d[\"created_at\"],\n",
"        }\n",
" \n",
"def handle_CreateEvent(d):\n",
"    \"\"\"Flatten a CreateEvent into a single record.\n",
"\n",
"    Returns a dict with ``username``, ``repo``, ``type`` (the payload's\n",
"    ``ref_type``), ``name`` (the payload's ``ref``), ``description`` and\n",
"    ``created_at``.\n",
"    \"\"\"\n",
"    record = {}\n",
"    record[\"username\"] = d[\"actor\"][\"login\"]\n",
"    record[\"repo\"] = d[\"repo\"][\"name\"]\n",
"    payload = d[\"payload\"]\n",
"    record[\"type\"] = payload[\"ref_type\"]\n",
"    record[\"name\"] = payload[\"ref\"]\n",
"    record[\"description\"] = payload[\"description\"]\n",
"    record[\"created_at\"] = d[\"created_at\"]\n",
"    return record\n",
"\n",
"def handle_PullRequestEvent(d):\n",
"    \"\"\"Flatten a PullRequestEvent into a single record.\n",
"\n",
"    Returns a dict with ``username``, ``repo``, ``action``, ``number``,\n",
"    ``title``, ``author`` (the pull request's ``user.login``), ``body``,\n",
"    ``pr_created_at`` (the pull request's own ``created_at``) and\n",
"    ``created_at`` (the event's ``created_at``).\n",
"    \"\"\"\n",
"    record = {\"username\": d[\"actor\"][\"login\"], \"repo\": d[\"repo\"][\"name\"]}\n",
"    payload = d[\"payload\"]\n",
"    record[\"action\"] = payload[\"action\"]\n",
"    record[\"number\"] = payload[\"number\"]\n",
"    pull_request = payload[\"pull_request\"]\n",
"    record[\"title\"] = pull_request[\"title\"]\n",
"    record[\"author\"] = pull_request[\"user\"][\"login\"]\n",
"    record[\"body\"] = pull_request[\"body\"]\n",
"    record[\"pr_created_at\"] = pull_request[\"created_at\"]\n",
"    record[\"created_at\"] = d[\"created_at\"]\n",
"    return record\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ebe0149b-355e-4346-bdd6-5a69bad5baac",
"metadata": {},
"outputs": [],
"source": [
"def _is_type(event_type):\n",
"    \"\"\"Build a predicate that keeps only events whose \"type\" equals event_type.\"\"\"\n",
"    return lambda event: event[\"type\"] == event_type\n",
"\n",
"# One bag per event type; a push event expands to one record per commit,\n",
"# hence the extra flatten() on `commits`.\n",
"commits = b.filter(_is_type(\"PushEvent\")).map(handle_PushEvent).flatten()\n",
"creates = b.filter(_is_type(\"CreateEvent\")).map(handle_CreateEvent)\n",
"prs = b.filter(_is_type(\"PullRequestEvent\")).map(handle_PullRequestEvent)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "034d952d-0e14-4a8e-9b1f-97f97d7c0473",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"({'username': 'NikolaPejchinovski',\n",
" 'repo': 'NikolaPejchinovski/vivid-bits',\n",
" 'sha': '4d4d01ef7c202aa28bd2a2b13a5bb03094b9c684',\n",
" 'message': 'Responsivness',\n",
" 'created_at': '2024-02-27T15:00:00Z'},\n",
" {'username': 'shakti-kc',\n",
" 'repo': 'shakti-kc/cosmo-search',\n",
" 'sha': '2daaab62f046ff6d9470ead8bafeb1ff0c4662dc',\n",
" 'message': 'First Commit',\n",
" 'created_at': '2024-02-27T15:00:00Z'},\n",
" {'username': 'B74LABgit',\n",
" 'repo': 'B74LABgit/CAM',\n",
" 'sha': 'ef17b98f59410ff4adfe9086fdccf06ae72e03bf',\n",
" 'message': 'committing files',\n",
" 'created_at': '2024-02-27T15:00:00Z'},\n",
" {'username': 'joshagilend',\n",
" 'repo': 'agilend/time-travel-db',\n",
" 'sha': 'a7f72079c10ca78833573c5472eeb6ba3b390b5b',\n",
" 'message': 'Fix for gunicorn',\n",
" 'created_at': '2024-02-27T15:00:00Z'},\n",
" {'username': 'msoilan',\n",
" 'repo': 'msoilan/msoilan.github.io',\n",
" 'sha': '5576fb6c1154e8bfdb10469a9c245d7a8bb125f1',\n",
" 'message': 'Testing local commit of index.html.',\n",
" 'created_at': '2024-02-27T15:00:00Z'})"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five flattened commit records.\n",
"commits.take(5)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "0ef42846-4d8e-48f3-85df-070dc5612af8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"({'username': 'Director-of-G',\n",
" 'repo': 'Director-of-G/jump-thu.github.io',\n",
" 'type': 'branch',\n",
" 'name': 'master',\n",
" 'description': 'HomePage of Yongpeng JIANG / 姜永鹏的个人主页',\n",
" 'created_at': '2024-02-27T15:00:00Z'},\n",
" {'username': 'mreza4456',\n",
" 'repo': 'mreza4456/Walknesia',\n",
" 'type': 'branch',\n",
" 'name': 'main',\n",
" 'description': None,\n",
" 'created_at': '2024-02-27T15:00:00Z'},\n",
" {'username': 'llekk',\n",
" 'repo': 'llekk/project-game-on-Pygame',\n",
" 'type': 'branch',\n",
" 'name': 'temp/upload-manifest/201964672-1709045996',\n",
" 'description': None,\n",
" 'created_at': '2024-02-27T15:00:00Z'},\n",
" {'username': 'lihxie',\n",
" 'repo': 'lihxie/DART',\n",
" 'type': 'branch',\n",
" 'name': 'temp/upload-manifest/201964652-1709045986',\n",
" 'description': None,\n",
" 'created_at': '2024-02-27T15:00:00Z'},\n",
" {'username': 'ManalWardi',\n",
" 'repo': 'ManalWardi/SnakeGame',\n",
" 'type': 'repository',\n",
" 'name': None,\n",
" 'description': None,\n",
" 'created_at': '2024-02-27T15:00:00Z'})"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five create-event records.\n",
"creates.take(5)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "58b5b337-f5a7-4946-adfb-d667f7fa20e5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"({'username': 'genraven1',\n",
" 'repo': 'GenravenGenesys/genesys',\n",
" 'action': 'closed',\n",
" 'number': 139,\n",
" 'title': 'Bump ip from 1.1.5 to 1.1.9 in /client',\n",
" 'author': 'dependabot[bot]',\n",
" 'body': 'Bumps [ip](https://github.com/indutny/node-ip) from 1.1.5 to 1.1.9.\\n<details>\\n<summary>Commits</summary>\\n<ul>\\n<li><a href=\"https://github.com/indutny/node-ip/commit/1ecbf2fd8c0cc85e44c3b587d2de641f50dc0217\"><code>1ecbf2f</code></a> 1.1.9</li>\\n<li><a href=\"https://github.com/indutny/node-ip/commit/6a3ada9b471b09d5f0f5be264911ab564bf67894\"><code>6a3ada9</code></a> lib: fixed CVE-2023-42282 and added unit test</li>\\n<li><a href=\"https://github.com/indutny/node-ip/commit/5dc3b2f3f4b4690fa9a918fa5085b6fca9979fca\"><code>5dc3b2f</code></a> 1.1.8</li>\\n<li><a href=\"https://github.com/indutny/node-ip/commit/8e6f28b23a89245eeac74ad2ef9eb0d27cb27e1c\"><code>8e6f28b</code></a> lib: even better node 6 support</li>\\n<li><a href=\"https://github.com/indutny/node-ip/commit/088c9e5664e43cda2a43c1e5b4acf921bb6613a9\"><code>088c9e5</code></a> 1.1.7</li>\\n<li><a href=\"https://github.com/indutny/node-ip/commit/1a4ca35ddc55d2987bb1a7e38e9a729a59174611\"><code>1a4ca35</code></a> lib: add back support for Node.js 6</li>\\n<li><a href=\"https://github.com/indutny/node-ip/commit/af82ef42adc513b20f7d1213421ca4b901d10f3d\"><code>af82ef4</code></a> 1.1.6</li>\\n<li><a href=\"https://github.com/indutny/node-ip/commit/dba19f6c0c6560bbc5bc18ad12976005900def87\"><code>dba19f6</code></a> package: exclude test folder from publishing</li>\\n<li><a href=\"https://github.com/indutny/node-ip/commit/7cd7f30991363d818cb0c6718a1ad5473c795b12\"><code>7cd7f30</code></a> ci: use github workflows</li>\\n<li><a href=\"https://github.com/indutny/node-ip/commit/4de50aec875d12b004849e11e19d6daf68b50c2d\"><code>4de50ae</code></a> lib: node 18 support</li>\\n<li>See full diff in <a href=\"https://github.com/indutny/node-ip/compare/v1.1.5...v1.1.9\">compare view</a></li>\\n</ul>\\n</details>\\n<br />\\n\\n\\n[![Dependabot compatibility 
score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=ip&package-manager=npm_and_yarn&previous-version=1.1.5&new-version=1.1.9)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)\\n\\nDependabot will resolve any conflicts with this PR as long as you don\\'t alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.\\n\\n[//]: # (dependabot-automerge-start)\\n[//]: # (dependabot-automerge-end)\\n\\n---\\n\\n<details>\\n<summary>Dependabot commands and options</summary>\\n<br />\\n\\nYou can trigger Dependabot actions by commenting on this PR:\\n- `@dependabot rebase` will rebase this PR\\n- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it\\n- `@dependabot merge` will merge this PR after your CI passes on it\\n- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it\\n- `@dependabot cancel merge` will cancel a previously requested merge and block automerging\\n- `@dependabot reopen` will reopen this PR if it is closed\\n- `@dependabot close` will close this PR and stop Dependabot recreating it. 
You can achieve the same result by closing it manually\\n- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency\\n- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)\\n- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)\\n- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)\\nYou can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/GenravenGenesys/genesys/network/alerts).\\n\\n</details>',\n",
" 'pr_created_at': '2024-02-22T01:19:43Z',\n",
" 'created_at': '2024-02-27T15:00:00Z'},\n",
" {'username': 'bjjones18',\n",
" 'repo': 'bjjones18/library_repo_test',\n",
" 'action': 'opened',\n",
" 'number': 4,\n",
" 'title': 'Library setup',\n",
" 'author': 'bjjones18',\n",
" 'body': None,\n",
" 'pr_created_at': '2024-02-27T14:59:58Z',\n",
" 'created_at': '2024-02-27T15:00:00Z'},\n",
" {'username': 'pull[bot]',\n",
" 'repo': 'l3dlp-sandbox/baikal-docker',\n",
" 'action': 'closed',\n",
" 'number': 9,\n",
" 'title': '[pull] master from ckulka:master',\n",
" 'author': 'pull[bot]',\n",
" 'body': 'See [Commits](/l3dlp-sandbox/baikal-docker/pull/9/commits) and [Changes](/l3dlp-sandbox/baikal-docker/pull/9/files) for more details.\\n\\n-----\\nCreated by [<img src=\"https://prod.download/pull-18h-svg\" valign=\"bottom\"/> **pull[bot]**](https://github.com/wei/pull)\\n\\n_Can you help keep this open source service alive? **[💖 Please sponsor : )](https://prod.download/pull-pr-sponsor)**_',\n",
" 'pr_created_at': '2023-12-28T15:44:10Z',\n",
" 'created_at': '2024-02-27T15:00:00Z'},\n",
" {'username': 'NickIliev',\n",
" 'repo': 'telerik/fiddler-everywhere-docs',\n",
" 'action': 'closed',\n",
" 'number': 461,\n",
" 'title': 'docs: KB about capturing Go localhost traffic',\n",
" 'author': 'NickIliev',\n",
" 'body': 'related to https://kinvey.atlassian.net/browse/FID-5551',\n",
" 'pr_created_at': '2024-02-20T11:02:01Z',\n",
" 'created_at': '2024-02-27T15:00:00Z'},\n",
" {'username': 'user-sspmynxdvb',\n",
" 'repo': 'p1v2/hillel-pandas-2',\n",
" 'action': 'closed',\n",
" 'number': 2,\n",
" 'title': 'hw',\n",
" 'author': 'user-sspmynxdvb',\n",
" 'body': None,\n",
" 'pr_created_at': '2024-02-26T08:37:29Z',\n",
" 'created_at': '2024-02-27T15:00:00Z'})"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five pull-request records.\n",
"prs.take(5)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "fdf03271-234c-4335-9d6d-65b731349cd1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(204131, 40357, 18526)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sanity check: count the records in all three bags with a single dask.compute call.\n",
"import dask\n",
"\n",
"dask.compute(\n",
" commits.count(),\n",
" creates.count(),\n",
" prs.count(),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "791dacb9-2e32-43cd-bdea-47c442e1c522",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'username': 'wierdvanderhaar',\n",
" 'repo': 'crate/cratedb-examples',\n",
" 'sha': '68c45f2ec9017abbae4e268421a8d458a0b99c21',\n",
" 'message': 'Update dask[dataframe] requirement in /by-dataframe/dask\\n\\nUpdates the requirements on [dask[dataframe]](https://github.com/dask/dask) to permit the latest version.\\n- [Changelog](https://github.com/dask/dask/blob/main/docs/release-procedure.md)\\n- [Commits](https://github.com/dask/dask/compare/0.2.0...2024.2.1)\\n\\n---\\nupdated-dependencies:\\n- dependency-name: dask[dataframe]\\n dependency-type: direct:production\\n...\\n\\nSigned-off-by: dependabot[bot] <[email protected]>',\n",
" 'created_at': '2024-02-27T15:28:46Z'},\n",
" {'username': 'bjlittle',\n",
" 'repo': 'SciTools/iris',\n",
" 'sha': 'f4e2fb795ff93f20a53697979567b7a338d7b993',\n",
" 'message': 'RUFF: fix legacy use of np.numpy.random (#5786)\\n\\n* fix numpy (ruff NPY002)\\r\\n\\r\\n* handle test with dask array.',\n",
" 'created_at': '2024-02-27T15:36:09Z'},\n",
" {'username': 'ktyle',\n",
" 'repo': 'ktyle/projectpythia.github.io',\n",
" 'sha': '93844290cda213cd9f95688251124631707e49f5',\n",
" 'message': 'add dask tag to xarray tutorial (#339)',\n",
" 'created_at': '2024-02-27T15:43:20Z'},\n",
" {'username': 'tkknight',\n",
" 'repo': 'tkknight/iris',\n",
" 'sha': 'f4e2fb795ff93f20a53697979567b7a338d7b993',\n",
" 'message': 'RUFF: fix legacy use of np.numpy.random (#5786)\\n\\n* fix numpy (ruff NPY002)\\r\\n\\r\\n* handle test with dask array.',\n",
" 'created_at': '2024-02-27T15:54:42Z'}]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep commits whose message contains \" dask\" (case-sensitive, and the leading\n",
"# space is part of the match, so a message that starts with \"dask\" is not kept).\n",
"commits.filter(lambda d: \" dask\" in d[\"message\"]).compute()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:coiled]",
"language": "python",
"name": "conda-env-coiled-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment