Last active
June 22, 2022 08:48
-
-
Save psychemedia/6fd3655782daf031299cfec03043d6bc to your computer and use it in GitHub Desktop.
Test read/write for pandas dataframes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"language_info": { | |
"codemirror_mode": { | |
"name": "python", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8" | |
}, | |
"kernelspec": { | |
"name": "python", | |
"display_name": "Pyolite", | |
"language": "python" | |
} | |
}, | |
"nbformat_minor": 4, | |
"nbformat": 4, | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"source": "import micropip", | |
"metadata": { | |
"trusted": true | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "import os\n\nimport numpy as np\nimport pandas as pd\n\n# Number of rows in the sample frame.\nsz = 100\n\n# Seed the global NumPy RNG so column A is the same on every run.\nnp.random.seed(42)\n\n# Column A holds sz random normal draws, column B is the constant 1.\ndf = pd.DataFrame(dict(A=np.random.randn(sz), B=[1] * sz))\n\ndf", | |
"metadata": { | |
"trusted": true | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "def test_csv_write(df):\n    \"\"\"Write df to data/test.csv, creating the data directory when it is missing.\"\"\"\n    # to_csv does not create parent directories; without this a fresh run raises OSError.\n    os.makedirs(\"data\", exist_ok=True)\n    df.to_csv(\"data/test.csv\")\n\n\ndef test_csv_read():\n    \"\"\"Read data/test.csv back and return it as a DataFrame.\"\"\"\n    return pd.read_csv(\"data/test.csv\")\n\n\ntest_csv_write(df)\ntest_csv_read()", | |
"metadata": { | |
"trusted": true | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "def test_json_write(df):\n    \"\"\"Serialise df to test_json.json.\"\"\"\n    df.to_json(\"test_json.json\")\n\n\ndef test_json_read():\n    \"\"\"Load test_json.json and return the resulting DataFrame.\"\"\"\n    loaded = pd.read_json(\"test_json.json\")\n    return loaded\n\n\n# Round trip: write first, then display what is read back.\ntest_json_write(df)\ntest_json_read()", | |
"metadata": { | |
"trusted": true | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "await micropip.install(\"html5lib\")", | |
"metadata": {}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "await micropip.install(\"lxml\")", | |
"metadata": {}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "def test_html_write(df):\n    \"\"\"Render df as an HTML table in ./test_html.html.\"\"\"\n    df.to_html(\"./test_html.html\")\n\n\ndef test_html_read():\n    \"\"\"Parse ./test_html.html and return the first table found in it.\"\"\"\n    tables = pd.read_html(\"./test_html.html\")\n    return tables[0]\n\n\n# Round trip: write first, then display what is read back.\ntest_html_write(df)\ntest_html_read()", | |
"metadata": {}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "def test_xml_write(df):\n    \"\"\"Serialise df to test_xml.xml.\"\"\"\n    df.to_xml(\"test_xml.xml\")\n\n\ndef test_xml_read():\n    \"\"\"Load test_xml.xml and return the resulting DataFrame.\"\"\"\n    loaded = pd.read_xml(\"test_xml.xml\")\n    return loaded\n\n\n# Round trip: write first, then display what is read back.\ntest_xml_write(df)\ntest_xml_read()", | |
"metadata": {}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "await micropip.install(\"openpyxl\")", | |
"metadata": { | |
"trusted": true | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "def test_xlsx_write(df):\n    \"\"\"Write df to the workbook test_xlsx.xlsx.\"\"\"\n    df.to_excel(\"test_xlsx.xlsx\")\n\n\ndef test_xlsx_read():\n    \"\"\"Read test_xlsx.xlsx and return the resulting DataFrame.\"\"\"\n    loaded = pd.read_excel(\"test_xlsx.xlsx\")\n    return loaded\n\n\n# Round trip: write first, then display what is read back.\ntest_xlsx_write(df)\ntest_xlsx_read()", | |
"metadata": { | |
"trusted": true | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "import sqlite3\nimport os\nfrom contextlib import closing\n\n\ndef test_sql_write(df):\n    \"\"\"Write df to table test_table in a freshly created SQLite file test.sql.\"\"\"\n    # Remove any previous database so to_sql does not hit an existing test_table.\n    if os.path.exists(\"test.sql\"):\n        os.remove(\"test.sql\")\n    # closing() releases the connection even when to_sql raises.\n    with closing(sqlite3.connect(\"test.sql\")) as sql_db:\n        df.to_sql(name=\"test_table\", con=sql_db)\n\n\ndef test_sql_read():\n    \"\"\"Return every row of test_table in test.sql as a DataFrame.\"\"\"\n    # closing() releases the connection even when the query raises.\n    with closing(sqlite3.connect(\"test.sql\")) as sql_db:\n        return pd.read_sql_query(\"select * from test_table\", sql_db)\n\n\ntest_sql_write(df)\ntest_sql_read()", | |
"metadata": { | |
"trusted": true | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "await micropip.install(\"tables\")", | |
"metadata": {}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "def test_hdf_fixed_write(df):\n    \"\"\"Write df under key test to test_fixed.hdf, opening the file in mode w.\"\"\"\n    # key is passed by keyword: positional use is deprecated since pandas 2.1.\n    df.to_hdf(\"test_fixed.hdf\", key=\"test\", mode=\"w\")\n\n\ndef test_hdf_fixed_read():\n    \"\"\"Read key test from test_fixed.hdf and return the DataFrame.\"\"\"\n    return pd.read_hdf(\"test_fixed.hdf\", key=\"test\")\n\n\ntest_hdf_fixed_write(df)\ntest_hdf_fixed_read()", | |
"metadata": { | |
"trusted": true | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "def test_hdf_fixed_write_compress(df):\n    \"\"\"Write df under key test to test_fixed_compress.hdf using blosc compression.\"\"\"\n    # complevel must be set: with the default of None, complib is ignored and\n    # the file is written uncompressed.\n    # key is passed by keyword: positional use is deprecated since pandas 2.1.\n    df.to_hdf(\n        \"test_fixed_compress.hdf\",\n        key=\"test\",\n        mode=\"w\",\n        complevel=9,\n        complib=\"blosc\",\n    )\n\n\ndef test_hdf_fixed_read_compress():\n    \"\"\"Read key test from test_fixed_compress.hdf and return the DataFrame.\"\"\"\n    return pd.read_hdf(\"test_fixed_compress.hdf\", key=\"test\")\n\n\ntest_hdf_fixed_write_compress(df)\ntest_hdf_fixed_read_compress()", | |
"metadata": { | |
"trusted": true | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "def test_hdf_table_write(df):\n    \"\"\"Write df under key test to test_table.hdf in table format, opening the file in mode w.\"\"\"\n    # key is passed by keyword: positional use is deprecated since pandas 2.1.\n    df.to_hdf(\"test_table.hdf\", key=\"test\", mode=\"w\", format=\"table\")\n\n\ndef test_hdf_table_read():\n    \"\"\"Read key test from test_table.hdf and return the DataFrame.\"\"\"\n    return pd.read_hdf(\"test_table.hdf\", key=\"test\")\n\n\ntest_hdf_table_write(df)\ntest_hdf_table_read()", | |
"metadata": { | |
"trusted": true | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "def test_hdf_table_write_compress(df):\n    \"\"\"Write df under key test to test_table_compress.hdf in table format with blosc compression.\"\"\"\n    # complevel must be set: with the default of None, complib is ignored and\n    # the file is written uncompressed.\n    # key is passed by keyword: positional use is deprecated since pandas 2.1.\n    df.to_hdf(\n        \"test_table_compress.hdf\",\n        key=\"test\",\n        mode=\"w\",\n        complevel=9,\n        complib=\"blosc\",\n        format=\"table\",\n    )\n\n\ndef test_hdf_table_read_compress():\n    \"\"\"Read key test from test_table_compress.hdf and return the DataFrame.\"\"\"\n    return pd.read_hdf(\"test_table_compress.hdf\", key=\"test\")\n\n\ntest_hdf_table_write_compress(df)\ntest_hdf_table_read_compress()", | |
"metadata": { | |
"trusted": true | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "await micropip.install(\"pyarrow\")", | |
"metadata": {}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "def test_feather_write(df):\n    \"\"\"Write df to test.feather.\"\"\"\n    df.to_feather(\"test.feather\")\n\n\ndef test_feather_read():\n    \"\"\"Read test.feather and return the resulting DataFrame.\"\"\"\n    loaded = pd.read_feather(\"test.feather\")\n    return loaded\n\n\n# Round trip: write first, then display what is read back.\ntest_feather_write(df)\ntest_feather_read()", | |
"metadata": { | |
"trusted": true | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "def test_pickle_write(df):\n    \"\"\"Pickle df to test.pkl.\"\"\"\n    df.to_pickle(\"test.pkl\")\n\n\ndef test_pickle_read():\n    \"\"\"Unpickle test.pkl and return the stored object.\"\"\"\n    loaded = pd.read_pickle(\"test.pkl\")\n    return loaded\n\n\n# Round trip: write first, then display what is read back.\ntest_pickle_write(df)\ntest_pickle_read()", | |
"metadata": { | |
"trusted": true | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "def test_pickle_write_compress(df):\n    \"\"\"Pickle df to test.pkl.compress with xz compression.\"\"\"\n    df.to_pickle(\"test.pkl.compress\", compression=\"xz\")\n\n\ndef test_pickle_read_compress():\n    \"\"\"Unpickle the xz-compressed test.pkl.compress and return the stored object.\"\"\"\n    loaded = pd.read_pickle(\"test.pkl.compress\", compression=\"xz\")\n    return loaded\n\n\n# Round trip: write first, then display what is read back.\ntest_pickle_write_compress(df)\ntest_pickle_read_compress()", | |
"metadata": { | |
"trusted": true | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": "def test_parquet_write(df):\n    \"\"\"Write df to test.parquet.\"\"\"\n    df.to_parquet(\"test.parquet\")\n\n\ndef test_parquet_read():\n    \"\"\"Read test.parquet and return the resulting DataFrame.\"\"\"\n    loaded = pd.read_parquet(\"test.parquet\")\n    return loaded\n\n\n# Round trip: write first, then display what is read back.\ntest_parquet_write(df)\ntest_parquet_read()", | |
"metadata": { | |
"trusted": true | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment