Created
November 3, 2017 03:43
-
-
Save Spaxe/94e130c73a1b835d3c30ea672ec7e5fe to your computer and use it in GitHub Desktop.
Converting CSV to GeoJSON Python Notebook
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas\n", | |
"import json" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>n_code</th>\n", | |
" <th>active</th>\n", | |
" <th>last_update</th>\n", | |
" <th>update_count</th>\n", | |
" <th>input_date</th>\n", | |
" <th>lat</th>\n", | |
" <th>lng</th>\n", | |
" <th>x_900913</th>\n", | |
" <th>y_900913</th>\n", | |
" <th>extension</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1282002</td>\n", | |
" <td>1</td>\n", | |
" <td>2016-02-19 10:30:19</td>\n", | |
" <td>1</td>\n", | |
" <td>2016-02-19 10:30:19</td>\n", | |
" <td>37.000</td>\n", | |
" <td>142.650</td>\n", | |
" <td>15879725</td>\n", | |
" <td>4439106</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1282001</td>\n", | |
" <td>1</td>\n", | |
" <td>2016-02-19 10:30:19</td>\n", | |
" <td>1</td>\n", | |
" <td>2016-02-19 10:30:19</td>\n", | |
" <td>39.467</td>\n", | |
" <td>128.917</td>\n", | |
" <td>14350974</td>\n", | |
" <td>4788787</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1282000</td>\n", | |
" <td>1</td>\n", | |
" <td>2016-02-19 10:30:19</td>\n", | |
" <td>1</td>\n", | |
" <td>2016-02-19 10:30:19</td>\n", | |
" <td>38.983</td>\n", | |
" <td>129.517</td>\n", | |
" <td>14417766</td>\n", | |
" <td>4719236</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1281999</td>\n", | |
" <td>1</td>\n", | |
" <td>2016-02-19 10:30:19</td>\n", | |
" <td>1</td>\n", | |
" <td>2016-02-19 10:30:19</td>\n", | |
" <td>39.333</td>\n", | |
" <td>129.067</td>\n", | |
" <td>14367672</td>\n", | |
" <td>4769483</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1281998</td>\n", | |
" <td>1</td>\n", | |
" <td>2016-02-19 10:30:19</td>\n", | |
" <td>1</td>\n", | |
" <td>2016-02-19 10:30:19</td>\n", | |
" <td>39.167</td>\n", | |
" <td>129.283</td>\n", | |
" <td>14391717</td>\n", | |
" <td>4745621</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" n_code active last_update update_count input_date \\\n", | |
"0 1282002 1 2016-02-19 10:30:19 1 2016-02-19 10:30:19 \n", | |
"1 1282001 1 2016-02-19 10:30:19 1 2016-02-19 10:30:19 \n", | |
"2 1282000 1 2016-02-19 10:30:19 1 2016-02-19 10:30:19 \n", | |
"3 1281999 1 2016-02-19 10:30:19 1 2016-02-19 10:30:19 \n", | |
"4 1281998 1 2016-02-19 10:30:19 1 2016-02-19 10:30:19 \n", | |
"\n", | |
" lat lng x_900913 y_900913 extension \n", | |
"0 37.000 142.650 15879725 4439106 NaN \n", | |
"1 39.467 128.917 14350974 4788787 NaN \n", | |
"2 38.983 129.517 14417766 4719236 NaN \n", | |
"3 39.333 129.067 14367672 4769483 NaN \n", | |
"4 39.167 129.283 14391717 4745621 NaN " | |
] | |
}, | |
"execution_count": 31, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data_frame = pandas.read_csv(\n", | |
" '../web/public/data/historical_colours_world.csv',\n", | |
" parse_dates=[\n", | |
" 'last_update',\n", | |
" 'input_date',\n", | |
" 'date_photo',\n", | |
" ],\n", | |
" infer_datetime_format=True,\n", | |
" na_values=['']\n", | |
")\n", | |
"\n", | |
"data_frame[data_frame.columns[:10]].head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>date_photo</th>\n", | |
" <th>device_platform</th>\n", | |
" <th>device_model</th>\n", | |
" <th>device_uuid</th>\n", | |
" <th>user_n_code</th>\n", | |
" <th>viewing_angle</th>\n", | |
" <th>rain</th>\n", | |
" <th>bottom</th>\n", | |
" <th>fu_value</th>\n", | |
" <th>fu_processed</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1936-09-14</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>4.0</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1936-09-13</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>13.0</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1936-09-13</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>11.0</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1936-09-13</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>10.0</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1936-09-13</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>3.0</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" date_photo device_platform device_model device_uuid user_n_code \\\n", | |
"0 1936-09-14 NaN NaN NaN 0 \n", | |
"1 1936-09-13 NaN NaN NaN 0 \n", | |
"2 1936-09-13 NaN NaN NaN 0 \n", | |
"3 1936-09-13 NaN NaN NaN 0 \n", | |
"4 1936-09-13 NaN NaN NaN 0 \n", | |
"\n", | |
" viewing_angle rain bottom fu_value fu_processed \n", | |
"0 NaN NaN NaN 4.0 NaN \n", | |
"1 NaN NaN NaN 13.0 NaN \n", | |
"2 NaN NaN NaN 11.0 NaN \n", | |
"3 NaN NaN NaN 10.0 NaN \n", | |
"4 NaN NaN NaN 3.0 NaN " | |
] | |
}, | |
"execution_count": 32, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data_frame[data_frame.columns[10:20]].head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>fu_observed</th>\n", | |
" <th>sd_depth</th>\n", | |
" <th>flagged_count</th>\n", | |
" <th>image</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0.0</td>\n", | |
" <td>http://www.eyeonwater.org/media/eyeonwater_upl...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0.0</td>\n", | |
" <td>http://www.eyeonwater.org/media/eyeonwater_upl...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0.0</td>\n", | |
" <td>http://www.eyeonwater.org/media/eyeonwater_upl...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0.0</td>\n", | |
" <td>http://www.eyeonwater.org/media/eyeonwater_upl...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0.0</td>\n", | |
" <td>http://www.eyeonwater.org/media/eyeonwater_upl...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" fu_observed sd_depth flagged_count \\\n", | |
"0 NaN NaN 0.0 \n", | |
"1 NaN NaN 0.0 \n", | |
"2 NaN NaN 0.0 \n", | |
"3 NaN NaN 0.0 \n", | |
"4 NaN NaN 0.0 \n", | |
"\n", | |
" image \n", | |
"0 http://www.eyeonwater.org/media/eyeonwater_upl... \n", | |
"1 http://www.eyeonwater.org/media/eyeonwater_upl... \n", | |
"2 http://www.eyeonwater.org/media/eyeonwater_upl... \n", | |
"3 http://www.eyeonwater.org/media/eyeonwater_upl... \n", | |
"4 http://www.eyeonwater.org/media/eyeonwater_upl... " | |
] | |
}, | |
"execution_count": 33, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data_frame[data_frame.columns[20:]].head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"n_code int64\n", | |
"active int64\n", | |
"last_update datetime64[ns]\n", | |
"update_count int64\n", | |
"input_date datetime64[ns]\n", | |
"lat float64\n", | |
"lng float64\n", | |
"x_900913 int64\n", | |
"y_900913 int64\n", | |
"extension float64\n", | |
"date_photo datetime64[ns]\n", | |
"device_platform float64\n", | |
"device_model float64\n", | |
"device_uuid float64\n", | |
"user_n_code int64\n", | |
"viewing_angle float64\n", | |
"rain float64\n", | |
"bottom float64\n", | |
"fu_value float64\n", | |
"fu_processed float64\n", | |
"fu_observed float64\n", | |
"sd_depth float64\n", | |
"flagged_count float64\n", | |
"image object\n", | |
"dtype: object" | |
] | |
}, | |
"execution_count": 34, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data_frame.dtypes" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"json_result_string = data_frame.to_json(\n", | |
" orient='records', \n", | |
" double_precision=12,\n", | |
" date_format='iso'\n", | |
")\n", | |
"json_result = json.loads(json_result_string)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"geojson = {\n", | |
" 'type': 'FeatureCollection',\n", | |
" 'features': []\n", | |
"}\n", | |
"for record in json_result:\n", | |
" geojson['features'].append({\n", | |
" 'type': 'Feature',\n", | |
" 'geometry': {\n", | |
" 'type': 'Point',\n", | |
" 'coordinates': [record['lng'], record['lat']],\n", | |
" },\n", | |
" 'properties': record,\n", | |
" })" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with open('../web/public/data/historical_colours_world.json', 'w') as f:\n", | |
" f.write(json.dumps(geojson, indent=2))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment