Created
April 16, 2020 17:12
-
-
Save romeokienzler/8c58b91aabeb2c6d913ebf3f8aada782 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": "# The code was removed by Watson Studio for sharing." | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": "import types\nimport pandas as pd\nfrom botocore.client import Config\nimport ibm_boto3\n\ndef __iter__(self): return 0\n\n\nclient = ibm_boto3.client(service_name='s3',\n ibm_api_key_id=credentials_1['IBM_API_KEY_ID'],\n ibm_auth_endpoint=credentials_1['IBM_AUTH_ENDPOINT'],\n config=Config(signature_version='oauth'),\n endpoint_url=credentials_1['ENDPOINT'])\nbucket=credentials_1['BUCKET']" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "--2020-04-16 17:10:50-- http://api.worldbank.org/v2/en/indicator/SP.POP.TOTL?downloadformat=csv\nResolving api.worldbank.org (api.worldbank.org)... 107.23.127.3\nConnecting to api.worldbank.org (api.worldbank.org)|107.23.127.3|:80... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 78803 (77K) [application/zip]\nSaving to: \u2018SP.POP.TOTL?downloadformat=csv\u2019\n\n100%[======================================>] 78,803 --.-K/s in 0.06s \n\n2020-04-16 17:10:51 (1.22 MB/s) - \u2018SP.POP.TOTL?downloadformat=csv\u2019 saved [78803/78803]\n\nArchive: SP.POP.TOTL?downloadformat=csv\n inflating: Metadata_Indicator_API_SP.POP.TOTL_DS2_en_csv_v2_936048.csv \n inflating: API_SP.POP.TOTL_DS2_en_csv_v2_936048.csv \n inflating: Metadata_Country_API_SP.POP.TOTL_DS2_en_csv_v2_936048.csv \n" | |
} | |
], | |
"source": "#get world's population data\n!rm -f SP.POP.TOTL?downloadformat=csv\n!rm -f API_SP.POP.TOTL_DS2_en_csv_v2_936048.csv\n!rm -f Metadata_Country_API_SP.POP.TOTL_DS2_en_csv_v2_936048.csv\n!rm -f Metadata_Indicator_API_SP.POP.TOTL_DS2_en_csv_v2_936048.csv\n!wget http://api.worldbank.org/v2/en/indicator/SP.POP.TOTL?downloadformat=csv\n!unzip SP.POP.TOTL?downloadformat=csv\n!tail -n +5 API_SP.POP.TOTL_DS2_en_csv_v2_936048.csv > pop.csv" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": "client.upload_file(Filename='pop.csv',Bucket=bucket,Key='pop.csv')" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": "df_pop=pd.read_csv(\"pop.csv\")\n" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": "import datetime\n\ntoday = datetime.date.today()\nurl = 'https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-'+str(today)+'.xlsx'\nurl = 'https://www.ecdc.europa.eu/sites/default/files/documents/COVID-19-geographic-disbtribution-worldwide-2020-04-15.xlsx'\n#url = './COVID-19-geographic-disbtribution-worldwide-2020-03-24.xlsx'" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": "import pandas as pd\ndf_raw =pd.read_excel(url)\n" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "['Czechia', 'Dominican_Republic', 'Egypt', 'Iran', 'Russia', 'South_Korea', 'United_Kingdom', 'United_States_of_America']\n" | |
} | |
], | |
"source": "group_by_deaths = df_raw.groupby(['countriesAndTerritories']).sum()['deaths']\nbad_countries = list(group_by_deaths[group_by_deaths>100].index)\ndf_raw = df_raw[df_raw['countriesAndTerritories'].isin(bad_countries)]\ncountry_names = df_raw['countriesAndTerritories'].unique()\npop_coutry_names = df_pop['Country Name'].unique()\nprint([x for x in country_names if x not in pop_coutry_names])\n\ndf_pop = df_pop.replace('Iran, Islamic Rep.','Iran')\ndf_pop = df_pop.replace('Korea, Rep.','South_Korea')\ndf_pop = df_pop.replace('United Kingdom','United_Kingdom')\ndf_pop = df_pop.replace('United States','United_States_of_America')\ndf_pop = df_pop.replace('Dominican Republic','Dominican_Republic')\ndf_pop = df_pop.replace('Czech Republic','Czechia')\ndf_pop = df_pop.replace('Russian Federation','Russia')\ndf_pop = df_pop.replace('Egypt, Arab Rep.','Egypt')\n\n\n\n\n\n\npop_coutry_names = df_pop['Country Name'].unique()\nassert len([x for x in country_names if x not in pop_coutry_names]) == 0, 'ETL error, not all country names matching: '+str([x for x in country_names if x not in pop_coutry_names])" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": "df_pop=df_pop.rename(columns={'Country Name': 'countriesAndTerritories'})\n\ndf_raw = pd.merge(df_raw, df_pop, on='countriesAndTerritories', how='inner')" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": "df_raw.to_csv('cases.csv')" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": "client.upload_file(Filename='cases.csv',Bucket=bucket,Key='cases.csv')" | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3.6", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.9" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment