Created
September 20, 2018 03:55
-
-
Save gunessenturk/757356cc86c9695ed7e7fe171e0d98a4 to your computer and use it in GitHub Desktop.
TDI_project_week2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import matplotlib.pyplot as plt\n", | |
"import datetime as ddtt\n", | |
"import numpy as np\n", | |
"import folium" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Get reported crime data\n", | |
"df = pd.read_csv('LA_Crime_Data_from_2010_to_11sep2018.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = df.loc[ (df['Premise Description'] == 'STREET') \n", | |
" | (df['Premise Description'] == 'PARKING LOT') \n", | |
" | (df['Premise Description'] == 'SIDEWALK') \n", | |
" | (df['Premise Description'] == 'PARK/PLAYGROUND') \n", | |
" | (df['Premise Description'] == 'ALLEY')\n", | |
" | (df['Premise Description'] == 'GAS STATION')\n", | |
" | (df['Premise Description'] == 'BUS STOP')\n", | |
" | (df['Premise Description'] == 'OTHER/OUTSIDE')\n", | |
" | (df['Premise Description'] == 'PEDESTRIAN OVERCROSSING')\n", | |
" | (df['Premise Description'] == 'UNDERPASS/BRIDGE*')\n", | |
" | (df['Premise Description'] == 'SKATEBOARD FACILITY/SKATEBOARD PARK*')\n", | |
" ]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Clean column 'Location'\n", | |
"df['Location '] = df['Location '].str.replace('\\(','')\n", | |
"df['Location '] = df['Location '].str.replace('\\)','')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Add columns 'Latitude', 'Longitude', 'Year Occurred', 'Month Occurred', 'Day Occurred'\n", | |
"df['Latitude'] = df['Location '].str.replace(',.*', '').astype('float')\n", | |
"df['Longitude'] = df['Location '].str.replace('.*,', '').astype('float')\n", | |
"df['Year Occurred'] = df['Date Occurred'].str.replace('.*/', '').astype('int')\n", | |
"df['Month Occurred'] = ( df['Date Occurred']\n", | |
" .str.replace('/.*/.*', '').astype('int') )\n", | |
"df['Day Occurred'] = ( df['Date Occurred']\n", | |
" .str.replace('^\\d\\d/', '').str.replace('/.*', '').astype('int') )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Convert 'Date Occurred' and 'Date Reported' to datetime\n", | |
"df['Date Occurred'] = pd.to_datetime(df['Date Occurred'], format='%m/%d/%Y')\n", | |
"df['Date Reported'] = pd.to_datetime(df['Date Reported'], format='%m/%d/%Y')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Add column 'Weekday Occurred'\n", | |
"df['Weekday Occurred'] = df['Date Occurred'].apply(lambda x: ddtt.datetime.weekday(x))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.drop('Location ', axis=1, inplace=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Drop rows with zero lat or lon\n", | |
"df = df[(df['Longitude']!= 0) & (df['Latitude'] != 0)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df['PUMA'] = 0" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_crime_area = df[['Area Name', 'DR Number']].groupby(['Area Name']).count().reset_index()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"num_days = pd.Timestamp('September 11, 2018') - pd.Timestamp('January 1, 2010')\n", | |
"num_days = (num_days / np.timedelta64(1, 'D')).astype(int)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Plot number of reported crimes by area name\n", | |
"\n", | |
"plt.figure(figsize=(16,10))\n", | |
"plt.subplot(1, 1, 1)\n", | |
"bars = plt.bar(x=df_crime_area['Area Name'], \n", | |
" height=np.round(df_crime_area['DR Number'] / num_days) )\n", | |
"plt.tick_params(top='off', bottom='off', left='off', \n", | |
" right='off', labelleft='off', labelbottom='on')\n", | |
"plt.title('Number of Crimes per Day - Los Angeles City\\n (street or similar premise)', \n", | |
" fontsize=28)\n", | |
"for bar in bars:\n", | |
" plt.gca().text(bar.get_x() + bar.get_width()/2, bar.get_height() - 0.5, \n", | |
" '{:.0f}'.format(bar.get_height()), \n", | |
" ha='center', color='w', fontsize=20)\n", | |
" \n", | |
"x = plt.gca().xaxis\n", | |
"\n", | |
"for item in x.get_ticklabels():\n", | |
" item.set_rotation(70)\n", | |
" item.set_fontsize(24)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_18_jan = df[(df['Year Occurred'] == 2018) & (df['Month Occurred'] == 1)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"LA_map_2018_jan = folium.Map(location=[33.9829, -118.3338],\n", | |
" zoom_start=10,\n", | |
"# tiles=\"Stamen Toner\")\n", | |
" tiles='cartodbpositron')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for i in range(len(df_18_jan)):\n", | |
" lat = df_18_jan['Latitude'].iloc[i]\n", | |
" lon = df_18_jan['Longitude'].iloc[i]\n", | |
" folium.CircleMarker([lat, lon], radius=1, color='red', popup=df_18_jan['Area Name'].iloc[i]).add_to(LA_map_2018_jan)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# LA_map_2018_jan" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.iloc[215:220]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"def getPUMA(lonlat):\n", | |
" params = { 'f' :'pjson', \n", | |
" 'geometry' : lonlat, \n", | |
" 'geometryType' : 'esriGeometryPoint',\n", | |
" 'inSR' : 4265,\n", | |
" 'spatialRel' : 'esriSpatialRelIntersects',\n", | |
" 'outFields': 'BASENAME,STATE,PUMA',\n", | |
" 'returnGeometry': 'false',\n", | |
" # 'returnTrueCurves': 'false',\n", | |
" # 'returnIdsOnly': 'false',\n", | |
" # 'returnCountOnly': 'false',\n", | |
" # 'returnZ' : 'false',\n", | |
" # 'returnM' : 'false',\n", | |
" # 'returnDistinctValues' : 'false',\n", | |
" # 'returnExtentsOnly' :'false',\n", | |
" }\n", | |
" response = requests.get('https://tigerweb.geo.census.gov/arcgis/rest/services/TIGERweb/PUMA_TAD_TAZ_UGA_ZCTA/MapServer/0/query', params=params)\n", | |
" if response:\n", | |
" if response.json()['features'][0]['attributes']['PUMA']:\n", | |
" PUMA = response.json()['features'][0]['attributes']['PUMA']\n", | |
" else:\n", | |
" PUMA = -2\n", | |
" else:\n", | |
" PUMA = -1\n", | |
" return PUMA\n", | |
"\n", | |
"\n", | |
"getPUMA(\"-118.3157, 34.0454\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Takes too long\n", | |
"### df['PUMA'] = df[['Longitude', 'Latitude']].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)\n", | |
"#getPUMA(str(df['Longitude']) + ', ' + str(df['Latitude']))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.iloc[:20, -1:] = df[['Longitude', 'Latitude']].iloc[:20].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.iloc[20:50, -1:] = df[['Longitude', 'Latitude']].iloc[20:50].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.iloc[50:100, -1:] = df[['Longitude', 'Latitude']].iloc[50:100].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.iloc[100:150, -1:] = df[['Longitude', 'Latitude']].iloc[100:150].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.iloc[150:300, -1:] = df[['Longitude', 'Latitude']].iloc[150:300].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.iloc[300:400, -1:] = df[['Longitude', 'Latitude']].iloc[300:400].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.iloc[400:600, -1:] = df[['Longitude', 'Latitude']].iloc[400:600].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.iloc[600:800, -1:] = df[['Longitude', 'Latitude']].iloc[600:800].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.iloc[800:1000, -1:] = df[['Longitude', 'Latitude']].iloc[800:1000].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.iloc[1000:1200, -1:] = df[['Longitude', 'Latitude']].iloc[1000:1200].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.iloc[1200:1400, -1:] = df[['Longitude', 'Latitude']].iloc[1200:1400].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.iloc[1400:1600, -1:] = df[['Longitude', 'Latitude']].iloc[1400:1600].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.iloc[1600:1800, -1:] = df[['Longitude', 'Latitude']].iloc[1600:1800].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.iloc[1800:2000, -1:] = df[['Longitude', 'Latitude']].iloc[1800:2000].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_2000 = df.iloc[:2000]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# import dill\n", | |
"# dill.dump(df_2000, open('df_LA_wPUMA_first2000.pkd', 'wb'))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# df_2000 = dill.load(open('df_LA_wPUMA_first2000.pkd', 'rb'))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# df_2000.drop(['MO Codes', 'Victim Age', 'Victim Sex','Victim Descent', \n", | |
"# 'Crime Code 1', 'Crime Code 2', 'Crime Code 3','Crime Code 4'], axis=1, inplace=True)\n", | |
"# df_2000.drop(['Address', 'Cross Street'], axis=1, inplace=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Crime data from Jan 2016 only\n", | |
"df_Jan2016 = df[(df['Year Occurred']==2016) & (df['Month Occurred']==1)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016.drop(['MO Codes', 'Victim Age', 'Victim Sex','Victim Descent', \n", | |
" 'Crime Code 1', 'Crime Code 2', 'Crime Code 3','Crime Code 4', \n", | |
" 'Address', 'Cross Street'], axis=1, inplace=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016.iloc[:500, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[:500].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016.iloc[500:1000, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[500:1000].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016.iloc[1000:1500, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[1000:1500].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016.iloc[1500:2000, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[1500:2000].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016.iloc[2000:2500, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[2000:2500].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016.iloc[2500:3000, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[2500:3000].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016.iloc[3000:3500, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[3000:3500].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016.iloc[3500:4000, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[3500:4000].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016.iloc[4000:4500, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[4000:4500].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016.iloc[4500:5000, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[4500:5000].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016.iloc[5000:5500, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[5000:5500].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016.iloc[5500:6000, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[5500:6000].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016.iloc[6000:6500, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[6000:6500].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016.iloc[6500:, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[6500:].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# import dill\n", | |
"# dill.dump(df_Jan2016, open('df_LA_wPUMA_jan2016.pkd', 'wb'))\n", | |
"# df_Jan2016 = dill.load(open('df_LA_wPUMA_jan2016.pkd', 'rb'))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016['PUMA'] = df_Jan2016['PUMA'].astype(int)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016_by_PUMA = df_Jan2016[['PUMA', 'DR Number']].groupby(['PUMA']).count().reset_index()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016_by_PUMA = df_Jan2016_by_PUMA.sort_values(by='DR Number')[16:]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016_by_PUMA['PUMA'] = df_Jan2016_by_PUMA['PUMA'].astype(int)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_Jan2016_by_PUMA['PUMA_str'] = df_Jan2016_by_PUMA['PUMA'].astype(str)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Plot number of reported crimes by area name\n", | |
"\n", | |
"plt.figure(figsize=(16,10))\n", | |
"plt.subplot(1, 1, 1)\n", | |
"bars = plt.bar(x=df_Jan2016_by_PUMA['PUMA_str'], \n", | |
" height=df_Jan2016_by_PUMA['DR Number'] )\n", | |
"plt.tick_params(top='off', bottom='off', left='off', \n", | |
" right='off', labelleft='off', labelbottom='on')\n", | |
"plt.title('Jan 2016 Total Number of Crimes - Los Angeles City\\n (street or similar premise)', \n", | |
" fontsize=28)\n", | |
"for bar in bars:\n", | |
" plt.gca().text(bar.get_x() + bar.get_width()/2, bar.get_height() + 2.0, \n", | |
" '{:.0f}'.format(bar.get_height()), \n", | |
" ha='center', color='k', fontsize=16)\n", | |
" \n", | |
"x = plt.gca().xaxis\n", | |
"\n", | |
"for item in x.get_ticklabels():\n", | |
" item.set_rotation(70)\n", | |
" item.set_fontsize(24)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"len(df_Jan2016_by_PUMA)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"combined = pd.merge(df_Jan2016_by_PUMA, acs_mean, how='inner', left_on='PUMA', right_on='PUMA')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"combined" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plt.scatter(combined['HINCP'], combined['DR Number'])\n", | |
"plt.title('Number of Crimes (Jan 2016)', fontsize=14)\n", | |
"plt.xlabel('Average Household Income', fontsize=14)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plt.scatter(combined['NPF'], combined['DR Number'])\n", | |
"plt.title('Number of Crimes (Jan 2016)', fontsize=14)\n", | |
"plt.xlabel('Average Number of Persons in Family', fontsize=14)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plt.scatter(combined['GRPIP'], combined['DR Number'])\n", | |
"plt.title('Number of Crimes (Jan 2016)', fontsize=14)\n", | |
"plt.xlabel('Rent as % of Household Income', fontsize=14)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"plt.scatter(combined['NOC'], combined['DR Number'])\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"acs_mean = acs_ca.groupby(['PUMA'])[['HINCP', 'NPF', 'GRPIP', 'NOC']].mean().reset_index()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Get reported American Community Survey data for California, housing level\n", | |
"acs_ca = pd.read_csv('csv_hca/ss16hca.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"acs_ca.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"acs_ca.columns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"acs_ca.iloc[:, 4:20].head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# monthly rent\n", | |
"# $1 to $99999 (Rounded and top-coded)\n", | |
"acs_ca['RNTP'].hist() " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Total payment on all second and junior mortgages and home equity loans (monthly amount)\n", | |
"# $1 to $99999 (Rounded and top-coded)\n", | |
"acs_ca['SMP'].hist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Property value\n", | |
"# $1 to $9999999 (Rounded and top-coded)\n", | |
"acs_ca['VALP'].hist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# When structure first built\n", | |
"acs_ca['YBL'].hist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Family income (past 12 months)\n", | |
"# Note: Use ADJINC to adjust FINCP to constant dollars.\n", | |
"acs_ca['FINCP'].hist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Household income (past 12 months)\n", | |
"# Note: Use ADJINC to adjust HINCP to constant dollars.\n", | |
"acs_ca['HINCP'].hist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import this" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Gross rent (monthly amount)\n", | |
"# $1 - $99999 (Components are rounded)\n", | |
"# Note: Use ADJHSG to adjust GRNTP to constant dollars.\n", | |
"acs_ca['GRNTP'].hist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Gross rent as a percentage of household income past 12 months\n", | |
"# 101 .101% or more\n", | |
"acs_ca['GRPIP'].hist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# When moved into this house or apartment\n", | |
"acs_ca['MV'].hist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Number of own children in household (unweighted)\n", | |
"acs_ca['NOC'].hist(log=True, bins=14) # it ranges from 0 to 13, with one hh w/ 11, 12, and 13 kids" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"len(acs_ca[acs_ca['NOC']==10])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Number of persons in family (unweighted)\n", | |
"acs_ca['NPF'].hist(log=True, bins=19)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"acs_ca['NPF'].max()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Grandparent headed household with no parent present\n", | |
"'''b .N/A (GQ/vacant)\n", | |
"0 .Not a grandparent headed household with no parent present 1 .Grandparent headed household with no parent present'''\n", | |
"acs_ca['NPP'].hist(log=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"len(acs_ca[acs_ca['NPP'] == 1]) *100 / len(acs_ca)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Property taxes (yearly amount)\n", | |
"# $10000+(Top-coded)\n", | |
"acs_ca['TAXP'].hist(bins = 68)\n", | |
"# Note: No adjustment factor is applied to TAXP." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment