Skip to content

Instantly share code, notes, and snippets.

@gunessenturk
Created September 20, 2018 03:55
Show Gist options
  • Save gunessenturk/757356cc86c9695ed7e7fe171e0d98a4 to your computer and use it in GitHub Desktop.
Save gunessenturk/757356cc86c9695ed7e7fe171e0d98a4 to your computer and use it in GitHub Desktop.
TDI_project_week2
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import datetime as ddtt\n",
"import numpy as np\n",
"import folium"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Get reported crime data\n",
"df = pd.read_csv('LA_Crime_Data_from_2010_to_11sep2018.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = df.loc[ (df['Premise Description'] == 'STREET') \n",
" | (df['Premise Description'] == 'PARKING LOT') \n",
" | (df['Premise Description'] == 'SIDEWALK') \n",
" | (df['Premise Description'] == 'PARK/PLAYGROUND') \n",
" | (df['Premise Description'] == 'ALLEY')\n",
" | (df['Premise Description'] == 'GAS STATION')\n",
" | (df['Premise Description'] == 'BUS STOP')\n",
" | (df['Premise Description'] == 'OTHER/OUTSIDE')\n",
" | (df['Premise Description'] == 'PEDESTRIAN OVERCROSSING')\n",
" | (df['Premise Description'] == 'UNDERPASS/BRIDGE*')\n",
" | (df['Premise Description'] == 'SKATEBOARD FACILITY/SKATEBOARD PARK*')\n",
" ]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Clean column 'Location'\n",
"df['Location '] = df['Location '].str.replace('\\(','')\n",
"df['Location '] = df['Location '].str.replace('\\)','')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Add columns 'Latitude', 'Longitude', 'Year Occurred', 'Month Occurred', 'Day Occurred'\n",
"df['Latitude'] = df['Location '].str.replace(',.*', '').astype('float')\n",
"df['Longitude'] = df['Location '].str.replace('.*,', '').astype('float')\n",
"df['Year Occurred'] = df['Date Occurred'].str.replace('.*/', '').astype('int')\n",
"df['Month Occurred'] = ( df['Date Occurred']\n",
" .str.replace('/.*/.*', '').astype('int') )\n",
"df['Day Occurred'] = ( df['Date Occurred']\n",
" .str.replace('^\\d\\d/', '').str.replace('/.*', '').astype('int') )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Convert 'Date Occurred' and 'Date Reported' to datetime\n",
"df['Date Occurred'] = pd.to_datetime(df['Date Occurred'], format='%m/%d/%Y')\n",
"df['Date Reported'] = pd.to_datetime(df['Date Reported'], format='%m/%d/%Y')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Add column 'Weekday Occurred'\n",
"df['Weekday Occurred'] = df['Date Occurred'].apply(lambda x: ddtt.datetime.weekday(x))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.drop('Location ', axis=1, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Drop rows with zero lat or lon\n",
"df = df[(df['Longitude']!= 0) & (df['Latitude'] != 0)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df['PUMA'] = 0"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_crime_area = df[['Area Name', 'DR Number']].groupby(['Area Name']).count().reset_index()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"num_days = pd.Timestamp('September 11, 2018') - pd.Timestamp('January 1, 2010')\n",
"num_days = (num_days / np.timedelta64(1, 'D')).astype(int)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot number of reported crimes by area name\n",
"\n",
"plt.figure(figsize=(16,10))\n",
"plt.subplot(1, 1, 1)\n",
"bars = plt.bar(x=df_crime_area['Area Name'], \n",
" height=np.round(df_crime_area['DR Number'] / num_days) )\n",
"plt.tick_params(top='off', bottom='off', left='off', \n",
" right='off', labelleft='off', labelbottom='on')\n",
"plt.title('Number of Crimes per Day - Los Angeles City\\n (street or similar premise)', \n",
" fontsize=28)\n",
"for bar in bars:\n",
" plt.gca().text(bar.get_x() + bar.get_width()/2, bar.get_height() - 0.5, \n",
" '{:.0f}'.format(bar.get_height()), \n",
" ha='center', color='w', fontsize=20)\n",
" \n",
"x = plt.gca().xaxis\n",
"\n",
"for item in x.get_ticklabels():\n",
" item.set_rotation(70)\n",
" item.set_fontsize(24)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_18_jan = df[(df['Year Occurred'] == 2018) & (df['Month Occurred'] == 1)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"LA_map_2018_jan = folium.Map(location=[33.9829, -118.3338],\n",
" zoom_start=10,\n",
"# tiles=\"Stamen Toner\")\n",
" tiles='cartodbpositron')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for i in range(len(df_18_jan)):\n",
" lat = df_18_jan['Latitude'].iloc[i]\n",
" lon = df_18_jan['Longitude'].iloc[i]\n",
" folium.CircleMarker([lat, lon], radius=1, color='red', popup=df_18_jan['Area Name'].iloc[i]).add_to(LA_map_2018_jan)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# LA_map_2018_jan"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.iloc[215:220]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"def getPUMA(lonlat):\n",
" params = { 'f' :'pjson', \n",
" 'geometry' : lonlat, \n",
" 'geometryType' : 'esriGeometryPoint',\n",
" 'inSR' : 4265,\n",
" 'spatialRel' : 'esriSpatialRelIntersects',\n",
" 'outFields': 'BASENAME,STATE,PUMA',\n",
" 'returnGeometry': 'false',\n",
" # 'returnTrueCurves': 'false',\n",
" # 'returnIdsOnly': 'false',\n",
" # 'returnCountOnly': 'false',\n",
" # 'returnZ' : 'false',\n",
" # 'returnM' : 'false',\n",
" # 'returnDistinctValues' : 'false',\n",
" # 'returnExtentsOnly' :'false',\n",
" }\n",
" response = requests.get('https://tigerweb.geo.census.gov/arcgis/rest/services/TIGERweb/PUMA_TAD_TAZ_UGA_ZCTA/MapServer/0/query', params=params)\n",
" if response:\n",
" if response.json()['features'][0]['attributes']['PUMA']:\n",
" PUMA = response.json()['features'][0]['attributes']['PUMA']\n",
" else:\n",
" PUMA = -2\n",
" else:\n",
" PUMA = -1\n",
" return PUMA\n",
"\n",
"\n",
"getPUMA(\"-118.3157, 34.0454\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Takes too long\n",
"### df['PUMA'] = df[['Longitude', 'Latitude']].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)\n",
"#getPUMA(str(df['Longitude']) + ', ' + str(df['Latitude']))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.iloc[:20, -1:] = df[['Longitude', 'Latitude']].iloc[:20].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.iloc[20:50, -1:] = df[['Longitude', 'Latitude']].iloc[20:50].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.iloc[50:100, -1:] = df[['Longitude', 'Latitude']].iloc[50:100].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.iloc[100:150, -1:] = df[['Longitude', 'Latitude']].iloc[100:150].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.iloc[150:300, -1:] = df[['Longitude', 'Latitude']].iloc[150:300].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.iloc[300:400, -1:] = df[['Longitude', 'Latitude']].iloc[300:400].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.iloc[400:600, -1:] = df[['Longitude', 'Latitude']].iloc[400:600].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.iloc[600:800, -1:] = df[['Longitude', 'Latitude']].iloc[600:800].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.iloc[800:1000, -1:] = df[['Longitude', 'Latitude']].iloc[800:1000].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.iloc[1000:1200, -1:] = df[['Longitude', 'Latitude']].iloc[1000:1200].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.iloc[1200:1400, -1:] = df[['Longitude', 'Latitude']].iloc[1200:1400].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.iloc[1400:1600, -1:] = df[['Longitude', 'Latitude']].iloc[1400:1600].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.iloc[1600:1800, -1:] = df[['Longitude', 'Latitude']].iloc[1600:1800].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.iloc[1800:2000, -1:] = df[['Longitude', 'Latitude']].iloc[1800:2000].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_2000 = df.iloc[:2000]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# import dill\n",
"# dill.dump(df_2000, open('df_LA_wPUMA_first2000.pkd', 'wb'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# df_2000 = dill.load(open('df_LA_wPUMA_first2000.pkd', 'rb'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# df_2000.drop(['MO Codes', 'Victim Age', 'Victim Sex','Victim Descent', \n",
"# 'Crime Code 1', 'Crime Code 2', 'Crime Code 3','Crime Code 4'], axis=1, inplace=True)\n",
"# df_2000.drop(['Address', 'Cross Street'], axis=1, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Crime data from Jan 2016 only\n",
"df_Jan2016 = df[(df['Year Occurred']==2016) & (df['Month Occurred']==1)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016.drop(['MO Codes', 'Victim Age', 'Victim Sex','Victim Descent', \n",
" 'Crime Code 1', 'Crime Code 2', 'Crime Code 3','Crime Code 4', \n",
" 'Address', 'Cross Street'], axis=1, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016.iloc[:500, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[:500].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016.iloc[500:1000, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[500:1000].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016.iloc[1000:1500, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[1000:1500].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016.iloc[1500:2000, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[1500:2000].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016.iloc[2000:2500, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[2000:2500].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016.iloc[2500:3000, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[2500:3000].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016.iloc[3000:3500, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[3000:3500].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016.iloc[3500:4000, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[3500:4000].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016.iloc[4000:4500, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[4000:4500].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016.iloc[4500:5000, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[4500:5000].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016.iloc[5000:5500, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[5000:5500].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016.iloc[5500:6000, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[5500:6000].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016.iloc[6000:6500, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[6000:6500].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016.iloc[6500:, -1:] = df_Jan2016[['Longitude', 'Latitude']].iloc[6500:].apply(lambda x: getPUMA(str(x[0]) + ', ' + str(x[1])), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# import dill\n",
"# dill.dump(df_Jan2016, open('df_LA_wPUMA_jan2016.pkd', 'wb'))\n",
"# df_Jan2016 = dill.load(open('df_LA_wPUMA_jan2016.pkd', 'rb'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016['PUMA'] = df_Jan2016['PUMA'].astype(int)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016_by_PUMA = df_Jan2016[['PUMA', 'DR Number']].groupby(['PUMA']).count().reset_index()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016_by_PUMA = df_Jan2016_by_PUMA.sort_values(by='DR Number')[16:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016_by_PUMA['PUMA'] = df_Jan2016_by_PUMA['PUMA'].astype(int)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_Jan2016_by_PUMA['PUMA_str'] = df_Jan2016_by_PUMA['PUMA'].astype(str)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot number of reported crimes by area name\n",
"\n",
"plt.figure(figsize=(16,10))\n",
"plt.subplot(1, 1, 1)\n",
"bars = plt.bar(x=df_Jan2016_by_PUMA['PUMA_str'], \n",
" height=df_Jan2016_by_PUMA['DR Number'] )\n",
"plt.tick_params(top='off', bottom='off', left='off', \n",
" right='off', labelleft='off', labelbottom='on')\n",
"plt.title('Jan 2016 Total Number of Crimes - Los Angeles City\\n (street or similar premise)', \n",
" fontsize=28)\n",
"for bar in bars:\n",
" plt.gca().text(bar.get_x() + bar.get_width()/2, bar.get_height() + 2.0, \n",
" '{:.0f}'.format(bar.get_height()), \n",
" ha='center', color='k', fontsize=16)\n",
" \n",
"x = plt.gca().xaxis\n",
"\n",
"for item in x.get_ticklabels():\n",
" item.set_rotation(70)\n",
" item.set_fontsize(24)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(df_Jan2016_by_PUMA)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"combined = pd.merge(df_Jan2016_by_PUMA, acs_mean, how='inner', left_on='PUMA', right_on='PUMA')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"combined"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.scatter(combined['HINCP'], combined['DR Number'])\n",
"plt.title('Number of Crimes (Jan 2016)', fontsize=14)\n",
"plt.xlabel('Average Household Income', fontsize=14)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.scatter(combined['NPF'], combined['DR Number'])\n",
"plt.title('Number of Crimes (Jan 2016)', fontsize=14)\n",
"plt.xlabel('Average Number of Persons in Family', fontsize=14)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.scatter(combined['GRPIP'], combined['DR Number'])\n",
"plt.title('Number of Crimes (Jan 2016)', fontsize=14)\n",
"plt.xlabel('Rent as % of Household Income', fontsize=14)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.scatter(combined['NOC'], combined['DR Number'])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"acs_mean = acs_ca.groupby(['PUMA'])[['HINCP', 'NPF', 'GRPIP', 'NOC']].mean().reset_index()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Get reported American Community Survey data for California, housing level\n",
"acs_ca = pd.read_csv('csv_hca/ss16hca.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"acs_ca.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"acs_ca.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"acs_ca.iloc[:, 4:20].head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# monthly rent\n",
"# $1 to $99999 (Rounded and top-coded)\n",
"acs_ca['RNTP'].hist() "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Total payment on all second and junior mortgages and home equity loans (monthly amount)\n",
"# $1 to $99999 (Rounded and top-coded)\n",
"acs_ca['SMP'].hist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Property value\n",
"# $1 to $9999999 (Rounded and top-coded)\n",
"acs_ca['VALP'].hist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# When structure first built\n",
"acs_ca['YBL'].hist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Family income (past 12 months)\n",
"# Note: Use ADJINC to adjust FINCP to constant dollars.\n",
"acs_ca['FINCP'].hist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Household income (past 12 months)\n",
"# Note: Use ADJINC to adjust HINCP to constant dollars.\n",
"acs_ca['HINCP'].hist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import this"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Gross rent (monthly amount)\n",
"# $1 - $99999 (Components are rounded)\n",
"# Note: Use ADJHSG to adjust GRNTP to constant dollars.\n",
"acs_ca['GRNTP'].hist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Gross rent as a percentage of household income past 12 months\n",
"# 101 .101% or more\n",
"acs_ca['GRPIP'].hist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# When moved into this house or apartment\n",
"acs_ca['MV'].hist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Number of own children in household (unweighted)\n",
"acs_ca['NOC'].hist(log=True, bins=14) # it ranges from 0 to 13, with one hh w/ 11, 12, and 13 kids"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(acs_ca[acs_ca['NOC']==10])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Number of persons in family (unweighted)\n",
"acs_ca['NPF'].hist(log=True, bins=19)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"acs_ca['NPF'].max()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Grandparent headed household with no parent present\n",
"'''b .N/A (GQ/vacant)\n",
"0 .Not a grandparent headed household with no parent present 1 .Grandparent headed household with no parent present'''\n",
"acs_ca['NPP'].hist(log=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(acs_ca[acs_ca['NPP'] == 1]) *100 / len(acs_ca)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Property taxes (yearly amount)\n",
"# $10000+(Top-coded)\n",
"acs_ca['TAXP'].hist(bins = 68)\n",
"# Note: No adjustment factor is applied to TAXP."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment