Skip to content

Instantly share code, notes, and snippets.

@jjsantos01
Last active July 23, 2024 16:37
Show Gist options
  • Save jjsantos01/6afb7428667306eeae0219b999239c21 to your computer and use it in GitHub Desktop.
Save jjsantos01/6afb7428667306eeae0219b999239c21 to your computer and use it in GitHub Desktop.
Participación en el Hackathon BBVA 2019 en el reto de Pymes. El objetivo era crear un nuevo modelo de propensión al crédito que considere factores especiales para las Pymes considerando su ciclo de vida y particularidades.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"source": [
"import pandas as pd\n",
"import json\n",
"import re\n",
"\n",
"\n",
"def sin_acento(text):\n",
" out = text.lower().strip().replace('á','a').replace('é','e').replace('í','i').replace('ó','o').replace('ú','u')\n",
" return out\n",
"\n",
"def normaliza(text):\n",
" out = text.lower().strip().replace('á','a').replace('é','e').replace('í','i')\\\n",
" .replace('ó','o').replace('ú','u').replace(' ', '_').replace('\\n', '')\\\n",
" .replace('(', '').replace(')', '').replace('>', '')\n",
" return out\n",
"\n",
"def asigna_cve_entidad(x, nacional=True):\n",
" \"\"\"Función que asigna la clave Inegi si detecta el nombre de la entidad.\n",
" nacional indica si se asigna también un código cuando se refiere al valor Nacional\"\"\"\n",
" if not nacional:\n",
" del diccionario_cve_entidad['00']\n",
" for c in diccionario_cve_entidad:\n",
" out = ''\n",
" if re.search(sin_acento(diccionario_cve_entidad[c]), sin_acento(x), flags=re.I|re.A):\n",
" out = c\n",
" break\n",
" return out\n",
"\n",
"with open('entidades/diccionario_cve_entidad.json', encoding='utf-8') as d_ent_file:\n",
" diccionario_cve_entidad = json.load(d_ent_file)\n",
"\n",
"\n",
"%matplotlib inline"
],
"outputs": [],
"execution_count": 1,
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"# Base BBVA"
],
"metadata": {
"heading_collapsed": true
}
},
{
"cell_type": "code",
"source": [
"# catalogos\n",
"# creamos sector economico IMSS\n",
"dicc_cliente_imss = pd.read_csv('catalogos/basepyme_sectores.csv')\\\n",
" .set_index('CLIENTE')['sectores_imss'].astype(str).str[0].to_dict()\n",
"\n",
"# Sector bancomer a SCIAN\n",
"dicc_sector_scian = pd.read_csv('catalogos/sectores_scian.csv')\\\n",
" .set_index('sector_baseBancomer')['clave_scian_2digs'].astype(str).to_dict()\n"
],
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"c:\\programdata\\anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:2728: DtypeWarning: Columns (2) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" interactivity=interactivity, compiler=compiler, result=result)\n"
]
}
],
"execution_count": 2,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"df = pd.read_csv('catalogos/basepyme_cve_mun.csv',\n",
" dtype={'CD_POSTAL': str, 'codgeo': str})\\\n",
" .rename(columns=normaliza)\\\n",
" .rename(columns={'codgeo': 'cve_mun'})\\\n",
" .assign(cve_ent=lambda x: x['cve_mun'].str[0:2],\n",
" nacimiento=lambda x: x['fh_nacimiento'].pipe(pd.to_datetime, errors='coerce'),\n",
" antiguedad_banco=lambda x: x['fh_antiguedad_con_banco'].pipe(pd.to_datetime, errors='coerce'),\n",
" nacimiento_meses=lambda x:(pd.datetime.today()-x['nacimiento']).dt.days.div(30),\n",
" antiguedad_meses=lambda x:(pd.datetime.today()-x['antiguedad_banco']).dt.days.div(30),\n",
" mora=lambda x: x['moratarj_tarjeta_pyme'].add(x['moracs_credito_pyme']).gt(0).astype(int),\n",
" oferta=lambda x: x['oferta_credito_en_2018'].notnull().astype(int),\n",
" scian=lambda x: x['nb_sector_n2'].map(dicc_sector_scian),\n",
" sector_imss1=lambda x: x['cliente'].map(dicc_cliente_imss))\n",
"df.head()"
],
"outputs": [],
"execution_count": null,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"df.columns"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 4,
"data": {
"text/plain": "Index(['cliente', 'sexo', 'tipo_persona', 'cd_estado', 'cd_postal',\n 'municipio', 'fh_nacimiento', 'cd_giro_banxico', 'nb_actividad',\n 'nb_sector_n2', 'nb_seg_nrb_19', 'rango_ventas',\n 'fh_antiguedad_con_banco', 'ventas_por_tpv', 'oferta_credito_en_2018',\n 'fh_forma_credito_2018', 'tdcn', 'disponibletarjeta', 'bcom', 'bneg',\n 'email', 'creditos_simples_otorgados_desde_2018',\n 'moratarj_tarjeta_pyme', 'moracs_credito_pyme',\n 'saldo_cuenta_cheques_jul19', 'gastos__100_mil_en_ultimo_3_meses',\n 'gastos__300_mil_en_ultimo_3_meses',\n 'gastos__600_mil_en_ultimo_3_meses', 'gastos__1_millon_en_ulti_3_meses',\n 'edo', 'cve_mun', 'cve_ent', 'nacimiento', 'antiguedad_banco',\n 'nacimiento_meses', 'antiguedad_meses', 'mora', 'oferta', 'scian',\n 'sector_imss1'],\n dtype='object')"
},
"metadata": {}
}
],
"execution_count": 4,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"df.mora.value_counts()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 5,
"data": {
"text/plain": "0 47416\n1 2022\nName: mora, dtype: int64"
},
"metadata": {}
}
],
"execution_count": 5,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"# gastos de más de 100 mil\n",
"df.gastos__100_mil_en_ultimo_3_meses.value_counts().head()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 6,
"data": {
"text/plain": "0 44268\n1 2509\n2 905\n3 473\n4 272\nName: gastos__100_mil_en_ultimo_3_meses, dtype: int64"
},
"metadata": {}
}
],
"execution_count": 6,
"metadata": {
"hidden": true,
"scrolled": true
}
},
{
"cell_type": "code",
"source": [
"# Créditos otorgados\n",
"df.creditos_simples_otorgados_desde_2018.value_counts()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 7,
"data": {
"text/plain": "0 49237\n1 189\n2 12\nName: creditos_simples_otorgados_desde_2018, dtype: int64"
},
"metadata": {}
}
],
"execution_count": 7,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"# Mora tarjeta Pyme\n",
"df.moratarj_tarjeta_pyme.value_counts(normalize=True)"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 8,
"data": {
"text/plain": "0 0.971075\n1 0.028925\nName: moratarj_tarjeta_pyme, dtype: float64"
},
"metadata": {}
}
],
"execution_count": 8,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"# Mora credito pyme\n",
"df.moracs_credito_pyme.value_counts(normalize=True)"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 9,
"data": {
"text/plain": "0 0.984384\n1 0.015616\nName: moracs_credito_pyme, dtype: float64"
},
"metadata": {}
}
],
"execution_count": 9,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"# municpios únicos\n",
"len((df.cd_estado+df.municipio).unique())"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 10,
"data": {
"text/plain": "1451"
},
"metadata": {}
}
],
"execution_count": 10,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"# valores únicos giro Banxico\n",
"len(df.cd_giro_banxico.unique())"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 11,
"data": {
"text/plain": "870"
},
"metadata": {}
}
],
"execution_count": 11,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"# se les ofrecio credito\n",
"df.oferta_credito_en_2018.value_counts()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 12,
"data": {
"text/plain": "300000.0 366\n195000.0 16\n207000.0 14\n197000.0 13\n0.0 13\n228000.0 12\n219000.0 11\n205000.0 11\n194000.0 11\n200000.0 10\n193000.0 10\n199000.0 10\n243000.0 9\n191000.0 9\n213000.0 9\n211000.0 9\n297000.0 8\n192000.0 8\n227000.0 8\n231000.0 8\n202000.0 8\n198000.0 8\n259000.0 8\n295000.0 8\n239000.0 7\n291000.0 7\n208000.0 7\n442000.0 7\n248000.0 7\n218000.0 7\n ... \n936000.0 1\n614000.0 1\n471000.0 1\n76800.0 1\n104800.0 1\n157000.0 1\n877000.0 1\n373000.0 1\n358000.0 1\n25000.0 1\n759000.0 1\n394000.0 1\n1095000.0 1\n613000.0 1\n95000.0 1\n1272000.0 1\n30000.0 1\n476000.0 1\n677000.0 1\n389000.0 1\n743000.0 1\n41000.0 1\n35200.0 1\n891000.0 1\n98000.0 1\n707000.0 1\n511000.0 1\n1356000.0 1\n472000.0 1\n173000.0 1\nName: oferta_credito_en_2018, Length: 584, dtype: int64"
},
"metadata": {}
}
],
"execution_count": 12,
"metadata": {
"hidden": true,
"scrolled": true
}
},
{
"cell_type": "code",
"source": [
"df[df.oferta_credito_en_2018.notnull()]"
],
"outputs": [],
"execution_count": null,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"# formalizaron crédito\n",
"df.fh_forma_credito_2018.notnull().sum()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 14,
"data": {
"text/plain": "366"
},
"metadata": {}
}
],
"execution_count": 14,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"df.groupby(['nb_actividad'])\\\n",
" .agg({'cliente': ['count'], 'mora': lambda x: x.mean()*100})"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 15,
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead tr th {\n text-align: left;\n }\n\n .dataframe thead tr:last-of-type th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr>\n <th></th>\n <th>cliente</th>\n <th>mora</th>\n </tr>\n <tr>\n <th></th>\n <th>count</th>\n <th>&lt;lambda&gt;</th>\n </tr>\n <tr>\n <th>nb_actividad</th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>?</th>\n <td>5021</td>\n <td>3.843856</td>\n </tr>\n <tr>\n <th>AGROINDUSTRIA</th>\n <td>3735</td>\n <td>2.141901</td>\n </tr>\n <tr>\n <th>ALQUILER DE INMUEBLES</th>\n <td>1082</td>\n <td>1.848429</td>\n </tr>\n <tr>\n <th>AUTOS Y REFACCIONES</th>\n <td>538</td>\n <td>6.691450</td>\n </tr>\n <tr>\n <th>COMUNICACIONES</th>\n <td>190</td>\n <td>6.315789</td>\n </tr>\n <tr>\n <th>ELECTRICIDAD</th>\n <td>58</td>\n <td>10.344828</td>\n </tr>\n <tr>\n <th>ESTACIONES DE GASOLI Y GASERAS</th>\n <td>203</td>\n <td>0.985222</td>\n </tr>\n <tr>\n <th>GOBIERNO</th>\n <td>1335</td>\n <td>0.674157</td>\n </tr>\n <tr>\n <th>HOTELES</th>\n <td>354</td>\n <td>2.824859</td>\n </tr>\n <tr>\n <th>INDUSTRIAS BASICAS</th>\n <td>444</td>\n <td>4.729730</td>\n </tr>\n <tr>\n <th>INSUMOS PARA LA INDUSTRIA</th>\n <td>2069</td>\n <td>5.026583</td>\n </tr>\n <tr>\n <th>MAQUIRIA Y EQUIPO</th>\n <td>646</td>\n <td>4.798762</td>\n </tr>\n <tr>\n <th>MINERIA</th>\n <td>146</td>\n <td>2.739726</td>\n </tr>\n <tr>\n <th>NO VIVIENDA</th>\n <td>1823</td>\n <td>2.413604</td>\n </tr>\n <tr>\n <th>OTRAS INDUSTRIAS MANUFACTURERAS</th>\n <td>611</td>\n <td>5.728314</td>\n </tr>\n <tr>\n <th>OTROS COMERCIANTES</th>\n <td>3398</td>\n <td>5.620954</td>\n </tr>\n <tr>\n <th>OTROS SERVICIOS</th>\n <td>1827</td>\n <td>4.980843</td>\n </tr>\n <tr>\n <th>PAPEL</th>\n <td>249</td>\n <td>6.827309</td>\n </tr>\n <tr>\n <th>PRODUCTOS A BASE DE MINERALES NO METALICOS</th>\n <td>446</td>\n <td>4.484305</td>\n </tr>\n <tr>\n <th>QUIMICO</th>\n <td>246</td>\n <td>2.439024</td>\n </tr>\n <tr>\n <th>RESTAURANT</th>\n <td>1076</td>\n <td>3.624535</td>\n </tr>\n <tr>\n <th>ROPA REGALOS Y CALZADO</th>\n <td>3075</td>\n <td>5.658537</td>\n </tr>\n <tr>\n <th>SERVICIOS DE EDUCACION</th>\n <td>1781</td>\n <td>5.951713</td>\n </tr>\n <tr>\n <th>SERVICIOS DE ESPARCIMIENTO</th>\n <td>302</td>\n <td>3.311258</td>\n </tr>\n <tr>\n <th>SERVICIOS FINANCIEROS</th>\n <td>632</td>\n <td>3.006329</td>\n </tr>\n <tr>\n <th>SERVICIOS MEDICOS</th>\n <td>1541</td>\n <td>3.893576</td>\n </tr>\n <tr>\n <th>SERVICIOS PROFESIONALES</th>\n <td>9994</td>\n <td>3.582149</td>\n </tr>\n <tr>\n <th>SUPER Y ABARROTES</th>\n <td>3164</td>\n <td>4.709229</td>\n </tr>\n <tr>\n <th>TEXTIL</th>\n <td>519</td>\n <td>4.816956</td>\n </tr>\n <tr>\n <th>TRANSPORTE</th>\n <td>2107</td>\n <td>5.742762</td>\n </tr>\n <tr>\n <th>VIVIENDA</th>\n <td>826</td>\n <td>3.510896</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " cliente mora\n count <lambda>\nnb_actividad \n? 5021 3.843856\nAGROINDUSTRIA 3735 2.141901\nALQUILER DE INMUEBLES 1082 1.848429\nAUTOS Y REFACCIONES 538 6.691450\nCOMUNICACIONES 190 6.315789\nELECTRICIDAD 58 10.344828\nESTACIONES DE GASOLI Y GASERAS 203 0.985222\nGOBIERNO 1335 0.674157\nHOTELES 354 2.824859\nINDUSTRIAS BASICAS 444 4.729730\nINSUMOS PARA LA INDUSTRIA 2069 5.026583\nMAQUIRIA Y EQUIPO 646 4.798762\nMINERIA 146 2.739726\nNO VIVIENDA 1823 2.413604\nOTRAS INDUSTRIAS MANUFACTURERAS 611 5.728314\nOTROS COMERCIANTES 3398 5.620954\nOTROS SERVICIOS 1827 4.980843\nPAPEL 249 6.827309\nPRODUCTOS A BASE DE MINERALES NO METALICOS 446 4.484305\nQUIMICO 246 2.439024\nRESTAURANT 1076 3.624535\nROPA REGALOS Y CALZADO 3075 5.658537\nSERVICIOS DE EDUCACION 1781 5.951713\nSERVICIOS DE ESPARCIMIENTO 302 3.311258\nSERVICIOS FINANCIEROS 632 3.006329\nSERVICIOS MEDICOS 1541 3.893576\nSERVICIOS PROFESIONALES 9994 3.582149\nSUPER Y ABARROTES 3164 4.709229\nTEXTIL 519 4.816956\nTRANSPORTE 2107 5.742762\nVIVIENDA 826 3.510896"
},
"metadata": {}
}
],
"execution_count": 15,
"metadata": {
"hidden": true,
"scrolled": true
}
},
{
"cell_type": "code",
"source": [
"df.groupby(['cd_estado', 'municipio'])\\\n",
" .agg({'cliente': 'count', 'mora': lambda x: x.mean()*100})"
],
"outputs": [],
"execution_count": null,
"metadata": {
"hidden": true,
"scrolled": true
}
},
{
"cell_type": "markdown",
"source": [
"# Datos externos"
],
"metadata": {
"heading_collapsed": true
}
},
{
"cell_type": "markdown",
"source": [
"## Demográficos"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"demog = pd.read_csv('inclusion_financiera/demograficos.csv', dtype={'cve_mun': str, 'cve_ent': str})\\\n",
" .assign(cve_mun=lambda x: x['cve_mun'].str.zfill(5),\n",
" cve_ent=lambda x: x['cve_ent'].str.zfill(2))\n",
"demog.head()\n",
"\n",
"base_final = df.merge(demog, on='cve_mun', how='left')\n",
"demog.to_csv('bases_auxiliares_finales/demograficas.csv', index=False)"
],
"outputs": [],
"execution_count": 17,
"metadata": {
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"## DENUE\n",
"\n",
"Número de unidades económicas, por municipio y sector (2 dígitos scian)"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"years_denue = ['2015', '10_2016', '11_2017', '11_2018', '04_2019']\n",
"denue = pd.read_csv('variables_economicas/denue_agrupado_muni_subsector.csv', low_memory=False,\n",
" dtype={'cve_mun': str, 'subsector': str})\\\n",
" .query('denue==@years_denue')\\\n",
" .query('cve_mun.str.len()==5', engine='python')\\\n",
" .assign(sector=lambda x: x['subsector'].str[0:2],\n",
" cve_ent=lambda x: x['cve_mun'].str[0:2])\\\n",
" .groupby(['cve_ent', 'cve_mun', 'sector', 'denue']).agg({'id': 'count'})\\\n",
" .reset_index()\\\n",
" .assign(year=lambda x: 'empresas_denue_'+x['denue'].str[-4:])\\\n",
" .pivot_table(values='id', index=['cve_ent', 'cve_mun', 'sector'], columns='year')\\\n",
" .reset_index()\\\n",
" .rename(columns={'sector': 'scian'})\n",
"\n",
"base_final = base_final.merge(denue, on=['cve_mun', 'scian'], how='left')\n",
"denue.head()\n",
"denue.to_csv('bases_auxiliares_finales/denue.csv', index=False)"
],
"outputs": [],
"execution_count": 18,
"metadata": {
"hidden": true,
"scrolled": true
}
},
{
"cell_type": "markdown",
"source": [
"## Asegurados IMSS\n",
"\n",
"Número de asegurados ante el IMSS, en empresas de hasta 6 empleados.\n",
"\n",
"FALTA SECTOR ECONOMICO\n",
"FALTAN MUNICIPIOS DEL DF"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"dict_imss1_to_scian = pd.read_csv('catalogos/sectores_imss.csv')\\\n",
" .assign(scian=lambda x: x['sector'].map(dicc_sector_scian),\n",
" imss1=lambda x: x['clave'].astype(str).str[0])\\\n",
" .set_index('imss1')['scian'].to_dict()\n",
"\n",
"dict_imss1_to_scian['0'] = '11'\n",
"dict_imss1_to_scian['5'] = '22'"
],
"outputs": [],
"execution_count": 467,
"metadata": {
"hidden": true,
"scrolled": true
}
},
{
"cell_type": "code",
"source": [
"periodo_imss = ['2019-04-30', '2018-12-31', '2017-12-31', '2016-12-31']\n",
"imss = pd.read_csv('imss/asegurados_imss_municpio_sector_2015_2019.csv', dtype={'cve_mun': str})\\\n",
" .query('periodo==@periodo_imss')\\\n",
" .assign(year=lambda x: x['periodo'].str[0:4])\\\n",
" .pivot_table(values=['asegurados', 'salario'], index=['cve_mun', 'sector_economico_1'],\n",
" columns=['year']).stack([0, 1]).reset_index()\\\n",
" .assign(variable=lambda x: x['level_2']+'_'+x['year'].astype(str))\\\n",
" .pivot_table(index=['cve_mun', 'sector_economico_1'], values=0, columns='variable')\\\n",
" .reset_index()\\\n",
" .assign(cve_ent=lambda x: x['cve_mun'].str[0:2])\\\n",
" .rename(columns={'sector_economico_1': 'sector_imss1'})\\\n",
" .astype({'sector_imss1': str})\\\n",
" .assign(scian=lambda x: x['sector_imss1'].map(dict_imss1_to_scian))\n",
"\n",
"base_final = base_final.merge(imss, on=['cve_mun', 'sector_imss1'], how='left')\n",
"imss.head()\n",
"imss.to_csv('bases_auxiliares_finales/imss.csv', index=False)"
],
"outputs": [],
"execution_count": 470,
"metadata": {
"hidden": true,
"scrolled": true
}
},
{
"cell_type": "markdown",
"source": [
"## Inclusión financiera"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"inclusion = pd.read_csv('inclusion_financiera/bd_inclusion_financiera_municipio_2015_2019.csv',\n",
" dtype={'cve_mun': str})\\\n",
" .query(\"year==2019\")\\\n",
" .assign(cve_mun=lambda x: x['cve_mun'].str.zfill(5),\n",
" cve_ent=lambda x: x['cve_mun'].str[0:2])\n",
"\n",
"base_final = base_final.merge(inclusion, on='cve_mun', how='left')\n",
"inclusion.head()\n",
"inclusion.to_csv('bases_auxiliares_finales/inclusion.csv', index=False)"
],
"outputs": [],
"execution_count": 20,
"metadata": {
"hidden": true,
"scrolled": false
}
},
{
"cell_type": "code",
"source": [],
"outputs": [],
"execution_count": null,
"metadata": {
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"## Cartera comercial"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"### Por estado"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"cartera_ent = pd.read_csv('cartera_comercial/bd_cartera_comercial_tamano_estados_2016_2019.csv',\n",
" dtype={'cve_ent': str})\\\n",
" .filter(regex='year|cve_ent|pyme')\\\n",
" .pivot_table(index='cve_ent', columns='year').stack([0, 1]).reset_index()\\\n",
" .assign(variable=lambda x: x['level_1']+'_'+x['year'].astype(str)+'_x_ent')\\\n",
" .pivot_table(index='cve_ent', values=0, columns='variable')\\\n",
" .reset_index()\n",
"\n",
"base_final = base_final.merge(cartera_ent, on='cve_ent', how='left')\n",
"cartera_ent.to_csv('bases_auxiliares_finales/cartera_comercial_entidades.csv', index=False)\n",
"cartera_ent.head()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 21,
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th>variable</th>\n <th>cve_ent</th>\n <th>acreditados_pyme_2016_x_ent</th>\n <th>acreditados_pyme_2017_x_ent</th>\n <th>acreditados_pyme_2018_x_ent</th>\n <th>acreditados_pyme_2019_x_ent</th>\n <th>cartera_total_pyme_2016_x_ent</th>\n <th>cartera_total_pyme_2017_x_ent</th>\n <th>cartera_total_pyme_2018_x_ent</th>\n <th>cartera_total_pyme_2019_x_ent</th>\n <th>cartera_vencida_pyme_2016_x_ent</th>\n <th>...</th>\n <th>cartera_vigente_pyme_2018_x_ent</th>\n <th>cartera_vigente_pyme_2019_x_ent</th>\n <th>creditos_pyme_2016_x_ent</th>\n <th>creditos_pyme_2017_x_ent</th>\n <th>creditos_pyme_2018_x_ent</th>\n <th>creditos_pyme_2019_x_ent</th>\n <th>monto_dispuesto_pyme_2016_x_ent</th>\n <th>monto_dispuesto_pyme_2017_x_ent</th>\n <th>monto_dispuesto_pyme_2018_x_ent</th>\n <th>monto_dispuesto_pyme_2019_x_ent</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>01</td>\n <td>7635.0</td>\n <td>7673.0</td>\n <td>7626.0</td>\n <td>7814.0</td>\n <td>5747.18</td>\n <td>5829.22</td>\n <td>5800.91</td>\n <td>6213.12</td>\n <td>142.90</td>\n <td>...</td>\n <td>5681.92</td>\n <td>6080.47</td>\n <td>7786.0</td>\n <td>8030.0</td>\n <td>7825.0</td>\n <td>8040.0</td>\n <td>1232.64</td>\n <td>1271.03</td>\n <td>1027.63</td>\n <td>1061.00</td>\n </tr>\n <tr>\n <th>1</th>\n <td>02</td>\n <td>15510.0</td>\n <td>15299.0</td>\n <td>15062.0</td>\n <td>15565.0</td>\n <td>10737.88</td>\n <td>10917.96</td>\n <td>12161.98</td>\n <td>12062.35</td>\n <td>351.22</td>\n <td>...</td>\n <td>11552.13</td>\n <td>11431.98</td>\n <td>15906.0</td>\n <td>15869.0</td>\n <td>15621.0</td>\n <td>16395.0</td>\n <td>1673.19</td>\n <td>1316.13</td>\n <td>1626.56</td>\n <td>1455.93</td>\n </tr>\n <tr>\n <th>2</th>\n <td>03</td>\n <td>4750.0</td>\n <td>4457.0</td>\n <td>4077.0</td>\n <td>4084.0</td>\n <td>2351.96</td>\n <td>2682.62</td>\n <td>2527.36</td>\n <td>2322.01</td>\n <td>81.71</td>\n <td>...</td>\n <td>2403.74</td>\n <td>2199.48</td>\n <td>4886.0</td>\n <td>4758.0</td>\n <td>4169.0</td>\n <td>4156.0</td>\n <td>259.59</td>\n <td>337.92</td>\n <td>278.52</td>\n <td>256.64</td>\n </tr>\n <tr>\n <th>3</th>\n <td>04</td>\n <td>3036.0</td>\n <td>2888.0</td>\n <td>2684.0</td>\n <td>2718.0</td>\n <td>2299.17</td>\n <td>2138.64</td>\n <td>2195.14</td>\n <td>2007.92</td>\n <td>182.69</td>\n <td>...</td>\n <td>2006.84</td>\n <td>1819.93</td>\n <td>3075.0</td>\n <td>2927.0</td>\n <td>2710.0</td>\n <td>2792.0</td>\n <td>309.01</td>\n <td>261.76</td>\n <td>229.84</td>\n <td>246.52</td>\n </tr>\n <tr>\n <th>4</th>\n <td>05</td>\n <td>15339.0</td>\n <td>14799.0</td>\n <td>14698.0</td>\n <td>15027.0</td>\n <td>11278.53</td>\n <td>12024.48</td>\n <td>12267.28</td>\n <td>12469.29</td>\n <td>461.32</td>\n <td>...</td>\n <td>11801.44</td>\n <td>11936.93</td>\n <td>15809.0</td>\n <td>15563.0</td>\n <td>15376.0</td>\n <td>15694.0</td>\n <td>1661.11</td>\n <td>1754.64</td>\n <td>1716.54</td>\n <td>1762.38</td>\n </tr>\n </tbody>\n</table>\n<p>5 rows × 25 columns</p>\n</div>",
"text/plain": "variable cve_ent acreditados_pyme_2016_x_ent acreditados_pyme_2017_x_ent \\\n0 01 7635.0 7673.0 \n1 02 15510.0 15299.0 \n2 03 4750.0 4457.0 \n3 04 3036.0 2888.0 \n4 05 15339.0 14799.0 \n\nvariable acreditados_pyme_2018_x_ent acreditados_pyme_2019_x_ent \\\n0 7626.0 7814.0 \n1 15062.0 15565.0 \n2 4077.0 4084.0 \n3 2684.0 2718.0 \n4 14698.0 15027.0 \n\nvariable cartera_total_pyme_2016_x_ent cartera_total_pyme_2017_x_ent \\\n0 5747.18 5829.22 \n1 10737.88 10917.96 \n2 2351.96 2682.62 \n3 2299.17 2138.64 \n4 11278.53 12024.48 \n\nvariable cartera_total_pyme_2018_x_ent cartera_total_pyme_2019_x_ent \\\n0 5800.91 6213.12 \n1 12161.98 12062.35 \n2 2527.36 2322.01 \n3 2195.14 2007.92 \n4 12267.28 12469.29 \n\nvariable cartera_vencida_pyme_2016_x_ent ... \\\n0 142.90 ... \n1 351.22 ... \n2 81.71 ... \n3 182.69 ... \n4 461.32 ... \n\nvariable cartera_vigente_pyme_2018_x_ent cartera_vigente_pyme_2019_x_ent \\\n0 5681.92 6080.47 \n1 11552.13 11431.98 \n2 2403.74 2199.48 \n3 2006.84 1819.93 \n4 11801.44 11936.93 \n\nvariable creditos_pyme_2016_x_ent creditos_pyme_2017_x_ent \\\n0 7786.0 8030.0 \n1 15906.0 15869.0 \n2 4886.0 4758.0 \n3 3075.0 2927.0 \n4 15809.0 15563.0 \n\nvariable creditos_pyme_2018_x_ent creditos_pyme_2019_x_ent \\\n0 7825.0 8040.0 \n1 15621.0 16395.0 \n2 4169.0 4156.0 \n3 2710.0 2792.0 \n4 15376.0 15694.0 \n\nvariable monto_dispuesto_pyme_2016_x_ent monto_dispuesto_pyme_2017_x_ent \\\n0 1232.64 1271.03 \n1 1673.19 1316.13 \n2 259.59 337.92 \n3 309.01 261.76 \n4 1661.11 1754.64 \n\nvariable monto_dispuesto_pyme_2018_x_ent monto_dispuesto_pyme_2019_x_ent \n0 1027.63 1061.00 \n1 1626.56 1455.93 \n2 278.52 256.64 \n3 229.84 246.52 \n4 1716.54 1762.38 \n\n[5 rows x 25 columns]"
},
"metadata": {}
}
],
"execution_count": 21,
"metadata": {
"hidden": true,
"scrolled": true
}
},
{
"cell_type": "markdown",
"source": [
"### Por sector"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"cartera_sector = pd.read_csv('cartera_comercial/bd_cartera_comercial_tamano_sector_2016_2019.csv',\n",
" dtype={'cve_ent': str})\\\n",
" .rename(columns={'actividad_economica_agragada': 'sector'})\\\n",
" .filter(regex='year|sector|pyme')\\\n",
" .pivot_table(index='sector', columns='year').stack([0, 1]).reset_index()\\\n",
" .assign(variable=lambda x: x['level_1']+'_'+x['year'].astype(str)+'_x_sector')\\\n",
" .pivot_table(index='sector', values=0, columns='variable')\\\n",
" .reset_index()\\\n",
" .rename({'sector': 'scian'})\n",
"\n",
"cartera_sector"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 22,
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th>variable</th>\n <th>sector</th>\n <th>acreditados_pyme_2016_x_sector</th>\n <th>acreditados_pyme_2017_x_sector</th>\n <th>acreditados_pyme_2018_x_sector</th>\n <th>acreditados_pyme_2019_x_sector</th>\n <th>cartera_total_pyme_2016_x_sector</th>\n <th>cartera_total_pyme_2017_x_sector</th>\n <th>cartera_total_pyme_2018_x_sector</th>\n <th>cartera_total_pyme_2019_x_sector</th>\n <th>cartera_vencida_pyme_2016_x_sector</th>\n <th>...</th>\n <th>cartera_vigente_pyme_2018_x_sector</th>\n <th>cartera_vigente_pyme_2019_x_sector</th>\n <th>creditos_pyme_2016_x_sector</th>\n <th>creditos_pyme_2017_x_sector</th>\n <th>creditos_pyme_2018_x_sector</th>\n <th>creditos_pyme_2019_x_sector</th>\n <th>monto_dispuesto_pyme_2016_x_sector</th>\n <th>monto_dispuesto_pyme_2017_x_sector</th>\n <th>monto_dispuesto_pyme_2018_x_sector</th>\n <th>monto_dispuesto_pyme_2019_x_sector</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Agricultura, Silvicultura, Ganadería y Pesca</td>\n <td>12830.0</td>\n <td>12830.0</td>\n <td>12830.0</td>\n <td>12830.0</td>\n <td>29233.76</td>\n <td>29233.76</td>\n <td>29233.76</td>\n <td>29233.76</td>\n <td>1183.57</td>\n <td>...</td>\n <td>28050.20</td>\n <td>28050.20</td>\n <td>13362.0</td>\n <td>13362.0</td>\n <td>13362.0</td>\n <td>13362.0</td>\n <td>3750.29</td>\n <td>3750.29</td>\n <td>3750.29</td>\n <td>3750.29</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Alimentos, Bebidas y Tabaco</td>\n <td>10797.0</td>\n <td>10797.0</td>\n <td>10797.0</td>\n <td>10797.0</td>\n <td>11796.30</td>\n <td>11796.30</td>\n <td>11796.30</td>\n <td>11796.30</td>\n <td>631.50</td>\n <td>...</td>\n <td>11164.81</td>\n <td>11164.81</td>\n <td>11367.0</td>\n <td>11367.0</td>\n <td>11367.0</td>\n <td>11367.0</td>\n <td>1936.21</td>\n <td>1936.21</td>\n <td>1936.21</td>\n <td>1936.21</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Bancario</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>2.64</td>\n <td>2.64</td>\n <td>2.64</td>\n <td>2.64</td>\n <td>0.00</td>\n <td>...</td>\n <td>2.64</td>\n <td>2.64</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>2.0</td>\n <td>0.98</td>\n <td>0.98</td>\n <td>0.98</td>\n <td>0.98</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Comercio</td>\n <td>210588.0</td>\n <td>210588.0</td>\n <td>210588.0</td>\n <td>210588.0</td>\n <td>156026.76</td>\n <td>156026.76</td>\n <td>156026.76</td>\n <td>156026.76</td>\n <td>10251.12</td>\n <td>...</td>\n <td>145775.70</td>\n <td>145775.70</td>\n <td>221119.0</td>\n <td>221119.0</td>\n <td>221119.0</td>\n <td>221119.0</td>\n <td>24695.81</td>\n <td>24695.81</td>\n <td>24695.81</td>\n <td>24695.81</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Comunicaciones y Telecomunicaciones</td>\n <td>8442.0</td>\n <td>8442.0</td>\n <td>8442.0</td>\n <td>8442.0</td>\n <td>6118.06</td>\n <td>6118.06</td>\n <td>6118.06</td>\n <td>6118.06</td>\n <td>357.75</td>\n <td>...</td>\n <td>5760.31</td>\n <td>5760.31</td>\n <td>8818.0</td>\n <td>8818.0</td>\n <td>8818.0</td>\n <td>8818.0</td>\n <td>898.37</td>\n <td>898.37</td>\n <td>898.37</td>\n <td>898.37</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Construcción</td>\n <td>25544.0</td>\n <td>25544.0</td>\n <td>25544.0</td>\n <td>25544.0</td>\n <td>44197.66</td>\n <td>44197.66</td>\n <td>44197.66</td>\n <td>44197.66</td>\n <td>2194.39</td>\n <td>...</td>\n <td>42003.25</td>\n <td>42003.25</td>\n <td>26810.0</td>\n <td>26810.0</td>\n <td>26810.0</td>\n <td>26810.0</td>\n <td>6305.68</td>\n <td>6305.68</td>\n <td>6305.68</td>\n <td>6305.68</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Educativo</td>\n <td>7264.0</td>\n <td>7264.0</td>\n <td>7264.0</td>\n <td>7264.0</td>\n <td>4137.89</td>\n <td>4137.89</td>\n <td>4137.89</td>\n <td>4137.89</td>\n <td>95.94</td>\n <td>...</td>\n <td>4041.94</td>\n <td>4041.94</td>\n <td>7383.0</td>\n <td>7383.0</td>\n <td>7383.0</td>\n <td>7383.0</td>\n <td>399.86</td>\n <td>399.86</td>\n <td>399.86</td>\n <td>399.86</td>\n </tr>\n <tr>\n <th>7</th>\n <td>Gobierno Federal</td>\n <td>463.0</td>\n <td>463.0</td>\n <td>463.0</td>\n <td>463.0</td>\n <td>94.83</td>\n <td>94.83</td>\n <td>94.83</td>\n <td>94.83</td>\n <td>2.58</td>\n <td>...</td>\n <td>92.25</td>\n <td>92.25</td>\n <td>466.0</td>\n <td>466.0</td>\n <td>466.0</td>\n <td>466.0</td>\n <td>19.72</td>\n <td>19.72</td>\n <td>19.72</td>\n <td>19.72</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Hoteles y Restaurantes</td>\n <td>14840.0</td>\n <td>14840.0</td>\n <td>14840.0</td>\n <td>14840.0</td>\n <td>12526.77</td>\n <td>12526.77</td>\n <td>12526.77</td>\n <td>12526.77</td>\n <td>415.42</td>\n <td>...</td>\n <td>12111.36</td>\n <td>12111.36</td>\n <td>17289.0</td>\n <td>17289.0</td>\n <td>17289.0</td>\n <td>17289.0</td>\n <td>1468.63</td>\n <td>1468.63</td>\n <td>1468.63</td>\n <td>1468.63</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Industria Automotriz</td>\n <td>1787.0</td>\n <td>1787.0</td>\n <td>1787.0</td>\n <td>1787.0</td>\n <td>1733.93</td>\n <td>1733.93</td>\n <td>1733.93</td>\n <td>1733.93</td>\n <td>36.64</td>\n <td>...</td>\n <td>1697.29</td>\n <td>1697.29</td>\n <td>1884.0</td>\n <td>1884.0</td>\n <td>1884.0</td>\n <td>1884.0</td>\n <td>465.72</td>\n <td>465.72</td>\n <td>465.72</td>\n <td>465.72</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Industria Materiales de Construcción</td>\n <td>1288.0</td>\n <td>1288.0</td>\n <td>1288.0</td>\n <td>1288.0</td>\n <td>1279.65</td>\n <td>1279.65</td>\n <td>1279.65</td>\n <td>1279.65</td>\n <td>86.33</td>\n <td>...</td>\n <td>1193.30</td>\n <td>1193.30</td>\n <td>1723.0</td>\n <td>1723.0</td>\n <td>1723.0</td>\n <td>1723.0</td>\n <td>173.42</td>\n <td>173.42</td>\n <td>173.42</td>\n <td>173.42</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Industria Química y Farmacéutica</td>\n <td>4690.0</td>\n <td>4690.0</td>\n <td>4690.0</td>\n <td>4690.0</td>\n <td>5639.24</td>\n <td>5639.24</td>\n <td>5639.24</td>\n <td>5639.24</td>\n <td>221.05</td>\n <td>...</td>\n <td>5418.20</td>\n <td>5418.20</td>\n <td>4930.0</td>\n <td>4930.0</td>\n <td>4930.0</td>\n <td>4930.0</td>\n <td>935.40</td>\n <td>935.40</td>\n <td>935.40</td>\n <td>935.40</td>\n </tr>\n <tr>\n <th>12</th>\n <td>Industria Textil y de Calzado</td>\n <td>9456.0</td>\n <td>9456.0</td>\n <td>9456.0</td>\n <td>9456.0</td>\n <td>10168.67</td>\n <td>10168.67</td>\n <td>10168.67</td>\n <td>10168.67</td>\n <td>768.54</td>\n <td>...</td>\n <td>9400.11</td>\n <td>9400.11</td>\n <td>9819.0</td>\n <td>9819.0</td>\n <td>9819.0</td>\n <td>9819.0</td>\n <td>1786.58</td>\n <td>1786.58</td>\n <td>1786.58</td>\n <td>1786.58</td>\n </tr>\n <tr>\n <th>13</th>\n <td>Organismos Internacionales</td>\n <td>25.0</td>\n <td>25.0</td>\n <td>25.0</td>\n <td>25.0</td>\n <td>12.24</td>\n <td>12.24</td>\n <td>12.24</td>\n <td>12.24</td>\n <td>0.51</td>\n <td>...</td>\n <td>11.73</td>\n <td>11.73</td>\n <td>26.0</td>\n <td>26.0</td>\n <td>26.0</td>\n <td>26.0</td>\n <td>0.24</td>\n <td>0.24</td>\n <td>0.24</td>\n <td>0.24</td>\n </tr>\n <tr>\n <th>14</th>\n <td>Petróleo, Minería, Gas y Energía</td>\n <td>3926.0</td>\n <td>3926.0</td>\n <td>3926.0</td>\n <td>3926.0</td>\n <td>4392.02</td>\n <td>4392.02</td>\n <td>4392.02</td>\n <td>4392.02</td>\n <td>267.12</td>\n <td>...</td>\n <td>4124.90</td>\n <td>4124.90</td>\n <td>4114.0</td>\n <td>4114.0</td>\n <td>4114.0</td>\n <td>4114.0</td>\n <td>1257.97</td>\n <td>1257.97</td>\n <td>1257.97</td>\n <td>1257.97</td>\n </tr>\n <tr>\n <th>15</th>\n <td>Resto Industria</td>\n <td>40230.0</td>\n <td>40230.0</td>\n <td>40230.0</td>\n <td>40230.0</td>\n <td>41623.80</td>\n <td>41623.80</td>\n <td>41623.80</td>\n <td>41623.80</td>\n <td>2122.84</td>\n <td>...</td>\n <td>39501.00</td>\n <td>39501.00</td>\n <td>43034.0</td>\n <td>43034.0</td>\n <td>43034.0</td>\n <td>43034.0</td>\n <td>6425.53</td>\n <td>6425.53</td>\n <td>6425.53</td>\n <td>6425.53</td>\n </tr>\n <tr>\n <th>16</th>\n <td>Salud</td>\n <td>8306.0</td>\n <td>8306.0</td>\n <td>8306.0</td>\n <td>8306.0</td>\n <td>3966.38</td>\n <td>3966.38</td>\n <td>3966.38</td>\n <td>3966.38</td>\n <td>124.78</td>\n <td>...</td>\n <td>3841.58</td>\n <td>3841.58</td>\n <td>8448.0</td>\n <td>8448.0</td>\n <td>8448.0</td>\n <td>8448.0</td>\n <td>508.09</td>\n <td>508.09</td>\n <td>508.09</td>\n <td>508.09</td>\n </tr>\n <tr>\n <th>17</th>\n <td>Servicios Comunales y Sociales</td>\n <td>1504.0</td>\n <td>1504.0</td>\n <td>1504.0</td>\n <td>1504.0</td>\n <td>1057.99</td>\n <td>1057.99</td>\n <td>1057.99</td>\n <td>1057.99</td>\n <td>38.16</td>\n <td>...</td>\n <td>1019.84</td>\n <td>1019.84</td>\n <td>1521.0</td>\n <td>1521.0</td>\n <td>1521.0</td>\n <td>1521.0</td>\n <td>134.59</td>\n <td>134.59</td>\n <td>134.59</td>\n <td>134.59</td>\n </tr>\n <tr>\n <th>18</th>\n <td>Servicios Financieros (No Bancarios)</td>\n <td>273.0</td>\n <td>273.0</td>\n <td>273.0</td>\n <td>273.0</td>\n <td>292.79</td>\n <td>292.79</td>\n <td>292.79</td>\n <td>292.79</td>\n <td>2.12</td>\n <td>...</td>\n <td>290.67</td>\n <td>290.67</td>\n <td>276.0</td>\n <td>276.0</td>\n <td>276.0</td>\n <td>276.0</td>\n <td>54.55</td>\n <td>54.55</td>\n <td>54.55</td>\n <td>54.55</td>\n </tr>\n <tr>\n <th>19</th>\n <td>Servicios Profesionales y Técnicos</td>\n <td>131139.0</td>\n <td>131139.0</td>\n <td>131139.0</td>\n <td>131139.0</td>\n <td>87144.20</td>\n <td>87144.20</td>\n <td>87144.20</td>\n <td>87144.20</td>\n <td>3934.75</td>\n <td>...</td>\n <td>83209.44</td>\n <td>83209.44</td>\n <td>133697.0</td>\n <td>133697.0</td>\n <td>133697.0</td>\n <td>133697.0</td>\n <td>12733.28</td>\n <td>12733.28</td>\n <td>12733.28</td>\n <td>12733.28</td>\n </tr>\n <tr>\n <th>20</th>\n <td>Servicios de Esparcimiento y otros Servicios R...</td>\n <td>1337.0</td>\n <td>1337.0</td>\n <td>1337.0</td>\n <td>1337.0</td>\n <td>1357.11</td>\n <td>1357.11</td>\n <td>1357.11</td>\n <td>1357.11</td>\n <td>63.10</td>\n <td>...</td>\n <td>1294.01</td>\n <td>1294.01</td>\n <td>1379.0</td>\n <td>1379.0</td>\n <td>1379.0</td>\n <td>1379.0</td>\n <td>87.51</td>\n <td>87.51</td>\n <td>87.51</td>\n <td>87.51</td>\n </tr>\n <tr>\n <th>21</th>\n <td>Transporte</td>\n <td>22424.0</td>\n <td>22424.0</td>\n <td>22424.0</td>\n <td>22424.0</td>\n <td>19123.59</td>\n <td>19123.59</td>\n <td>19123.59</td>\n <td>19123.59</td>\n <td>624.68</td>\n <td>...</td>\n <td>18498.95</td>\n <td>18498.95</td>\n <td>23195.0</td>\n <td>23195.0</td>\n <td>23195.0</td>\n <td>23195.0</td>\n <td>2767.53</td>\n <td>2767.53</td>\n <td>2767.53</td>\n <td>2767.53</td>\n </tr>\n </tbody>\n</table>\n<p>22 rows × 25 columns</p>\n</div>",
"text/plain": "variable sector \\\n0 Agricultura, Silvicultura, Ganadería y Pesca \n1 Alimentos, Bebidas y Tabaco \n2 Bancario \n3 Comercio \n4 Comunicaciones y Telecomunicaciones \n5 Construcción \n6 Educativo \n7 Gobierno Federal \n8 Hoteles y Restaurantes \n9 Industria Automotriz \n10 Industria Materiales de Construcción \n11 Industria Química y Farmacéutica \n12 Industria Textil y de Calzado \n13 Organismos Internacionales \n14 Petróleo, Minería, Gas y Energía \n15 Resto Industria \n16 Salud \n17 Servicios Comunales y Sociales \n18 Servicios Financieros (No Bancarios) \n19 Servicios Profesionales y Técnicos \n20 Servicios de Esparcimiento y otros Servicios R... \n21 Transporte \n\nvariable acreditados_pyme_2016_x_sector acreditados_pyme_2017_x_sector \\\n0 12830.0 12830.0 \n1 10797.0 10797.0 \n2 2.0 2.0 \n3 210588.0 210588.0 \n4 8442.0 8442.0 \n5 25544.0 25544.0 \n6 7264.0 7264.0 \n7 463.0 463.0 \n8 14840.0 14840.0 \n9 1787.0 1787.0 \n10 1288.0 1288.0 \n11 4690.0 4690.0 \n12 9456.0 9456.0 \n13 25.0 25.0 \n14 3926.0 3926.0 \n15 40230.0 40230.0 \n16 8306.0 8306.0 \n17 1504.0 1504.0 \n18 273.0 273.0 \n19 131139.0 131139.0 \n20 1337.0 1337.0 \n21 22424.0 22424.0 \n\nvariable acreditados_pyme_2018_x_sector acreditados_pyme_2019_x_sector \\\n0 12830.0 12830.0 \n1 10797.0 10797.0 \n2 2.0 2.0 \n3 210588.0 210588.0 \n4 8442.0 8442.0 \n5 25544.0 25544.0 \n6 7264.0 7264.0 \n7 463.0 463.0 \n8 14840.0 14840.0 \n9 1787.0 1787.0 \n10 1288.0 1288.0 \n11 4690.0 4690.0 \n12 9456.0 9456.0 \n13 25.0 25.0 \n14 3926.0 3926.0 \n15 40230.0 40230.0 \n16 8306.0 8306.0 \n17 1504.0 1504.0 \n18 273.0 273.0 \n19 131139.0 131139.0 \n20 1337.0 1337.0 \n21 22424.0 22424.0 \n\nvariable cartera_total_pyme_2016_x_sector cartera_total_pyme_2017_x_sector \\\n0 29233.76 29233.76 \n1 11796.30 11796.30 \n2 2.64 2.64 \n3 156026.76 156026.76 \n4 6118.06 6118.06 \n5 44197.66 44197.66 \n6 4137.89 4137.89 \n7 94.83 94.83 \n8 12526.77 12526.77 \n9 1733.93 1733.93 \n10 1279.65 1279.65 \n11 5639.24 5639.24 \n12 10168.67 10168.67 \n13 12.24 12.24 \n14 4392.02 4392.02 \n15 41623.80 41623.80 \n16 3966.38 3966.38 \n17 1057.99 1057.99 \n18 292.79 292.79 \n19 87144.20 87144.20 \n20 1357.11 1357.11 \n21 19123.59 19123.59 \n\nvariable cartera_total_pyme_2018_x_sector cartera_total_pyme_2019_x_sector \\\n0 29233.76 29233.76 \n1 11796.30 11796.30 \n2 2.64 2.64 \n3 156026.76 156026.76 \n4 6118.06 6118.06 \n5 44197.66 44197.66 \n6 4137.89 4137.89 \n7 94.83 94.83 \n8 12526.77 12526.77 \n9 1733.93 1733.93 \n10 1279.65 1279.65 \n11 5639.24 5639.24 \n12 10168.67 10168.67 \n13 12.24 12.24 \n14 4392.02 4392.02 \n15 41623.80 41623.80 \n16 3966.38 3966.38 \n17 1057.99 1057.99 \n18 292.79 292.79 \n19 87144.20 87144.20 \n20 1357.11 1357.11 \n21 19123.59 19123.59 \n\nvariable cartera_vencida_pyme_2016_x_sector ... \\\n0 1183.57 ... \n1 631.50 ... \n2 0.00 ... \n3 10251.12 ... \n4 357.75 ... \n5 2194.39 ... \n6 95.94 ... \n7 2.58 ... \n8 415.42 ... \n9 36.64 ... \n10 86.33 ... \n11 221.05 ... \n12 768.54 ... \n13 0.51 ... \n14 267.12 ... \n15 2122.84 ... \n16 124.78 ... \n17 38.16 ... \n18 2.12 ... \n19 3934.75 ... \n20 63.10 ... \n21 624.68 ... \n\nvariable cartera_vigente_pyme_2018_x_sector \\\n0 28050.20 \n1 11164.81 \n2 2.64 \n3 145775.70 \n4 5760.31 \n5 42003.25 \n6 4041.94 \n7 92.25 \n8 12111.36 \n9 1697.29 \n10 1193.30 \n11 5418.20 \n12 9400.11 \n13 11.73 \n14 4124.90 \n15 39501.00 \n16 3841.58 \n17 1019.84 \n18 290.67 \n19 83209.44 \n20 1294.01 \n21 18498.95 \n\nvariable cartera_vigente_pyme_2019_x_sector creditos_pyme_2016_x_sector \\\n0 28050.20 13362.0 \n1 11164.81 11367.0 \n2 2.64 2.0 \n3 145775.70 221119.0 \n4 5760.31 8818.0 \n5 42003.25 26810.0 \n6 4041.94 7383.0 \n7 92.25 466.0 \n8 12111.36 17289.0 \n9 1697.29 1884.0 \n10 1193.30 1723.0 \n11 5418.20 4930.0 \n12 9400.11 9819.0 \n13 11.73 26.0 \n14 4124.90 4114.0 \n15 39501.00 43034.0 \n16 3841.58 8448.0 \n17 1019.84 1521.0 \n18 290.67 276.0 \n19 83209.44 133697.0 \n20 1294.01 1379.0 \n21 18498.95 23195.0 \n\nvariable creditos_pyme_2017_x_sector creditos_pyme_2018_x_sector \\\n0 13362.0 13362.0 \n1 11367.0 11367.0 \n2 2.0 2.0 \n3 221119.0 221119.0 \n4 8818.0 8818.0 \n5 26810.0 26810.0 \n6 7383.0 7383.0 \n7 466.0 466.0 \n8 17289.0 17289.0 \n9 1884.0 1884.0 \n10 1723.0 1723.0 \n11 4930.0 4930.0 \n12 9819.0 9819.0 \n13 26.0 26.0 \n14 4114.0 4114.0 \n15 43034.0 43034.0 \n16 8448.0 8448.0 \n17 1521.0 1521.0 \n18 276.0 276.0 \n19 133697.0 133697.0 \n20 1379.0 1379.0 \n21 23195.0 23195.0 \n\nvariable creditos_pyme_2019_x_sector monto_dispuesto_pyme_2016_x_sector \\\n0 13362.0 3750.29 \n1 11367.0 1936.21 \n2 2.0 0.98 \n3 221119.0 24695.81 \n4 8818.0 898.37 \n5 26810.0 6305.68 \n6 7383.0 399.86 \n7 466.0 19.72 \n8 17289.0 1468.63 \n9 1884.0 465.72 \n10 1723.0 173.42 \n11 4930.0 935.40 \n12 9819.0 1786.58 \n13 26.0 0.24 \n14 4114.0 1257.97 \n15 43034.0 6425.53 \n16 8448.0 508.09 \n17 1521.0 134.59 \n18 276.0 54.55 \n19 133697.0 12733.28 \n20 1379.0 87.51 \n21 23195.0 2767.53 \n\nvariable monto_dispuesto_pyme_2017_x_sector \\\n0 3750.29 \n1 1936.21 \n2 0.98 \n3 24695.81 \n4 898.37 \n5 6305.68 \n6 399.86 \n7 19.72 \n8 1468.63 \n9 465.72 \n10 173.42 \n11 935.40 \n12 1786.58 \n13 0.24 \n14 1257.97 \n15 6425.53 \n16 508.09 \n17 134.59 \n18 54.55 \n19 12733.28 \n20 87.51 \n21 2767.53 \n\nvariable monto_dispuesto_pyme_2018_x_sector \\\n0 3750.29 \n1 1936.21 \n2 0.98 \n3 24695.81 \n4 898.37 \n5 6305.68 \n6 399.86 \n7 19.72 \n8 1468.63 \n9 465.72 \n10 173.42 \n11 935.40 \n12 1786.58 \n13 0.24 \n14 1257.97 \n15 6425.53 \n16 508.09 \n17 134.59 \n18 54.55 \n19 12733.28 \n20 87.51 \n21 2767.53 \n\nvariable monto_dispuesto_pyme_2019_x_sector \n0 3750.29 \n1 1936.21 \n2 0.98 \n3 24695.81 \n4 898.37 \n5 6305.68 \n6 399.86 \n7 19.72 \n8 1468.63 \n9 465.72 \n10 173.42 \n11 935.40 \n12 1786.58 \n13 0.24 \n14 1257.97 \n15 6425.53 \n16 508.09 \n17 134.59 \n18 54.55 \n19 12733.28 \n20 87.51 \n21 2767.53 \n\n[22 rows x 25 columns]"
},
"metadata": {}
}
],
"execution_count": 22,
"metadata": {
"hidden": true,
"scrolled": true
}
},
{
"cell_type": "markdown",
"source": [
"## Padron SAT"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"### Entidad"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"sat_ent = pd.read_excel('sat/padron_entidades_federativas.xls', encoding='latin1')\\\n",
" .rename(columns={'Año': 'year'}).query('Mes==\"Junio\"')\\\n",
" .drop(['Mes', 'No especificado'], axis=1)\\\n",
" .melt(id_vars='year', var_name='entidad')\\\n",
" .assign(year=lambda x: 'padron_sat_' + x['year'].astype(str)+'_ent',\n",
" cve_ent=lambda x: x['entidad'].apply(asigna_cve_entidad))\\\n",
" .pivot_table(index='cve_ent', columns='year', values='value')\\\n",
" .reset_index()\n",
"\n",
"base_final = base_final.merge(sat_ent, on='cve_ent', how='left')\n",
"sat_ent.to_csv('bases_auxiliares_finales/padron_sat_entidades.csv', index=False)\n",
"sat_ent.head()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 23,
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th>year</th>\n <th>cve_ent</th>\n <th>padron_sat_2010_ent</th>\n <th>padron_sat_2011_ent</th>\n <th>padron_sat_2012_ent</th>\n <th>padron_sat_2013_ent</th>\n <th>padron_sat_2014_ent</th>\n <th>padron_sat_2015_ent</th>\n <th>padron_sat_2016_ent</th>\n <th>padron_sat_2017_ent</th>\n <th>padron_sat_2018_ent</th>\n <th>padron_sat_2019_ent</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>01</td>\n <td>392978</td>\n <td>450887</td>\n <td>473080</td>\n <td>489598</td>\n <td>533261</td>\n <td>595879</td>\n <td>638618</td>\n <td>719615</td>\n <td>790048</td>\n <td>855747</td>\n </tr>\n <tr>\n <th>1</th>\n <td>02</td>\n <td>1225711</td>\n <td>1359622</td>\n <td>1422500</td>\n <td>1468161</td>\n <td>1565007</td>\n <td>1742243</td>\n <td>1859458</td>\n <td>2146377</td>\n <td>2382541</td>\n <td>2572440</td>\n </tr>\n <tr>\n <th>2</th>\n <td>03</td>\n <td>181357</td>\n <td>209784</td>\n <td>226069</td>\n <td>236147</td>\n <td>268645</td>\n <td>307681</td>\n <td>340614</td>\n <td>391367</td>\n <td>440845</td>\n <td>486095</td>\n </tr>\n <tr>\n <th>3</th>\n <td>04</td>\n <td>214984</td>\n <td>240523</td>\n <td>260208</td>\n <td>276711</td>\n <td>308479</td>\n <td>348081</td>\n <td>377085</td>\n <td>419074</td>\n <td>463919</td>\n <td>505266</td>\n </tr>\n <tr>\n <th>4</th>\n <td>05</td>\n <td>768797</td>\n <td>885191</td>\n <td>943819</td>\n <td>985517</td>\n <td>1107542</td>\n <td>1305892</td>\n <td>1424612</td>\n <td>1632540</td>\n <td>1801221</td>\n <td>1939373</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": "year cve_ent padron_sat_2010_ent padron_sat_2011_ent padron_sat_2012_ent \\\n0 01 392978 450887 473080 \n1 02 1225711 1359622 1422500 \n2 03 181357 209784 226069 \n3 04 214984 240523 260208 \n4 05 768797 885191 943819 \n\nyear padron_sat_2013_ent padron_sat_2014_ent padron_sat_2015_ent \\\n0 489598 533261 595879 \n1 1468161 1565007 1742243 \n2 236147 268645 307681 \n3 276711 308479 348081 \n4 985517 1107542 1305892 \n\nyear padron_sat_2016_ent padron_sat_2017_ent padron_sat_2018_ent \\\n0 638618 719615 790048 \n1 1859458 2146377 2382541 \n2 340614 391367 440845 \n3 377085 419074 463919 \n4 1424612 1632540 1801221 \n\nyear padron_sat_2019_ent \n0 855747 \n1 2572440 \n2 486095 \n3 505266 \n4 1939373 "
},
"metadata": {}
}
],
"execution_count": 23,
"metadata": {
"hidden": true,
"scrolled": true
}
},
{
"cell_type": "code",
"source": [],
"outputs": [],
"execution_count": null,
"metadata": {
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"### Sector"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"sectores_sat = pd.read_csv('catalogos/sectores_sat.csv')\\\n",
" \n",
"conversion_sat_scian = pd.read_csv('catalogos/conversion_scian_a_sat.csv', dtype={'clave_scian': str})\\\n",
" .rename(columns={'clave_scian': 'scian'})\n",
"\n",
"dicc_sat_scian = sectores_sat.merge(conversion_sat_scian, on='clave_sat', how='left')\\\n",
" .set_index('sector_sat')['scian'].to_dict()\n",
"\n",
"dicc_sat_scian['Servicios de apoyo a los negocios y manejo de desechos y servicios de remediación'] = 54\n"
],
"outputs": [],
"execution_count": 24,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"sat_sector = pd.read_excel('sat/padron_actividad_economica.xls', encoding='latin1')\\\n",
" .rename(columns={'Año': 'year'}).query('Trimestre==\"Segundo\"')\\\n",
" .drop(['Trimestre'], axis=1)\\\n",
" .melt(id_vars='year', var_name='sector')\\\n",
" .assign(year=lambda x: 'padron_sat_' + x['year'].astype(str)+'_sector')\\\n",
" .pivot_table(index='sector', columns='year', values='value')\\\n",
" .reset_index()\\\n",
" .assign(scian=lambda x: x['sector'].map(dicc_sat_scian))\n",
"\n",
"base_final = base_final.merge(sat_sector, on='scian', how='left')\n",
"sat_sector.to_csv('bases_auxiliares_finales/padron_sat_sector.csv', index=False)\n",
"sat_sector.head()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 25,
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th>year</th>\n <th>sector</th>\n <th>padron_sat_2011_sector</th>\n <th>padron_sat_2012_sector</th>\n <th>padron_sat_2013_sector</th>\n <th>padron_sat_2014_sector</th>\n <th>padron_sat_2015_sector</th>\n <th>padron_sat_2016_sector</th>\n <th>padron_sat_2017_sector</th>\n <th>padron_sat_2018_sector</th>\n <th>padron_sat_2019_sector</th>\n <th>scian</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Actividades del gobierno y de organismos inter...</td>\n <td>1742265</td>\n <td>1906118</td>\n <td>2020775</td>\n <td>2073932</td>\n <td>2081821</td>\n <td>2046300</td>\n <td>1923295</td>\n <td>1895664</td>\n <td>1865545</td>\n <td>93</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Agricultura, ganadería, aprovechamiento forest...</td>\n <td>2573428</td>\n <td>2779147</td>\n <td>2962976</td>\n <td>3113744</td>\n <td>3315766</td>\n <td>3396111</td>\n <td>3504435</td>\n <td>3596572</td>\n <td>3664668</td>\n <td>11</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Comercio al por mayor</td>\n <td>1588422</td>\n <td>1697039</td>\n <td>1766987</td>\n <td>1788922</td>\n <td>1762922</td>\n <td>1745185</td>\n <td>1693817</td>\n <td>1701487</td>\n <td>1705902</td>\n <td>43</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Comercio al por menor</td>\n <td>5349691</td>\n <td>5626134</td>\n <td>5755751</td>\n <td>5775674</td>\n <td>5615869</td>\n <td>5515923</td>\n <td>5365778</td>\n <td>5365486</td>\n <td>5339584</td>\n <td>46</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Construcción</td>\n <td>1538711</td>\n <td>1678225</td>\n <td>1769821</td>\n <td>1828116</td>\n <td>1855762</td>\n <td>1848866</td>\n <td>1803834</td>\n <td>1823833</td>\n <td>1837282</td>\n <td>23</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": "year sector \\\n0 Actividades del gobierno y de organismos inter... \n1 Agricultura, ganadería, aprovechamiento forest... \n2 Comercio al por mayor \n3 Comercio al por menor \n4 Construcción \n\nyear padron_sat_2011_sector padron_sat_2012_sector padron_sat_2013_sector \\\n0 1742265 1906118 2020775 \n1 2573428 2779147 2962976 \n2 1588422 1697039 1766987 \n3 5349691 5626134 5755751 \n4 1538711 1678225 1769821 \n\nyear padron_sat_2014_sector padron_sat_2015_sector padron_sat_2016_sector \\\n0 2073932 2081821 2046300 \n1 3113744 3315766 3396111 \n2 1788922 1762922 1745185 \n3 5775674 5615869 5515923 \n4 1828116 1855762 1848866 \n\nyear padron_sat_2017_sector padron_sat_2018_sector padron_sat_2019_sector \\\n0 1923295 1895664 1865545 \n1 3504435 3596572 3664668 \n2 1693817 1701487 1705902 \n3 5365778 5365486 5339584 \n4 1803834 1823833 1837282 \n\nyear scian \n0 93 \n1 11 \n2 43 \n3 46 \n4 23 "
},
"metadata": {}
}
],
"execution_count": 25,
"metadata": {
"hidden": true,
"scrolled": true
}
},
{
"cell_type": "markdown",
"source": [
"## Actividad economica trimestral"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"itae = pd.read_excel('variables_economicas/itaee_indice_retro.xlsx', header=[0, 1])\\\n",
" .set_index('Entidad Federativa').stack([0, 1]).to_frame('itae')\\\n",
" .reset_index()\\\n",
" .query('level_1>=2010 & level_2==\"I\"')\\\n",
" .assign(cve_ent=lambda x: x['Entidad Federativa'].str[0].apply(asigna_cve_entidad),\n",
" variable=lambda x: 'itae_'+x['level_1'].astype(str))\\\n",
" .pivot_table(index='cve_ent', values='itae', columns='variable')\\\n",
" .reset_index()\n",
"\n",
"base_final = base_final.merge(itae, on='cve_ent', how='left')\n",
"itae.to_csv('bases_auxiliares_finales/indice_actividad_economica_estatal.csv', index=False)\n",
"itae.head()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 26,
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th>variable</th>\n <th>cve_ent</th>\n <th>itae_2010</th>\n <th>itae_2011</th>\n <th>itae_2012</th>\n <th>itae_2013</th>\n <th>itae_2014</th>\n <th>itae_2015</th>\n <th>itae_2016</th>\n <th>itae_2017</th>\n <th>itae_2018</th>\n <th>itae_2019</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>01</td>\n <td>86.746229</td>\n <td>89.537501</td>\n <td>95.925388</td>\n <td>99.511695</td>\n <td>108.407642</td>\n <td>112.497526</td>\n <td>119.690226</td>\n <td>131.983855</td>\n <td>135.941916</td>\n <td>134.251055</td>\n </tr>\n <tr>\n <th>1</th>\n <td>02</td>\n <td>89.915144</td>\n <td>94.021973</td>\n <td>96.682896</td>\n <td>99.553183</td>\n <td>99.144466</td>\n <td>107.113774</td>\n <td>111.058446</td>\n <td>115.848308</td>\n <td>119.636289</td>\n <td>122.683027</td>\n </tr>\n <tr>\n <th>2</th>\n <td>03</td>\n <td>96.990576</td>\n <td>100.791771</td>\n <td>100.011959</td>\n <td>98.334093</td>\n <td>100.527818</td>\n <td>113.046692</td>\n <td>113.211482</td>\n <td>118.447219</td>\n <td>144.809684</td>\n <td>148.284076</td>\n </tr>\n <tr>\n <th>3</th>\n <td>04</td>\n <td>105.798000</td>\n <td>102.823000</td>\n <td>98.366000</td>\n <td>99.846000</td>\n <td>98.090000</td>\n <td>92.514000</td>\n <td>87.256000</td>\n <td>78.454000</td>\n <td>74.709000</td>\n <td>73.040000</td>\n </tr>\n <tr>\n <th>4</th>\n <td>05</td>\n <td>87.605306</td>\n <td>96.945014</td>\n <td>101.819845</td>\n <td>98.349610</td>\n <td>103.561531</td>\n <td>104.966818</td>\n <td>107.330764</td>\n <td>112.828630</td>\n <td>115.185898</td>\n <td>116.012147</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": "variable cve_ent itae_2010 itae_2011 itae_2012 itae_2013 itae_2014 \\\n0 01 86.746229 89.537501 95.925388 99.511695 108.407642 \n1 02 89.915144 94.021973 96.682896 99.553183 99.144466 \n2 03 96.990576 100.791771 100.011959 98.334093 100.527818 \n3 04 105.798000 102.823000 98.366000 99.846000 98.090000 \n4 05 87.605306 96.945014 101.819845 98.349610 103.561531 \n\nvariable itae_2015 itae_2016 itae_2017 itae_2018 itae_2019 \n0 112.497526 119.690226 131.983855 135.941916 134.251055 \n1 107.113774 111.058446 115.848308 119.636289 122.683027 \n2 113.046692 113.211482 118.447219 144.809684 148.284076 \n3 92.514000 87.256000 78.454000 74.709000 73.040000 \n4 104.966818 107.330764 112.828630 115.185898 116.012147 "
},
"metadata": {}
}
],
"execution_count": 26,
"metadata": {
"hidden": true,
"scrolled": true
}
},
{
"cell_type": "markdown",
"source": [
"## Homicidios y seguridad"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"segur = pd.read_csv('variables_sociales/Tasa homicidios Base v3.csv',\n",
" dtype={'Clave Municipal': str, 'Clave de estado': str})\\\n",
" .rename(columns={'Clave Municipal': 'cve_mun', 'Clave de estado': 'cve_ent'})\\\n",
" .query('cve_mun!=\"00\"')\\\n",
" .fillna(0)\n",
"\n",
"segur.to_csv('bases_auxiliares_finales/seguridad_estatal.csv', index=False)\n",
"base_final = base_final.merge(segur, on='cve_mun', how='left')"
],
"outputs": [],
"execution_count": 27,
"metadata": {
"hidden": true,
"scrolled": true
}
},
{
"cell_type": "code",
"source": [
"base_final.to_csv('base_modelo.csv', index=False)"
],
"outputs": [],
"execution_count": 28,
"metadata": {
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"# Modelo préstamos"
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"import sklearn\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
"from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, cohen_kappa_score\n",
"from sklearn import svm\n",
"\n",
"def resultados_modelo(modelo):\n",
" y_pred = modelo.predict(X_test).squeeze() > 0.5\n",
" metricas = {\n",
" 'precision': precision_score(y_pred, y_test, average='macro'),\n",
" 'recall': recall_score(y_pred, y_test, average='macro'),\n",
" 'f1': f1_score(y_pred, y_test, average='macro'),\n",
" 'confusion': confusion_matrix(y_pred, y_test),\n",
" 'cohen_k': cohen_kappa_score(y_pred, y_test),}\n",
" metricas['precision_0'], metricas['precision_1']= (metricas['confusion']/metricas['confusion'].sum(axis=0)).diagonal()\n",
" return metricas\n",
"\n",
"print(sklearn.__version__)"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0.21.3\n"
]
}
],
"execution_count": 188,
"metadata": {}
},
{
"cell_type": "code",
"source": [
"base_modelo = pd.read_csv('base_modelo.csv',\n",
" dtype={'cve_muni': str, 'scian': str, 'cve_ent': str})\n",
"\n",
"base_modelo = base_modelo.pipe(lambda x: pd.concat([x.loc[x.oferta.eq(1)],\n",
" x.sample(n=x.oferta.eq(1).sum(), random_state=12345)]))"
],
"outputs": [],
"execution_count": 799,
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# We create the preprocessing pipelines for both numeric and categorical data.\n",
"vars_externas = ['poblacion', 'poblacion_adulta', 'poblacion_adulta_mujeres',\n",
" 'poblacion_adulta_hombres', 'empresas_denue_2015', 'empresas_denue_2016', 'empresas_denue_2017',\n",
" 'empresas_denue_2018', 'empresas_denue_2019', 'asegurados_2016', 'asegurados_2017',\n",
" 'asegurados_2018', 'asegurados_2019', 'salario_2016', 'salario_2017', 'salario_2018',\n",
" 'salario_2019', 'cuentas_transaccionales_nivel_1', 'cuentas_transaccionales_nivel_2',\n",
" 'cuentas_transaccionales_nivel_3', 'cuentas_transaccionales_tradicionales', 'cuentas_de_ahorro',\n",
" 'depositos_a_plazo', 'tarjetas_de_debito', 'tarjetas_de_credito', 'credito_hipotecario',\n",
" 'credito_grupal', 'credito_personal', 'credito_nomina', 'credito_automotriz', 'credito_abcd',\n",
" 'credito_en_cajeros_automaticos', 'credito_en_tpv', 'sucursales_banca_comercial',\n",
" 'sucursales_banca_de_desarrollo', 'sucursales_socap', 'sucursales_sofipo', 'totalsucursales',\n",
" 'corresponsales', 'cajeros', 'terminales_punto_de_venta', 'establecimientos_con_tpv',\n",
"'contratos_que_utilizan_banca_movil', 'acreditados_pyme_2016_x_ent', 'acreditados_pyme_2017_x_ent',\n",
" 'acreditados_pyme_2018_x_ent', 'acreditados_pyme_2019_x_ent', 'cartera_total_pyme_2016_x_ent',\n",
" 'cartera_total_pyme_2017_x_ent', 'cartera_total_pyme_2018_x_ent', 'cartera_total_pyme_2019_x_ent',\n",
" 'cartera_vencida_pyme_2016_x_ent', 'cartera_vencida_pyme_2017_x_ent', 'cartera_vencida_pyme_2018_x_ent',\n",
" 'cartera_vencida_pyme_2019_x_ent', 'cartera_vigente_pyme_2016_x_ent', 'cartera_vigente_pyme_2017_x_ent',\n",
" 'cartera_vigente_pyme_2018_x_ent', 'cartera_vigente_pyme_2019_x_ent', 'creditos_pyme_2016_x_ent',\n",
" 'creditos_pyme_2017_x_ent', 'creditos_pyme_2018_x_ent', 'creditos_pyme_2019_x_ent', 'monto_dispuesto_pyme_2016_x_ent',\n",
" 'monto_dispuesto_pyme_2017_x_ent', 'monto_dispuesto_pyme_2018_x_ent', 'monto_dispuesto_pyme_2019_x_ent',\n",
" 'padron_sat_2010_ent', 'padron_sat_2011_ent', 'padron_sat_2012_ent', 'padron_sat_2013_ent', 'padron_sat_2014_ent',\n",
" 'padron_sat_2015_ent', 'padron_sat_2016_ent', 'padron_sat_2017_ent', 'padron_sat_2018_ent',\n",
" 'padron_sat_2019_ent', 'itae_2015', 'itae_2016', 'itae_2017', 'itae_2018', 'itae_2019',\n",
" 'homicidios_2013', 'homicidios_2014', 'homicidios_2015', 'homicidios_2016', 'homicidios_2017',\n",
" '2010_policia_tasa_2012', '2012_2010_policia_tasa_2014', '2014_2012_2010_policia_tasa_2016',\n",
" 'ministerios_tasa_2013', 'percep_insegur_2013', 'juez_hab_2014', 'ministerios_tasa_2014',\n",
" 'percep_insegur_2014', 'juez_hab_2015', 'ministerios_tasa_2015', 'percep_insegur_2015',\n",
" 'juez_hab_2016', 'ministerios_tasa_2016', 'percep_insegur_2016', 'juez_hab_2017', 'ministerios_tasa_2017',\n",
" 'percep_insegur_2017', 'percep_insegur_2018']\n",
"\n",
"numeric_features = ['antiguedad_meses']+vars_externas\n",
"\n",
"categorical_features = ['tipo_persona', 'nb_actividad', 'rango_ventas', 'tipo_de_poblacion']\n",
"\n",
"numeric_transformer = Pipeline(steps=[\n",
" ('imputer', SimpleImputer(strategy='median')),\n",
" ('scaler', StandardScaler())])\n",
"\n",
"\n",
"categorical_transformer = Pipeline(steps=[\n",
" ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),\n",
" ('onehot', OneHotEncoder(handle_unknown='ignore'))])\n",
"\n",
"preprocessor = ColumnTransformer(\n",
" transformers=[\n",
" ('num', numeric_transformer, numeric_features),\n",
" ('cat', categorical_transformer, categorical_features)])\n",
"\n",
"# Append classifier to preprocessing pipeline.\n",
"# Now we have a full prediction pipeline.\n",
"clf = Pipeline(steps=[('preprocessor', preprocessor),\n",
" ('classifier', RandomForestClassifier(n_estimators=100, max_depth=7, random_state=0))])\n"
],
"outputs": [],
"execution_count": 800,
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# LogisticRegression(solver='lbfgs')\n",
"#clf = RandomForestClassifier(n_estimators=100, max_depth=7, random_state=0)\n",
"\n",
"X = base_modelo[numeric_features+categorical_features]\n",
"y = base_modelo['oferta']\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
"\n",
"modelo1 = clf.fit(X_train, y_train)\n",
"\n",
"print(\"model score: %.3f\" % clf.score(X_test, y_test))\n",
"print(resultados_modelo(modelo1))"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"model score: 0.863\n",
"{'precision': 0.8655843800561205, 'recall': 0.8676996611715132, 'f1': 0.8630601092896174, 'confusion': array([[301, 26],\n",
" [ 72, 317]], dtype=int64), 'cohen_k': 0.7272465773659128, 'precision_0': 0.806970509383378, 'precision_1': 0.924198250728863}\n"
]
}
],
"execution_count": 801,
"metadata": {}
},
{
"cell_type": "code",
"source": [
"#X_test, y_test = X, y\n",
"prob = pd.concat([pd.DataFrame(clf.predict_proba(X))\\\n",
" .rename(columns={1: 'prob'})[['prob']],\n",
" y.reset_index(),\n",
" pd.DataFrame(clf.predict(X), columns=['predict'])], axis=1)\\\n",
" .assign(error=lambda x: x['oferta'].ne(x['predict']).astype(int))"
],
"outputs": [],
"execution_count": 802,
"metadata": {}
},
{
"cell_type": "code",
"source": [
"cutoff_1 = 0.4 # si el modelo 2 es bueno podríamos bajar más esta\n",
"cutoff_2 = 0.6\n",
"# No les di, pero debería haberles dado\n",
"fn = prob.sort_values('prob').query(f'prob<={cutoff_2}')['oferta'].sum()\n",
"# les di, pero no debí\n",
"fp = prob.query(f'prob>={cutoff_2} & oferta==0')['oferta'].count()\n",
"# Número de observaciones en zona gris\n",
"n_obs = prob.shape[0]\n",
"n_gris = prob.sort_values('prob').query(f'{cutoff_1}<=prob<={cutoff_2}')['oferta'].count()\n",
"error_rec = prob.sort_values('prob').query(f'{cutoff_1}<=prob<={cutoff_2}')['error'].sum()\n",
"print(f'FP:{fp}\\nFN:{fn} ({fn/prob[\"oferta\"].sum():0.1%})\\nTotal error: {fp+fn} ({(fp+fn)/n_obs:0.1%})'\n",
" f'\\nZona gris: {n_gris} ({n_gris/n_obs:0.1%})'\n",
" f'\\nError recuperable: {error_rec} ({error_rec/n_obs:0.1%})')"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"FP:5859\n",
"FN:275 (15.4%)\n",
"Total error: 6134 (12.4%)\n",
"Zona gris: 8981 (18.2%)\n",
"Error recuperable: 2954 (6.0%)\n"
]
}
],
"execution_count": 358,
"metadata": {
"scrolled": false
}
},
{
"cell_type": "code",
"source": [
"# Guarda el modelo\n",
"dump(clf, 'modelo/random_forest_oferta.joblib')"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 803,
"data": {
"text/plain": "['modelo/random_forest_oferta.joblib']"
},
"metadata": {}
}
],
"execution_count": 803,
"metadata": {}
},
{
"cell_type": "code",
"source": [
"importances = clf.named_steps['classifier'].feature_importances_"
],
"outputs": [],
"execution_count": 414,
"metadata": {
"scrolled": true
}
},
{
"cell_type": "markdown",
"source": [
"# Modelo monto de crédito"
],
"metadata": {
"heading_collapsed": true
}
},
{
"cell_type": "code",
"source": [
"import xgboost as xgb\n",
"from sklearn.metrics import mean_squared_error"
],
"outputs": [],
"execution_count": 431,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"monto_credito = pd.read_csv('base_modelo.csv',\n",
" dtype={'cve_muni': str, 'scian': str, 'cve_ent': str})\\\n",
" .loc[lambda x: x['oferta_credito_en_2018'].notnull()]"
],
"outputs": [],
"execution_count": 427,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"X = monto_credito[numeric_features+categorical_features]\n",
"y = monto_credito['oferta_credito_en_2018']\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
"\n",
"clf_monto = Pipeline(steps=[('preprocessor', preprocessor),\n",
" ('regressor', xgb.XGBRegressor(colsample_bytree=0.4,\n",
" gamma=0, \n",
" learning_rate=0.07,\n",
" max_depth=4,\n",
" min_child_weight=1.5,\n",
" n_estimators=1000, \n",
" reg_alpha=0.75,\n",
" reg_lambda=0.45,\n",
" subsample=0.6,\n",
" seed=42))])\n",
"\n",
"modelo_monto = clf_monto.fit(X_train, y_train)"
],
"outputs": [],
"execution_count": 481,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"predict_monto = clf_monto.predict(X_test)\n",
"mean_squared_error(y_test, predict_monto)**(1/2)"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 482,
"data": {
"text/plain": "426208.54510890314"
},
"metadata": {}
}
],
"execution_count": 482,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"pd.DataFrame({'predict': predict_monto, 'y': y_test})\\\n",
" .assign(error=lambda x: x['y']-x['predict'])"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 483,
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>predict</th>\n <th>y</th>\n <th>error</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>28294</th>\n <td>6.469594e+05</td>\n <td>829000.0</td>\n <td>1.820406e+05</td>\n </tr>\n <tr>\n <th>2880</th>\n <td>2.999709e+05</td>\n <td>230000.0</td>\n <td>-6.997094e+04</td>\n </tr>\n <tr>\n <th>37208</th>\n <td>5.582744e+05</td>\n <td>1053000.0</td>\n <td>4.947256e+05</td>\n </tr>\n <tr>\n <th>26399</th>\n <td>1.793602e+05</td>\n <td>68000.0</td>\n <td>-1.113602e+05</td>\n </tr>\n <tr>\n <th>28139</th>\n <td>1.848976e+05</td>\n <td>410000.0</td>\n <td>2.251024e+05</td>\n </tr>\n <tr>\n <th>24740</th>\n <td>1.797679e+05</td>\n <td>277000.0</td>\n <td>9.723206e+04</td>\n </tr>\n <tr>\n <th>30773</th>\n <td>1.918836e+05</td>\n <td>191000.0</td>\n <td>-8.836094e+02</td>\n </tr>\n <tr>\n <th>48836</th>\n <td>2.459480e+05</td>\n <td>44000.0</td>\n <td>-2.019480e+05</td>\n </tr>\n <tr>\n <th>33575</th>\n <td>2.579890e+05</td>\n <td>194000.0</td>\n <td>-6.398902e+04</td>\n </tr>\n <tr>\n <th>17747</th>\n <td>3.077202e+05</td>\n <td>96000.0</td>\n <td>-2.117202e+05</td>\n </tr>\n <tr>\n <th>8033</th>\n <td>2.786577e+05</td>\n <td>300000.0</td>\n <td>2.134231e+04</td>\n </tr>\n <tr>\n <th>19947</th>\n <td>2.598304e+05</td>\n <td>208000.0</td>\n <td>-5.183044e+04</td>\n </tr>\n <tr>\n <th>14517</th>\n <td>3.641021e+05</td>\n <td>279000.0</td>\n <td>-8.510206e+04</td>\n </tr>\n <tr>\n <th>2300</th>\n <td>1.933479e+05</td>\n <td>300000.0</td>\n <td>1.066521e+05</td>\n </tr>\n <tr>\n <th>32354</th>\n <td>2.276155e+05</td>\n <td>242000.0</td>\n <td>1.438455e+04</td>\n </tr>\n <tr>\n <th>25063</th>\n <td>5.030112e+05</td>\n <td>401000.0</td>\n <td>-1.020112e+05</td>\n </tr>\n <tr>\n <th>10249</th>\n <td>5.509758e+05</td>\n <td>753000.0</td>\n <td>2.020242e+05</td>\n </tr>\n <tr>\n <th>27067</th>\n <td>3.800398e+05</td>\n <td>300000.0</td>\n <td>-8.003975e+04</td>\n </tr>\n <tr>\n <th>17257</th>\n <td>3.376987e+05</td>\n <td>159000.0</td>\n <td>-1.786987e+05</td>\n </tr>\n <tr>\n <th>37320</th>\n <td>1.592139e+05</td>\n <td>300000.0</td>\n <td>1.407861e+05</td>\n </tr>\n <tr>\n <th>32137</th>\n <td>3.319711e+05</td>\n <td>225000.0</td>\n <td>-1.069711e+05</td>\n </tr>\n <tr>\n <th>26674</th>\n <td>5.307900e+05</td>\n <td>502000.0</td>\n <td>-2.879000e+04</td>\n </tr>\n <tr>\n <th>8883</th>\n <td>3.118130e+05</td>\n <td>300000.0</td>\n <td>-1.181297e+04</td>\n </tr>\n <tr>\n <th>4971</th>\n <td>2.704140e+05</td>\n <td>229000.0</td>\n <td>-4.141400e+04</td>\n </tr>\n <tr>\n <th>22640</th>\n <td>5.254141e+05</td>\n <td>300000.0</td>\n <td>-2.254141e+05</td>\n </tr>\n <tr>\n <th>26896</th>\n <td>2.854698e+05</td>\n <td>280000.0</td>\n <td>-5.469781e+03</td>\n </tr>\n <tr>\n <th>20190</th>\n <td>5.135508e+05</td>\n <td>454000.0</td>\n <td>-5.955075e+04</td>\n </tr>\n <tr>\n <th>45110</th>\n <td>7.120031e+04</td>\n <td>158000.0</td>\n <td>8.679969e+04</td>\n </tr>\n <tr>\n <th>47222</th>\n <td>7.674050e+05</td>\n <td>1393000.0</td>\n <td>6.255950e+05</td>\n </tr>\n <tr>\n <th>33114</th>\n <td>8.127677e+04</td>\n <td>205000.0</td>\n <td>1.237232e+05</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>38636</th>\n <td>3.308949e+05</td>\n <td>295000.0</td>\n <td>-3.589491e+04</td>\n </tr>\n <tr>\n <th>1715</th>\n <td>3.611993e+05</td>\n <td>300000.0</td>\n <td>-6.119934e+04</td>\n </tr>\n <tr>\n <th>21725</th>\n <td>3.393563e+05</td>\n <td>201000.0</td>\n <td>-1.383563e+05</td>\n </tr>\n <tr>\n <th>5363</th>\n <td>2.933535e+05</td>\n <td>470000.0</td>\n <td>1.766465e+05</td>\n </tr>\n <tr>\n <th>41307</th>\n <td>2.927023e+05</td>\n <td>263000.0</td>\n <td>-2.970231e+04</td>\n </tr>\n <tr>\n <th>37143</th>\n <td>2.205328e+05</td>\n <td>300000.0</td>\n <td>7.946719e+04</td>\n </tr>\n <tr>\n <th>5462</th>\n <td>2.741175e+05</td>\n <td>108000.0</td>\n <td>-1.661175e+05</td>\n </tr>\n <tr>\n <th>11186</th>\n <td>3.597577e+05</td>\n <td>290000.0</td>\n <td>-6.975769e+04</td>\n </tr>\n <tr>\n <th>830</th>\n <td>3.427161e+05</td>\n <td>300000.0</td>\n <td>-4.271612e+04</td>\n </tr>\n <tr>\n <th>4061</th>\n <td>2.049018e+05</td>\n <td>248000.0</td>\n <td>4.309817e+04</td>\n </tr>\n <tr>\n <th>32620</th>\n <td>4.135647e+05</td>\n <td>677000.0</td>\n <td>2.634353e+05</td>\n </tr>\n <tr>\n <th>14642</th>\n <td>3.398079e+05</td>\n <td>253000.0</td>\n <td>-8.680791e+04</td>\n </tr>\n <tr>\n <th>20469</th>\n <td>2.944676e+05</td>\n <td>300000.0</td>\n <td>5.532438e+03</td>\n </tr>\n <tr>\n <th>11297</th>\n <td>1.536740e+05</td>\n <td>209000.0</td>\n <td>5.532600e+04</td>\n </tr>\n <tr>\n <th>26970</th>\n <td>4.147671e+05</td>\n <td>387000.0</td>\n <td>-2.776712e+04</td>\n </tr>\n <tr>\n <th>23190</th>\n <td>3.834831e+05</td>\n <td>252000.0</td>\n <td>-1.314831e+05</td>\n </tr>\n <tr>\n <th>12</th>\n <td>2.365862e+05</td>\n <td>183000.0</td>\n <td>-5.358619e+04</td>\n </tr>\n <tr>\n <th>4774</th>\n <td>2.690958e+05</td>\n <td>300000.0</td>\n <td>3.090422e+04</td>\n </tr>\n <tr>\n <th>38706</th>\n <td>1.651462e+06</td>\n <td>3784000.0</td>\n <td>2.132538e+06</td>\n </tr>\n <tr>\n <th>32311</th>\n <td>3.506942e+05</td>\n <td>206000.0</td>\n <td>-1.446942e+05</td>\n </tr>\n <tr>\n <th>11628</th>\n <td>2.259772e+05</td>\n <td>300000.0</td>\n <td>7.402280e+04</td>\n </tr>\n <tr>\n <th>30916</th>\n <td>2.479177e+05</td>\n <td>271000.0</td>\n <td>2.308233e+04</td>\n </tr>\n <tr>\n <th>12684</th>\n <td>3.058095e+04</td>\n <td>265000.0</td>\n <td>2.344191e+05</td>\n </tr>\n <tr>\n <th>159</th>\n <td>1.269404e+05</td>\n <td>172000.0</td>\n <td>4.505959e+04</td>\n </tr>\n <tr>\n <th>4968</th>\n <td>2.719720e+05</td>\n <td>500000.0</td>\n <td>2.280280e+05</td>\n </tr>\n <tr>\n <th>9549</th>\n <td>2.106300e+05</td>\n <td>199000.0</td>\n <td>-1.162995e+04</td>\n </tr>\n <tr>\n <th>22910</th>\n <td>1.863523e+05</td>\n <td>419000.0</td>\n <td>2.326477e+05</td>\n </tr>\n <tr>\n <th>37214</th>\n <td>2.004230e+05</td>\n <td>118400.0</td>\n <td>-8.202305e+04</td>\n </tr>\n <tr>\n <th>28486</th>\n <td>2.827678e+05</td>\n <td>228000.0</td>\n <td>-5.476775e+04</td>\n </tr>\n <tr>\n <th>17758</th>\n <td>2.464092e+05</td>\n <td>120800.0</td>\n <td>-1.256092e+05</td>\n </tr>\n </tbody>\n</table>\n<p>358 rows × 3 columns</p>\n</div>",
"text/plain": " predict y error\n28294 6.469594e+05 829000.0 1.820406e+05\n2880 2.999709e+05 230000.0 -6.997094e+04\n37208 5.582744e+05 1053000.0 4.947256e+05\n26399 1.793602e+05 68000.0 -1.113602e+05\n28139 1.848976e+05 410000.0 2.251024e+05\n24740 1.797679e+05 277000.0 9.723206e+04\n30773 1.918836e+05 191000.0 -8.836094e+02\n48836 2.459480e+05 44000.0 -2.019480e+05\n33575 2.579890e+05 194000.0 -6.398902e+04\n17747 3.077202e+05 96000.0 -2.117202e+05\n8033 2.786577e+05 300000.0 2.134231e+04\n19947 2.598304e+05 208000.0 -5.183044e+04\n14517 3.641021e+05 279000.0 -8.510206e+04\n2300 1.933479e+05 300000.0 1.066521e+05\n32354 2.276155e+05 242000.0 1.438455e+04\n25063 5.030112e+05 401000.0 -1.020112e+05\n10249 5.509758e+05 753000.0 2.020242e+05\n27067 3.800398e+05 300000.0 -8.003975e+04\n17257 3.376987e+05 159000.0 -1.786987e+05\n37320 1.592139e+05 300000.0 1.407861e+05\n32137 3.319711e+05 225000.0 -1.069711e+05\n26674 5.307900e+05 502000.0 -2.879000e+04\n8883 3.118130e+05 300000.0 -1.181297e+04\n4971 2.704140e+05 229000.0 -4.141400e+04\n22640 5.254141e+05 300000.0 -2.254141e+05\n26896 2.854698e+05 280000.0 -5.469781e+03\n20190 5.135508e+05 454000.0 -5.955075e+04\n45110 7.120031e+04 158000.0 8.679969e+04\n47222 7.674050e+05 1393000.0 6.255950e+05\n33114 8.127677e+04 205000.0 1.237232e+05\n... ... ... ...\n38636 3.308949e+05 295000.0 -3.589491e+04\n1715 3.611993e+05 300000.0 -6.119934e+04\n21725 3.393563e+05 201000.0 -1.383563e+05\n5363 2.933535e+05 470000.0 1.766465e+05\n41307 2.927023e+05 263000.0 -2.970231e+04\n37143 2.205328e+05 300000.0 7.946719e+04\n5462 2.741175e+05 108000.0 -1.661175e+05\n11186 3.597577e+05 290000.0 -6.975769e+04\n830 3.427161e+05 300000.0 -4.271612e+04\n4061 2.049018e+05 248000.0 4.309817e+04\n32620 4.135647e+05 677000.0 2.634353e+05\n14642 3.398079e+05 253000.0 -8.680791e+04\n20469 2.944676e+05 300000.0 5.532438e+03\n11297 1.536740e+05 209000.0 5.532600e+04\n26970 4.147671e+05 387000.0 -2.776712e+04\n23190 3.834831e+05 252000.0 -1.314831e+05\n12 2.365862e+05 183000.0 -5.358619e+04\n4774 2.690958e+05 300000.0 3.090422e+04\n38706 1.651462e+06 3784000.0 2.132538e+06\n32311 3.506942e+05 206000.0 -1.446942e+05\n11628 2.259772e+05 300000.0 7.402280e+04\n30916 2.479177e+05 271000.0 2.308233e+04\n12684 3.058095e+04 265000.0 2.344191e+05\n159 1.269404e+05 172000.0 4.505959e+04\n4968 2.719720e+05 500000.0 2.280280e+05\n9549 2.106300e+05 199000.0 -1.162995e+04\n22910 1.863523e+05 419000.0 2.326477e+05\n37214 2.004230e+05 118400.0 -8.202305e+04\n28486 2.827678e+05 228000.0 -5.476775e+04\n17758 2.464092e+05 120800.0 -1.256092e+05\n\n[358 rows x 3 columns]"
},
"metadata": {}
}
],
"execution_count": 483,
"metadata": {
"hidden": true,
"scrolled": true
}
},
{
"cell_type": "markdown",
"source": [
"# Modelo 2 de ofrecimiento"
],
"metadata": {
"heading_collapsed": true
}
},
{
"cell_type": "markdown",
"source": [
"## Incremento empresas DENUE"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"# Este No\n",
"score_denue = pd.read_csv('scores_modelo2/scoreAumentoEmpresasDENUE.csv',\n",
" dtype={'CVE_ENT': str, 'CVE_MUN': str, 'CVE_SCIAN': str})\\\n",
" .rename(columns={'CVE_ENT': 'cve_ent', 'CVE_MUN': 'cve_mun', 'CVE_SCIAN': 'scian',\n",
" 'scoreAumentoEmpresasDENUE': 'score_AumentoEmpresasDENUE'})\\\n",
" .assign(cve_mun=lambda x:x['cve_mun'].str.zfill(5),\n",
" cve_ent=lambda x:x['cve_ent'].str.zfill(2))\n",
"score_denue.head()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 509,
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>cve_ent</th>\n <th>cve_mun</th>\n <th>score_AumentoEmpresasDENUE</th>\n <th>scian</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>01</td>\n <td>01001</td>\n <td>3.0</td>\n <td>11</td>\n </tr>\n <tr>\n <th>1</th>\n <td>01</td>\n <td>01001</td>\n <td>2.0</td>\n <td>21</td>\n </tr>\n <tr>\n <th>2</th>\n <td>01</td>\n <td>01001</td>\n <td>1.0</td>\n <td>22</td>\n </tr>\n <tr>\n <th>3</th>\n <td>01</td>\n <td>01001</td>\n <td>2.0</td>\n <td>23</td>\n </tr>\n <tr>\n <th>4</th>\n <td>01</td>\n <td>01001</td>\n <td>2.0</td>\n <td>31</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " cve_ent cve_mun score_AumentoEmpresasDENUE scian\n0 01 01001 3.0 11\n1 01 01001 2.0 21\n2 01 01001 1.0 22\n3 01 01001 2.0 23\n4 01 01001 2.0 31"
},
"metadata": {}
}
],
"execution_count": 509,
"metadata": {
"hidden": true,
"scrolled": true
}
},
{
"cell_type": "markdown",
"source": [
"## Crecimiento Variables IMSS"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"cve_mun = '01001'\n",
"cve_ent = cve_mun[0:2]\n",
"scian = '11'"
],
"outputs": [],
"execution_count": 613,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"score_imss = pd.read_csv('scores_modelo2/scoreVariablesIMSS.csv',\n",
" dtype={'cve_mun': str, 'scian': str})\\\n",
" .rename(columns={'scoreCrecimientoNumeroAsegurados': 'score_CrecimientoNumeroAsegurados',\n",
" 'scoreCrecimientoSalario': 'score_CrecimientoSalario'})\\\n",
" .assign(cve_mun=lambda x:x['cve_mun'].str.zfill(5))\\\n",
" [['cve_mun', 'scian', 'score_CrecimientoNumeroAsegurados', 'score_CrecimientoSalario']]\\\n",
" .sort_values(['cve_mun', 'scian'])\\\n",
" .set_index(['cve_mun', 'scian'])\n",
"\n",
"score_imss\n",
"\n",
"score_imss['score_CrecimientoNumeroAsegurados'].get((cve_mun, scian), pd.Series([0])).mean()\n",
"score_imss['score_CrecimientoSalario'].get((cve_mun, scian), pd.Series([1])).mean()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 617,
"data": {
"text/plain": "2.0"
},
"metadata": {}
}
],
"execution_count": 617,
"metadata": {
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"## Crecimiento empleo"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"score_empleo = pd.read_csv('scores_modelo2/score_CrecimientoEmpleo.csv', \n",
" dtype={'cve_ent': str, 'scian': str})\\\n",
" [['cve_ent', 'scian', 'score_CrecimientoEmpleo']]\\\n",
" .set_index(['cve_ent', 'scian'])\n",
"score_empleo.head()\n",
"score_empleo['score_CrecimientoEmpleo'].get((cve_ent, scian), pd.Series([1])).mean()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 622,
"data": {
"text/plain": "1.0"
},
"metadata": {}
}
],
"execution_count": 622,
"metadata": {
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"## Empadronados SAT"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"score_sat_emp_ent = pd.read_csv('scores_modelo2/empadronadosSAT.csv',\n",
" dtype={'CVE_ENT': str})\\\n",
" .rename(columns={'CVE_ENT': 'cve_ent',\n",
" 'scoreEmpadronadosSAT': 'score_EmpadronadosSAT'})\\\n",
" .assign(cve_ent=lambda x:x['cve_ent'].str.zfill(2))\\\n",
" [['cve_ent', 'score_EmpadronadosSAT']]\\\n",
" .set_index(['cve_ent'])\n",
"\n",
"score_sat_emp_ent.head()\n",
"score_sat_emp_ent['score_EmpadronadosSAT'].get(cve_ent, pd.Series([1])).mean()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 626,
"data": {
"text/plain": "3.0"
},
"metadata": {}
}
],
"execution_count": 626,
"metadata": {
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"## Crecimiento Empadronados Sat por entidad"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"score_sat_ent = pd.read_csv('scores_modelo2/crecEmpadronadosSAT.csv',\n",
" dtype={'CVE_ENT': str})\\\n",
" .rename(columns={'CVE_ENT': 'cve_ent',\n",
" 'scoreCrecEmpadronadosSAT': 'score_CrecEmpadronadosSAT'})\\\n",
" .assign(cve_ent=lambda x:x['cve_ent'].str.zfill(2))\\\n",
" [['cve_ent', 'score_CrecEmpadronadosSAT']]\\\n",
" .set_index(['cve_ent'])\n",
"\n",
"score_sat_ent.head()\n",
"score_sat_ent['score_CrecEmpadronadosSAT'].get(cve_ent, pd.Series([1])).mean()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 629,
"data": {
"text/plain": "1.0"
},
"metadata": {}
}
],
"execution_count": 629,
"metadata": {
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"## Crecimiento sector SAT"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"score_sat_sector = pd.read_csv('scores_modelo2/crecimientoSectorSAT.csv', \n",
" dtype={'CVE_SCIAN': str})\\\n",
" .rename(columns={'CVE_SCIAN': 'scian',\n",
" 'scoreCrecimientoSectorSAT': 'score_CrecimientoSectorSAT'})\\\n",
" [['scian', 'score_CrecimientoSectorSAT']]\\\n",
" .set_index(['scian'])\n",
" \n",
"\n",
"score_sat_sector.head()\n",
"score_sat_sector['score_CrecimientoSectorSAT'].get(scian, pd.Series([1])).mean()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 631,
"data": {
"text/plain": "3.0"
},
"metadata": {}
}
],
"execution_count": 631,
"metadata": {
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"## Sucursales bancarias"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"score_sucursales = pd.read_csv('scores_modelo2/sucursalesBancarias.csv',\n",
" dtype={'cve_mun': str, 'cve_ent': str})\\\n",
" [['cve_mun', 'cve_ent', 'score_sucursalesBancarias']]\\\n",
" .set_index(['cve_mun'])\n",
"score_sucursales.head()\n",
"score_sucursales['score_sucursalesBancarias'].get(cve_mun, pd.Series([1])).mean()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 632,
"data": {
"text/plain": "3.0"
},
"metadata": {}
}
],
"execution_count": 632,
"metadata": {
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"## Contratos de créditos"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"score_creditos = pd.read_csv('scores_modelo2/creditos.csv', dtype={'cve_mun': str, 'cve_ent': str})\\\n",
" [['cve_mun', 'cve_ent', 'score_creditos']]\\\n",
" .set_index('cve_mun')\n",
"\n",
"score_creditos.head()\n",
"score_creditos['score_creditos'].get(cve_mun, pd.Series([1])).mean()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 633,
"data": {
"text/plain": "3.0"
},
"metadata": {}
}
],
"execution_count": 633,
"metadata": {
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"## Tarjetas de crédito"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"score_tcredit = pd.read_csv('scores_modelo2/tarjetasCredito.csv', dtype={'cve_mun': str, 'cve_ent': str})\\\n",
" [['cve_mun', 'cve_ent', 'score_tarjetasCredito']]\\\n",
" .set_index('cve_mun')\n",
"\n",
"score_tcredit.head()\n",
"score_tcredit['score_tarjetasCredito'].get(cve_mun, pd.Series([1])).mean()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 634,
"data": {
"text/plain": "3.0"
},
"metadata": {}
}
],
"execution_count": 634,
"metadata": {
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"## Cuentas Transaccionales"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"score_ctransac = pd.read_csv('scores_modelo2/cuentasTransaccionales.csv', dtype={'cve_mun': str, 'cve_ent': str})\\\n",
" [['cve_mun', 'cve_ent', 'score_cuentasTransaccionales']]\\\n",
" .set_index('cve_mun')\n",
"\n",
"score_ctransac.head()\n",
"score_ctransac['score_cuentasTransaccionales'].get(cve_mun, pd.Series([1])).mean()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 635,
"data": {
"text/plain": "3.0"
},
"metadata": {}
}
],
"execution_count": 635,
"metadata": {
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"## contratos Banca Movil"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"score_banca_movil = pd.read_csv('scores_modelo2/contratosBancaMovil.csv',\n",
" dtype={'cve_mun': str, 'cve_ent': str})\\\n",
" [['cve_mun', 'cve_ent', 'score_contratosBancaMovil']]\\\n",
" .set_index('cve_mun')\n",
"\n",
"score_banca_movil.head()\n",
"score_banca_movil['score_contratosBancaMovil'].get(cve_mun, pd.Series([1])).mean()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 636,
"data": {
"text/plain": "3.0"
},
"metadata": {}
}
],
"execution_count": 636,
"metadata": {
"hidden": true,
"scrolled": true
}
},
{
"cell_type": "markdown",
"source": [
"## Establecimientos con TPV"
],
"metadata": {
"heading_collapsed": true,
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"score_tpv = pd.read_csv('scores_modelo2/establecimientosConTPV.csv', dtype={'cve_mun': str, 'cve_ent': str})\\\n",
" [['cve_mun', 'cve_ent', 'score_establecimientosConTPV']]\\\n",
" .set_index('cve_mun')\n",
"\n",
"score_tpv.head()\n",
"score_tpv['score_establecimientosConTPV'].get(cve_mun, pd.Series([1])).mean()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 637,
"data": {
"text/plain": "3.0"
},
"metadata": {}
}
],
"execution_count": 637,
"metadata": {
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"## Score variables de seguridad"
],
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"score_seguridad = pd.read_csv('scores_modelo2/Scores_variables_seguridad.csv',\n",
" dtype={'cve_mun': str, 'cve_ent': str})\\\n",
" .query('cve_ent!=\"00\"')\\\n",
" .set_index('cve_mun')\n",
"\n",
"score_seguridad['score_homicidios'].get(cve_mun, pd.Series([1])).mean()\n",
"score_seguridad['score_tasaPolicia'].get(cve_mun, pd.Series([1])).mean()\n",
"score_seguridad['score_percepInseguridad'].get(cve_mun, pd.Series([1])).mean()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 642,
"data": {
"text/plain": "3.0"
},
"metadata": {}
}
],
"execution_count": 642,
"metadata": {
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"## Score final"
],
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"ponderadores = {\n",
" 'crec_produccion': 0.1,\n",
" 'crec_pocup': 0.1,\n",
" 'gasto_servicios': 0.1,\n",
" 'gasto_publicidad': 0.1,\n",
" 'tasa_homicidio': 0.1,\n",
" 'tasa_policias': 0.1,\n",
" 'percep_insegur': 0.1,\n",
" 'crec_padron_sat_ent': 0.03,\n",
" 'crec_padron_sat_sector': 0.03,\n",
" 'crec_empleo': 0.03,\n",
" 'crec_empleo_imss': 0.03,\n",
" 'crec_salario_imss': 0.03,\n",
" 'banca_movil': 0.03,\n",
" 'cuentas_transac': 0.03,\n",
" 'tarjetas_credito': 0.03,\n",
" 'sucursales_bank': 0.03,\n",
" 'estab_tpv': 0.03\n",
" }\n",
"seleccion_empresa = {\n",
" 'crec_produccion': {'Nueva empresa (menor a un año)': 1,\n",
" 'Entre 0 y 10% de crecimiento anual': 2,\n",
" 'Más del 10% de crecimiento anual': 3},\n",
" 'crec_pocup': {'Nueva empresa (menor a un año)': 1,\n",
" 'Te mantienes con los mismos empleados': 1,\n",
" 'Ha crecido el número de empleados entre 1% y 30%': 2,\n",
" 'Ha crecido el número de empleados mas del 30%': 3},\n",
" 'gasto_servicios': {'Entre 0 y 5%': 3,\n",
" 'Entre 5 y 20%': 2,\n",
" 'Mayor al 20%': 1},\n",
" 'gasto_publicidad': {'Inviertes un poco en publicidad tradicional y/o RRSS': 1,\n",
" 'Inviertes una buena cantidad en publicidad tradicional y/o RRSS': 2,\n",
" 'Inviertes mucho en publicidad tradicional y/o RRSS y cuentas con una comunidad y marca reconocida': 3,\n",
" }\n",
"}"
],
"outputs": [],
"execution_count": 772,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"def get_puntaje_2(scian, cve_mun, empresa_crec_produccion,\n",
" empresa_crec_pocup, empresa_gasto_servicios,\n",
" empresa_gasto_publicidad):\n",
" cve_ent = cve_mun[0:2]\n",
" valores = {\n",
" 'crec_produccion': empresa_crec_produccion,\n",
" 'crec_pocup': empresa_crec_pocup,\n",
" 'gasto_servicios': empresa_gasto_servicios,\n",
" 'gasto_publicidad': empresa_gasto_publicidad,\n",
" 'tasa_homicidio': score_seguridad['score_homicidios'].get(cve_mun, pd.Series([1])).mean(),\n",
" 'tasa_policias': score_seguridad['score_tasaPolicia'].get(cve_mun, pd.Series([1])).mean(),\n",
" 'percep_insegur': score_seguridad['score_percepInseguridad'].get(cve_mun, pd.Series([1])).mean(),\n",
" 'crec_padron_sat_ent': score_sat_ent['score_CrecEmpadronadosSAT'].get(cve_ent, pd.Series([1])).mean(),\n",
" 'crec_padron_sat_sector': score_sat_sector['score_CrecimientoSectorSAT'].get(scian, pd.Series([1])).mean(),\n",
" 'crec_empleo': score_empleo['score_CrecimientoEmpleo'].get((cve_ent, scian), pd.Series([1])).mean(),\n",
" 'crec_empleo_imss': score_imss['score_CrecimientoNumeroAsegurados'].get((cve_mun, scian), pd.Series([0])).mean(),\n",
" 'crec_salario_imss': score_imss['score_CrecimientoSalario'].get((cve_mun, scian), pd.Series([0])).mean(),\n",
" 'banca_movil': score_banca_movil['score_contratosBancaMovil'].get(cve_mun, pd.Series([1])).mean(),\n",
" 'cuentas_transac': score_ctransac['score_cuentasTransaccionales'].get(cve_mun, pd.Series([1])).mean(),\n",
" 'tarjetas_credito': score_tcredit['score_tarjetasCredito'].get(cve_mun, pd.Series([1])).mean(),\n",
" 'sucursales_bank': score_sucursales['score_sucursalesBancarias'].get(cve_mun, pd.Series([1])).mean(),\n",
" 'estab_tpv': score_tpv['score_establecimientosConTPV'].get(cve_mun, pd.Series([1])).mean()\n",
" }\n",
"\n",
" puntaje2 = ((sum([v*valores[k] for k, v in ponderadores.items()])-1)/2)*0.2\n",
" return puntaje2"
],
"outputs": [],
"execution_count": 742,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [],
"outputs": [],
"execution_count": null,
"metadata": {
"hidden": true
}
},
{
"cell_type": "markdown",
"source": [
"# Inputs usuario"
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"dicc_municipios = demog.set_index('municipio')['cve_mun'].to_dict()\n",
"opciones_tipo_poblacion = demog.tipo_de_poblacion.unique().tolist()\n",
"opciones_tipo_personas = base_modelo.tipo_persona.unique().tolist()\n",
"opciones_nb_actividad = list(dicc_sector_scian.keys())\n",
"opciones_rango_ventas = base_modelo.rango_ventas.unique().tolist()\n",
"opciones_municipio = list(dicc_municipios.keys())\n",
"\n",
"\n",
"monto_maximo = [round(0.2*1000000*int(x[-3:-1])) for x in opciones_rango_ventas]\n",
"\n",
"\n",
"# CONVERSIÓN\n",
"\n",
"def get_puntaje_1(input_antiguedad, input_municipio, input_tipo_persona, input_rango_ventas, input_nb_actividad ):\n",
" scian = dicc_sector_scian[opciones_nb_actividad[input_nb_actividad]]\n",
" cve_mun = dicc_municipios[opciones_municipio[input_municipio]]\n",
" tipo_persona = opciones_tipo_personas[input_tipo_persona]\n",
" rango_ventas = opciones_rango_ventas[input_tipo_persona]\n",
" nb_actividad = opciones_nb_actividad[input_nb_actividad]\n",
"\n",
" # VECTOR CON VALORES\n",
" df_input = base_modelo\\\n",
" .query(f'scian==\"{scian}\" & cve_mun==\"{cve_mun}\"')\\\n",
" [vars_externas+['tipo_de_poblacion']].drop_duplicates()\n",
" df_input['antiguedad_meses'] = input_antiguedad\n",
" df_input['tipo_persona'] = tipo_persona\n",
" df_input['nb_actividad'] = nb_actividad\n",
" df_input['rango_ventas'] = rango_ventas\n",
" \n",
" predicho = clf.predict_proba(df_input)[0][1]\n",
" return predicho\n"
],
"outputs": [],
"execution_count": 824,
"metadata": {}
},
{
"cell_type": "code",
"source": [
"predicho = get_puntaje_1(input_antiguedad = 24, input_municipio = 0, \n",
" input_tipo_persona = 1, input_rango_ventas = 4, input_nb_actividad = 2)\n",
"\n",
"if predicho>cutoff_2:\n",
" print('Puedes acceder a crédito, hasta un máximo de', monto_maximo[input_rango_ventas])\n",
" print(round(predicho, 3))\n",
"elif predicho<cutoff_1:\n",
" print('Lo sentimos, Su oferta no fue aprobada')\n",
" print(round(predicho, 3))\n",
"else:\n",
" print('Por favor, ayúdanos con otros datos adicionales', print(round(predicho, 3)))\n",
" empresa_crec_produccion = seleccion_empresa['crec_produccion']['Más del 10% de crecimiento anual']\n",
" empresa_crec_pocup = seleccion_empresa['crec_pocup']['Ha crecido el número de empleados mas del 30%']\n",
" empresa_gasto_servicios = seleccion_empresa['gasto_servicios']['Entre 0 y 5%']\n",
" empresa_gasto_publicidad = seleccion_empresa['gasto_publicidad']['Inviertes mucho en publicidad tradicional y/o RRSS y cuentas con una comunidad y marca reconocida']\n",
" \n",
" puntaje_adicional = get_puntaje_2(scian, cve_mun, empresa_crec_produccion,\n",
" empresa_crec_pocup, empresa_gasto_servicios,\n",
" empresa_gasto_publicidad)\n",
" puntaje_final = predicho+puntaje_adicional\n",
" if puntaje_final>cutoff_2:\n",
" print('Puedes acceder a crédito, hasta un máximo de', monto_maximo[input_rango_ventas])\n",
" print(round(puntaje_final, 3))\n",
" else:\n",
" print('Lo sentimos, Su oferta no fue aprobada. Puntaje insuficiente: ', round(puntaje_final, 3) )"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0.464\n",
"Por favor, ayúdanos con otros datos adicionales None\n",
"Puedes acceder a crédito, hasta un máximo de 1600000\n",
"0.616\n"
]
}
],
"execution_count": 825,
"metadata": {}
},
{
"cell_type": "code",
"source": [],
"outputs": [],
"execution_count": null,
"metadata": {}
},
{
"cell_type": "markdown",
"source": [
"# Modelo con datos de prueba"
],
"metadata": {
"heading_collapsed": true
}
},
{
"cell_type": "code",
"source": [
"dicc_cliente_municipio = df.set_index('cliente')['cve_mun'].to_dict()"
],
"outputs": [],
"execution_count": 784,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"base_test = pd.read_excel('BASEPYME_USUARIOFINAL.xlsx',\n",
" dtype={'CD_POSTAL': str, 'codgeo': str})\\\n",
" .assign(cve_mun=lambda x: x['CLIENTE'].map(dicc_cliente_municipio))\\\n",
" .rename(columns=normaliza)\\\n",
" .rename(columns={'codgeo': 'cve_mun'})\\\n",
" .assign(cve_ent=lambda x: x['cve_mun'].str[0:2],\n",
" nacimiento=lambda x: x['fh_nacimiento'].pipe(pd.to_datetime, errors='coerce'),\n",
" antiguedad_banco=lambda x: x['fh_antiguedad_con_banco'].pipe(pd.to_datetime, errors='coerce'),\n",
" nacimiento_meses=lambda x:(pd.datetime.today()-x['nacimiento']).dt.days.div(30),\n",
" antiguedad_meses=lambda x:(pd.datetime.today()-x['antiguedad_banco']).dt.days.div(30),\n",
" mora=lambda x: x['moratarj_tarjeta_pyme'].add(x['moracs_credito_pyme']).gt(0).astype(int),\n",
" oferta=lambda x: x['oferta_credito_en_2018'].notnull().astype(int),\n",
" scian=lambda x: x['nb_sector_n2'].map(dicc_sector_scian),\n",
" sector_imss1=lambda x: x['cliente'].map(dicc_cliente_imss))\n",
"\n",
"base_final = base_test.merge(demog, on='cve_mun', how='left')\n",
"base_final = base_final.merge(denue, on=['cve_mun', 'scian'], how='left')\n",
"base_final = base_final.merge(imss, on=['cve_mun', 'sector_imss1'], how='left')\n",
"base_final = base_final.merge(inclusion, on='cve_mun', how='left')\n",
"base_final = base_final.merge(cartera_ent, on='cve_ent', how='left')\n",
"base_final = base_final.merge(sat_ent, on='cve_ent', how='left')\n",
"base_final = base_final.merge(sat_sector, on='scian', how='left')\n",
"base_final = base_final.merge(itae, on='cve_ent', how='left')\n",
"base_final = base_final.merge(segur, on='cve_mun', how='left')"
],
"outputs": [],
"execution_count": 791,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"base_modelo = base_final.pipe(lambda x: pd.concat([x.loc[x.oferta.eq(1)],\n",
" x.sample(n=x.oferta.eq(1).sum(), random_state=12345)]))"
],
"outputs": [],
"execution_count": 792,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"# LogisticRegression(solver='lbfgs')\n",
"#clf = RandomForestClassifier(n_estimators=100, max_depth=7, random_state=0)\n",
"\n",
"X = base_modelo[numeric_features+categorical_features]\n",
"y = base_modelo['oferta']\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n",
"\n",
"modelo1 = clf.fit(X_train, y_train)\n",
"\n",
"print(\"model score: %.3f\" % clf.score(X_test, y_test))\n",
"print(resultados_modelo(modelo1))"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"model score: 0.852\n",
"{'precision': 0.8486917346686449, 'recall': 0.8536102752178547, 'f1': 0.8501001637984134, 'confusion': array([[271, 41],\n",
" [ 66, 343]], dtype=int64), 'cohen_k': 0.700563963033834, 'precision_0': 0.8041543026706232, 'precision_1': 0.8932291666666666}\n"
]
}
],
"execution_count": 793,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"prob = pd.concat([pd.DataFrame(clf.predict_proba(X))\\\n",
" .rename(columns={1: 'prob'})[['prob']],\n",
" y.reset_index(),\n",
" pd.DataFrame(clf.predict(X), columns=['predict'])], axis=1)\\\n",
" .assign(error=lambda x: x['oferta'].ne(x['predict']).astype(int))\n",
"\n",
"prob.head()"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 795,
"data": {
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>prob</th>\n <th>index</th>\n <th>oferta</th>\n <th>predict</th>\n <th>error</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>0.251980</td>\n <td>49</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n </tr>\n <tr>\n <th>1</th>\n <td>0.668538</td>\n <td>96</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>0.791692</td>\n <td>113</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>0.607739</td>\n <td>158</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>0.558732</td>\n <td>174</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " prob index oferta predict error\n0 0.251980 49 1 0 1\n1 0.668538 96 1 1 0\n2 0.791692 113 1 1 0\n3 0.607739 158 1 1 0\n4 0.558732 174 1 1 0"
},
"metadata": {}
}
],
"execution_count": 795,
"metadata": {
"hidden": true
}
},
{
"cell_type": "code",
"source": [
"cutoff_1 = 0.4 # si el modelo 2 es bueno podríamos bajar más esta\n",
"cutoff_2 = 0.6\n",
"# No les di, pero debería haberles dado\n",
"fn = prob.sort_values('prob').query(f'prob<={cutoff_2}')['oferta'].sum()\n",
"# les di, pero no debí\n",
"fp = prob.query(f'prob>={cutoff_2} & oferta==0')['oferta'].count()\n",
"# Número de observaciones en zona gris\n",
"n_obs = prob.shape[0]\n",
"n_gris = prob.sort_values('prob').query(f'{cutoff_1}<=prob<={cutoff_2}')['oferta'].count()\n",
"error_rec = prob.sort_values('prob').query(f'{cutoff_1}<=prob<={cutoff_2}')['error'].sum()\n",
"print(f'FP:{fp}\\nFN:{fn} ({fn/prob[\"oferta\"].sum():0.1%})\\nTotal error: {fp+fn} ({(fp+fn)/n_obs:0.1%})'\n",
" f'\\nZona gris: {n_gris} ({n_gris/n_obs:0.1%})'\n",
" f'\\nError recuperable: {error_rec} ({error_rec/n_obs:0.1%})')"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"FP:174\n",
"FN:305 (16.3%)\n",
"Total error: 479 (13.3%)\n",
"Zona gris: 462 (12.8%)\n",
"Error recuperable: 171 (4.7%)\n"
]
}
],
"execution_count": 796,
"metadata": {
"hidden": true
}
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
},
"toc": {
"toc_position": {},
"skip_h1_title": false,
"number_sections": true,
"title_cell": "Table of Contents",
"toc_window_display": false,
"base_numbering": 1,
"toc_section_display": true,
"title_sidebar": "Contents",
"toc_cell": false,
"nav_menu": {},
"sideBar": true
},
"nteract": {
"version": "0.28.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment