Last active
November 17, 2022 21:44
-
-
Save taniki/3c6537e6f1f4988e5ef53db35901185a to your computer and use it in GitHub Desktop.
convertir le fichier FINESS entités juridiques dans un format csv propre
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "62e8f37e-bd33-4090-abb6-52501312b4ca", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from pathlib import Path\n", | |
"\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "07371684-f1a2-4948-bb59-4fe0bbcc2431", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"cols = [\n", | |
" 'section',\n", | |
" 'nofiness',\n", | |
" 'rs',\n", | |
" 'rslongue',\n", | |
" 'complrs',\n", | |
" 'numvoie',\n", | |
" 'typvoie',\n", | |
" 'voie',\n", | |
" 'compvoie',\n", | |
" 'compldistrib',\n", | |
" 'lieuditbp',\n", | |
" 'commune',\n", | |
" 'ligneacheminement',\n", | |
" 'departement',\n", | |
" 'libdepartement',\n", | |
" 'telephone',\n", | |
" 'statutjuridique',\n", | |
" 'libstatutjuridique',\n", | |
" 'categetab',\n", | |
" 'libcategetab',\n", | |
" 'siren',\n", | |
" 'codeape',\n", | |
" 'datecrea',\n", | |
"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "64e66a27-4a73-4f53-8d04-edf4d4d3223c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"raw = (\n", | |
" pd\n", | |
" .read_csv(\n", | |
" 'https://www.data.gouv.fr/fr/datasets/r/2cba77b2-f1de-4ef8-8428-bfe660e86844',\n", | |
" sep=';',\n", | |
" encoding='iso-8859-1',\n", | |
" header=None,\n", | |
" skiprows=1,\n", | |
" dtype='str',\n", | |
" names=cols\n", | |
" )\n", | |
" .drop(columns=['section'])\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "0298ce3d-6030-4d02-ac90-1a4c374d7bb7", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>nofiness</th>\n", | |
" <th>rs</th>\n", | |
" <th>rslongue</th>\n", | |
" <th>complrs</th>\n", | |
" <th>numvoie</th>\n", | |
" <th>typvoie</th>\n", | |
" <th>voie</th>\n", | |
" <th>compvoie</th>\n", | |
" <th>compldistrib</th>\n", | |
" <th>lieuditbp</th>\n", | |
" <th>...</th>\n", | |
" <th>departement</th>\n", | |
" <th>libdepartement</th>\n", | |
" <th>telephone</th>\n", | |
" <th>statutjuridique</th>\n", | |
" <th>libstatutjuridique</th>\n", | |
" <th>categetab</th>\n", | |
" <th>libcategetab</th>\n", | |
" <th>siren</th>\n", | |
" <th>codeape</th>\n", | |
" <th>datecrea</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>010000156</td>\n", | |
" <td>CLINIQUE CONVERT</td>\n", | |
" <td>CLINIQUE CONVERT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>62</td>\n", | |
" <td>AV</td>\n", | |
" <td>DE JASSERON</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>...</td>\n", | |
" <td>01</td>\n", | |
" <td>AIN</td>\n", | |
" <td>0428631234</td>\n", | |
" <td>73</td>\n", | |
" <td>Société Anonyme (S.A.)</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>772201489</td>\n", | |
" <td>8610Z</td>\n", | |
" <td>2001-01-01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>010000222</td>\n", | |
" <td>CLINIQUE DU SOUFFLE LE PONTET</td>\n", | |
" <td>CLINIQUE DU SOUFFLE LE PONTET</td>\n", | |
" <td>NaN</td>\n", | |
" <td>311</td>\n", | |
" <td>R</td>\n", | |
" <td>DE LA CHAPELLE</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>...</td>\n", | |
" <td>01</td>\n", | |
" <td>AIN</td>\n", | |
" <td>0474404949</td>\n", | |
" <td>95</td>\n", | |
" <td>Société par Actions Simplifiée (S.A.S.)</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>546120122</td>\n", | |
" <td>8610Z</td>\n", | |
" <td>1961-01-01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>010000255</td>\n", | |
" <td>ASS.ACCUEIL FORMA INSERT PERSON SOURD</td>\n", | |
" <td>ASSOCIATION POUR LA FORMATION ET L'INSERTION D...</td>\n", | |
" <td>ASS AFIS</td>\n", | |
" <td>5</td>\n", | |
" <td>R</td>\n", | |
" <td>DU LYCEE</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>...</td>\n", | |
" <td>01</td>\n", | |
" <td>AIN</td>\n", | |
" <td>0474226685</td>\n", | |
" <td>60</td>\n", | |
" <td>Association Loi 1901 non Reconnue d'Utilité Pu...</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>779309194</td>\n", | |
" <td>8710B</td>\n", | |
" <td>2001-01-01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>010000339</td>\n", | |
" <td>RESIDENCE FONTELUNE</td>\n", | |
" <td>RESIDENCE FONTELUNE</td>\n", | |
" <td>NaN</td>\n", | |
" <td>10</td>\n", | |
" <td>R</td>\n", | |
" <td>DE LA COMMUNE 1871</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>...</td>\n", | |
" <td>01</td>\n", | |
" <td>AIN</td>\n", | |
" <td>0474461420</td>\n", | |
" <td>21</td>\n", | |
" <td>Etablissement Social et Médico-Social Communal</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>260100011</td>\n", | |
" <td>8710A</td>\n", | |
" <td>2001-01-01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>010000347</td>\n", | |
" <td>RESIDENCE D'URFE</td>\n", | |
" <td>RESIDENCE D'URFE</td>\n", | |
" <td>NaN</td>\n", | |
" <td>74</td>\n", | |
" <td>R</td>\n", | |
" <td>CONDAMNALE</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>...</td>\n", | |
" <td>01</td>\n", | |
" <td>AIN</td>\n", | |
" <td>0385304221</td>\n", | |
" <td>21</td>\n", | |
" <td>Etablissement Social et Médico-Social Communal</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>260100029</td>\n", | |
" <td>8710A</td>\n", | |
" <td>2001-01-01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>51407</th>\n", | |
" <td>980502231</td>\n", | |
" <td>LIEU DE VIE MAECHA NA BARAKA</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>101</td>\n", | |
" <td>R</td>\n", | |
" <td>DE LA MATERNELLE GNAMBO</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>97650 BANDRABOUA</td>\n", | |
" <td>...</td>\n", | |
" <td>9F</td>\n", | |
" <td>MAYOTTE</td>\n", | |
" <td>NaN</td>\n", | |
" <td>60</td>\n", | |
" <td>Association Loi 1901 non Reconnue d'Utilité Pu...</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>850632860</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2019-01-07</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>51408</th>\n", | |
" <td>980502280</td>\n", | |
" <td>SAS MDZHADE</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>IMP</td>\n", | |
" <td>MAHARAJAH</td>\n", | |
" <td>NaN</td>\n", | |
" <td>CENTRE MAHARAJAH - BATIMENT H1</td>\n", | |
" <td>Z,I KAWENI</td>\n", | |
" <td>...</td>\n", | |
" <td>9F</td>\n", | |
" <td>MAYOTTE</td>\n", | |
" <td>0272741104</td>\n", | |
" <td>95</td>\n", | |
" <td>Société par Actions Simplifiée (S.A.S.)</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>899388896</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2021-05-18</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>51409</th>\n", | |
" <td>980502306</td>\n", | |
" <td>FRANCE ALZHEIMER MAYOTTE</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1221</td>\n", | |
" <td>AV</td>\n", | |
" <td>DU COLLEGE</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>...</td>\n", | |
" <td>9F</td>\n", | |
" <td>MAYOTTE</td>\n", | |
" <td>0269602892</td>\n", | |
" <td>60</td>\n", | |
" <td>Association Loi 1901 non Reconnue d'Utilité Pu...</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>822239802</td>\n", | |
" <td>8899B</td>\n", | |
" <td>2016-07-01</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>51410</th>\n", | |
" <td>980502447</td>\n", | |
" <td>LE CONFORT MEDICAL</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>100</td>\n", | |
" <td>RTE</td>\n", | |
" <td>DES BADAMIERS</td>\n", | |
" <td>C</td>\n", | |
" <td>NaN</td>\n", | |
" <td>LABATTOIR</td>\n", | |
" <td>...</td>\n", | |
" <td>9F</td>\n", | |
" <td>MAYOTTE</td>\n", | |
" <td>NaN</td>\n", | |
" <td>72</td>\n", | |
" <td>Société A Responsabilité Limitée (S.A.R.L.)</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2022-09-08</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>51411</th>\n", | |
" <td>980502462</td>\n", | |
" <td>PHARMACIE AL SHIFAA</td>\n", | |
" <td>PHARMACIE AL SHIFAA</td>\n", | |
" <td>NaN</td>\n", | |
" <td>54</td>\n", | |
" <td>R</td>\n", | |
" <td>Des Manguiers</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Tsoundzou 2</td>\n", | |
" <td>...</td>\n", | |
" <td>9F</td>\n", | |
" <td>MAYOTTE</td>\n", | |
" <td>NaN</td>\n", | |
" <td>78</td>\n", | |
" <td>Entreprise Unipersonnelle Responsabilité Limit...</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2022-07-25</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>51412 rows × 22 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" nofiness rs \\\n", | |
"0 010000156 CLINIQUE CONVERT \n", | |
"1 010000222 CLINIQUE DU SOUFFLE LE PONTET \n", | |
"2 010000255 ASS.ACCUEIL FORMA INSERT PERSON SOURD \n", | |
"3 010000339 RESIDENCE FONTELUNE \n", | |
"4 010000347 RESIDENCE D'URFE \n", | |
"... ... ... \n", | |
"51407 980502231 LIEU DE VIE MAECHA NA BARAKA \n", | |
"51408 980502280 SAS MDZHADE \n", | |
"51409 980502306 FRANCE ALZHEIMER MAYOTTE \n", | |
"51410 980502447 LE CONFORT MEDICAL \n", | |
"51411 980502462 PHARMACIE AL SHIFAA \n", | |
"\n", | |
" rslongue complrs numvoie \\\n", | |
"0 CLINIQUE CONVERT NaN 62 \n", | |
"1 CLINIQUE DU SOUFFLE LE PONTET NaN 311 \n", | |
"2 ASSOCIATION POUR LA FORMATION ET L'INSERTION D... ASS AFIS 5 \n", | |
"3 RESIDENCE FONTELUNE NaN 10 \n", | |
"4 RESIDENCE D'URFE NaN 74 \n", | |
"... ... ... ... \n", | |
"51407 NaN NaN 101 \n", | |
"51408 NaN NaN NaN \n", | |
"51409 NaN NaN 1221 \n", | |
"51410 NaN NaN 100 \n", | |
"51411 PHARMACIE AL SHIFAA NaN 54 \n", | |
"\n", | |
" typvoie voie compvoie \\\n", | |
"0 AV DE JASSERON NaN \n", | |
"1 R DE LA CHAPELLE NaN \n", | |
"2 R DU LYCEE NaN \n", | |
"3 R DE LA COMMUNE 1871 NaN \n", | |
"4 R CONDAMNALE NaN \n", | |
"... ... ... ... \n", | |
"51407 R DE LA MATERNELLE GNAMBO NaN \n", | |
"51408 IMP MAHARAJAH NaN \n", | |
"51409 AV DU COLLEGE NaN \n", | |
"51410 RTE DES BADAMIERS C \n", | |
"51411 R Des Manguiers NaN \n", | |
"\n", | |
" compldistrib lieuditbp ... departement \\\n", | |
"0 NaN NaN ... 01 \n", | |
"1 NaN NaN ... 01 \n", | |
"2 NaN NaN ... 01 \n", | |
"3 NaN NaN ... 01 \n", | |
"4 NaN NaN ... 01 \n", | |
"... ... ... ... ... \n", | |
"51407 NaN 97650 BANDRABOUA ... 9F \n", | |
"51408 CENTRE MAHARAJAH - BATIMENT H1 Z,I KAWENI ... 9F \n", | |
"51409 NaN NaN ... 9F \n", | |
"51410 NaN LABATTOIR ... 9F \n", | |
"51411 NaN Tsoundzou 2 ... 9F \n", | |
"\n", | |
" libdepartement telephone statutjuridique \\\n", | |
"0 AIN 0428631234 73 \n", | |
"1 AIN 0474404949 95 \n", | |
"2 AIN 0474226685 60 \n", | |
"3 AIN 0474461420 21 \n", | |
"4 AIN 0385304221 21 \n", | |
"... ... ... ... \n", | |
"51407 MAYOTTE NaN 60 \n", | |
"51408 MAYOTTE 0272741104 95 \n", | |
"51409 MAYOTTE 0269602892 60 \n", | |
"51410 MAYOTTE NaN 72 \n", | |
"51411 MAYOTTE NaN 78 \n", | |
"\n", | |
" libstatutjuridique categetab \\\n", | |
"0 Société Anonyme (S.A.) NaN \n", | |
"1 Société par Actions Simplifiée (S.A.S.) NaN \n", | |
"2 Association Loi 1901 non Reconnue d'Utilité Pu... NaN \n", | |
"3 Etablissement Social et Médico-Social Communal NaN \n", | |
"4 Etablissement Social et Médico-Social Communal NaN \n", | |
"... ... ... \n", | |
"51407 Association Loi 1901 non Reconnue d'Utilité Pu... NaN \n", | |
"51408 Société par Actions Simplifiée (S.A.S.) NaN \n", | |
"51409 Association Loi 1901 non Reconnue d'Utilité Pu... NaN \n", | |
"51410 Société A Responsabilité Limitée (S.A.R.L.) NaN \n", | |
"51411 Entreprise Unipersonnelle Responsabilité Limit... NaN \n", | |
"\n", | |
" libcategetab siren codeape datecrea \n", | |
"0 NaN 772201489 8610Z 2001-01-01 \n", | |
"1 NaN 546120122 8610Z 1961-01-01 \n", | |
"2 NaN 779309194 8710B 2001-01-01 \n", | |
"3 NaN 260100011 8710A 2001-01-01 \n", | |
"4 NaN 260100029 8710A 2001-01-01 \n", | |
"... ... ... ... ... \n", | |
"51407 NaN 850632860 NaN 2019-01-07 \n", | |
"51408 NaN 899388896 NaN 2021-05-18 \n", | |
"51409 NaN 822239802 8899B 2016-07-01 \n", | |
"51410 NaN NaN NaN 2022-09-08 \n", | |
"51411 NaN NaN NaN 2022-07-25 \n", | |
"\n", | |
"[51412 rows x 22 columns]" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"(\n", | |
" raw\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "8dc5631d-755c-4a15-8042-ceecaa2c17bb", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"raw.to_csv('datasets/finess_ej.csv', index=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "d889e7b0-e61c-4dd4-ae7c-2e6127312367", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment