Created
March 20, 2021 07:55
-
-
Save ashutoshsahu2015/79c208079ca7d2e9f6a8ae90ce5d8a16 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" <th>3</th>\n", | |
" <th>4</th>\n", | |
" <th>5</th>\n", | |
" <th>6</th>\n", | |
" <th>7</th>\n", | |
" <th>8</th>\n", | |
" <th>9</th>\n", | |
" <th>10</th>\n", | |
" <th>11</th>\n", | |
" <th>12</th>\n", | |
" <th>13</th>\n", | |
" <th>14</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>39</td>\n", | |
" <td>State-gov</td>\n", | |
" <td>77516</td>\n", | |
" <td>Bachelors</td>\n", | |
" <td>13</td>\n", | |
" <td>Never-married</td>\n", | |
" <td>Adm-clerical</td>\n", | |
" <td>Not-in-family</td>\n", | |
" <td>White</td>\n", | |
" <td>Male</td>\n", | |
" <td>2174</td>\n", | |
" <td>0</td>\n", | |
" <td>40</td>\n", | |
" <td>United-States</td>\n", | |
" <td><=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>50</td>\n", | |
" <td>Self-emp-not-inc</td>\n", | |
" <td>83311</td>\n", | |
" <td>Bachelors</td>\n", | |
" <td>13</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>Exec-managerial</td>\n", | |
" <td>Husband</td>\n", | |
" <td>White</td>\n", | |
" <td>Male</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>13</td>\n", | |
" <td>United-States</td>\n", | |
" <td><=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>38</td>\n", | |
" <td>Private</td>\n", | |
" <td>215646</td>\n", | |
" <td>HS-grad</td>\n", | |
" <td>9</td>\n", | |
" <td>Divorced</td>\n", | |
" <td>Handlers-cleaners</td>\n", | |
" <td>Not-in-family</td>\n", | |
" <td>White</td>\n", | |
" <td>Male</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>40</td>\n", | |
" <td>United-States</td>\n", | |
" <td><=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>53</td>\n", | |
" <td>Private</td>\n", | |
" <td>234721</td>\n", | |
" <td>11th</td>\n", | |
" <td>7</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>Handlers-cleaners</td>\n", | |
" <td>Husband</td>\n", | |
" <td>Black</td>\n", | |
" <td>Male</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>40</td>\n", | |
" <td>United-States</td>\n", | |
" <td><=50K</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>28</td>\n", | |
" <td>Private</td>\n", | |
" <td>338409</td>\n", | |
" <td>Bachelors</td>\n", | |
" <td>13</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>Prof-specialty</td>\n", | |
" <td>Wife</td>\n", | |
" <td>Black</td>\n", | |
" <td>Female</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>40</td>\n", | |
" <td>Cuba</td>\n", | |
" <td><=50K</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 0 1 2 3 4 5 \\\n", | |
"0 39 State-gov 77516 Bachelors 13 Never-married \n", | |
"1 50 Self-emp-not-inc 83311 Bachelors 13 Married-civ-spouse \n", | |
"2 38 Private 215646 HS-grad 9 Divorced \n", | |
"3 53 Private 234721 11th 7 Married-civ-spouse \n", | |
"4 28 Private 338409 Bachelors 13 Married-civ-spouse \n", | |
"\n", | |
" 6 7 8 9 10 11 12 \\\n", | |
"0 Adm-clerical Not-in-family White Male 2174 0 40 \n", | |
"1 Exec-managerial Husband White Male 0 0 13 \n", | |
"2 Handlers-cleaners Not-in-family White Male 0 0 40 \n", | |
"3 Handlers-cleaners Husband Black Male 0 0 40 \n", | |
"4 Prof-specialty Wife Black Female 0 0 40 \n", | |
"\n", | |
" 13 14 \n", | |
"0 United-States <=50K \n", | |
"1 United-States <=50K \n", | |
"2 United-States <=50K \n", | |
"3 United-States <=50K \n", | |
"4 Cuba <=50K " | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dataset = pd.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data' , header = None,index_col=None) \n", | |
"dataset.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>1</th>\n", | |
" <th>3</th>\n", | |
" <th>5</th>\n", | |
" <th>6</th>\n", | |
" <th>7</th>\n", | |
" <th>8</th>\n", | |
" <th>9</th>\n", | |
" <th>13</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>State-gov</td>\n", | |
" <td>Bachelors</td>\n", | |
" <td>Never-married</td>\n", | |
" <td>Adm-clerical</td>\n", | |
" <td>Not-in-family</td>\n", | |
" <td>White</td>\n", | |
" <td>Male</td>\n", | |
" <td>United-States</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Self-emp-not-inc</td>\n", | |
" <td>Bachelors</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>Exec-managerial</td>\n", | |
" <td>Husband</td>\n", | |
" <td>White</td>\n", | |
" <td>Male</td>\n", | |
" <td>United-States</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Private</td>\n", | |
" <td>HS-grad</td>\n", | |
" <td>Divorced</td>\n", | |
" <td>Handlers-cleaners</td>\n", | |
" <td>Not-in-family</td>\n", | |
" <td>White</td>\n", | |
" <td>Male</td>\n", | |
" <td>United-States</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Private</td>\n", | |
" <td>11th</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>Handlers-cleaners</td>\n", | |
" <td>Husband</td>\n", | |
" <td>Black</td>\n", | |
" <td>Male</td>\n", | |
" <td>United-States</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Private</td>\n", | |
" <td>Bachelors</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>Prof-specialty</td>\n", | |
" <td>Wife</td>\n", | |
" <td>Black</td>\n", | |
" <td>Female</td>\n", | |
" <td>Cuba</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 1 3 5 6 \\\n", | |
"0 State-gov Bachelors Never-married Adm-clerical \n", | |
"1 Self-emp-not-inc Bachelors Married-civ-spouse Exec-managerial \n", | |
"2 Private HS-grad Divorced Handlers-cleaners \n", | |
"3 Private 11th Married-civ-spouse Handlers-cleaners \n", | |
"4 Private Bachelors Married-civ-spouse Prof-specialty \n", | |
"\n", | |
" 7 8 9 13 \n", | |
"0 Not-in-family White Male United-States \n", | |
"1 Husband White Male United-States \n", | |
"2 Not-in-family White Male United-States \n", | |
"3 Husband Black Male United-States \n", | |
"4 Wife Black Female Cuba " | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"columns=[1,3,5,6,7,8,9,13]\n", | |
"dataset[columns].head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Employment</th>\n", | |
" <th>Degree</th>\n", | |
" <th>Status</th>\n", | |
" <th>Designation</th>\n", | |
" <th>Family_job</th>\n", | |
" <th>Race</th>\n", | |
" <th>Sex</th>\n", | |
" <th>Country</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>State-gov</td>\n", | |
" <td>Bachelors</td>\n", | |
" <td>Never-married</td>\n", | |
" <td>Adm-clerical</td>\n", | |
" <td>Not-in-family</td>\n", | |
" <td>White</td>\n", | |
" <td>Male</td>\n", | |
" <td>United-States</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Self-emp-not-inc</td>\n", | |
" <td>Bachelors</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>Exec-managerial</td>\n", | |
" <td>Husband</td>\n", | |
" <td>White</td>\n", | |
" <td>Male</td>\n", | |
" <td>United-States</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Private</td>\n", | |
" <td>HS-grad</td>\n", | |
" <td>Divorced</td>\n", | |
" <td>Handlers-cleaners</td>\n", | |
" <td>Not-in-family</td>\n", | |
" <td>White</td>\n", | |
" <td>Male</td>\n", | |
" <td>United-States</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Private</td>\n", | |
" <td>11th</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>Handlers-cleaners</td>\n", | |
" <td>Husband</td>\n", | |
" <td>Black</td>\n", | |
" <td>Male</td>\n", | |
" <td>United-States</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Private</td>\n", | |
" <td>Bachelors</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>Prof-specialty</td>\n", | |
" <td>Wife</td>\n", | |
" <td>Black</td>\n", | |
" <td>Female</td>\n", | |
" <td>Cuba</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Employment Degree Status Designation \\\n", | |
"0 State-gov Bachelors Never-married Adm-clerical \n", | |
"1 Self-emp-not-inc Bachelors Married-civ-spouse Exec-managerial \n", | |
"2 Private HS-grad Divorced Handlers-cleaners \n", | |
"3 Private 11th Married-civ-spouse Handlers-cleaners \n", | |
"4 Private Bachelors Married-civ-spouse Prof-specialty \n", | |
"\n", | |
" Family_job Race Sex Country \n", | |
"0 Not-in-family White Male United-States \n", | |
"1 Husband White Male United-States \n", | |
"2 Not-in-family White Male United-States \n", | |
"3 Husband Black Male United-States \n", | |
"4 Wife Black Female Cuba " | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dataset=dataset[columns]\n", | |
"dataset.columns=['Employment','Degree','Status','Designation','Family_job','Race','Sex','Country']\n", | |
"dataset.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Employment : 9 labels\n", | |
"Degree : 16 labels\n", | |
"Status : 7 labels\n", | |
"Designation : 15 labels\n", | |
"Family_job : 6 labels\n", | |
"Race : 5 labels\n", | |
"Sex : 2 labels\n", | |
"Country : 42 labels\n" | |
] | |
} | |
], | |
"source": [ | |
"for col in dataset.columns[:]:\n", | |
" print(col,':',len(dataset[col].unique()),'labels')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"country_map=dataset['Country'].value_counts().to_dict()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Employment</th>\n", | |
" <th>Degree</th>\n", | |
" <th>Status</th>\n", | |
" <th>Designation</th>\n", | |
" <th>Family_job</th>\n", | |
" <th>Race</th>\n", | |
" <th>Sex</th>\n", | |
" <th>Country</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>State-gov</td>\n", | |
" <td>Bachelors</td>\n", | |
" <td>Never-married</td>\n", | |
" <td>Adm-clerical</td>\n", | |
" <td>Not-in-family</td>\n", | |
" <td>White</td>\n", | |
" <td>Male</td>\n", | |
" <td>29170</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Self-emp-not-inc</td>\n", | |
" <td>Bachelors</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>Exec-managerial</td>\n", | |
" <td>Husband</td>\n", | |
" <td>White</td>\n", | |
" <td>Male</td>\n", | |
" <td>29170</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Private</td>\n", | |
" <td>HS-grad</td>\n", | |
" <td>Divorced</td>\n", | |
" <td>Handlers-cleaners</td>\n", | |
" <td>Not-in-family</td>\n", | |
" <td>White</td>\n", | |
" <td>Male</td>\n", | |
" <td>29170</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Private</td>\n", | |
" <td>11th</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>Handlers-cleaners</td>\n", | |
" <td>Husband</td>\n", | |
" <td>Black</td>\n", | |
" <td>Male</td>\n", | |
" <td>29170</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Private</td>\n", | |
" <td>Bachelors</td>\n", | |
" <td>Married-civ-spouse</td>\n", | |
" <td>Prof-specialty</td>\n", | |
" <td>Wife</td>\n", | |
" <td>Black</td>\n", | |
" <td>Female</td>\n", | |
" <td>95</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Employment Degree Status Designation \\\n", | |
"0 State-gov Bachelors Never-married Adm-clerical \n", | |
"1 Self-emp-not-inc Bachelors Married-civ-spouse Exec-managerial \n", | |
"2 Private HS-grad Divorced Handlers-cleaners \n", | |
"3 Private 11th Married-civ-spouse Handlers-cleaners \n", | |
"4 Private Bachelors Married-civ-spouse Prof-specialty \n", | |
"\n", | |
" Family_job Race Sex Country \n", | |
"0 Not-in-family White Male 29170 \n", | |
"1 Husband White Male 29170 \n", | |
"2 Not-in-family White Male 29170 \n", | |
"3 Husband Black Male 29170 \n", | |
"4 Wife Black Female 95 " | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dataset['Country']=dataset['Country'].map(country_map)\n", | |
"dataset.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment