Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save ashutoshsahu2015/79c208079ca7d2e9f6a8ae90ce5d8a16 to your computer and use it in GitHub Desktop.
Save ashutoshsahu2015/79c208079ca7d2e9f6a8ae90ce5d8a16 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>10</th>\n",
" <th>11</th>\n",
" <th>12</th>\n",
" <th>13</th>\n",
" <th>14</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>39</td>\n",
" <td>State-gov</td>\n",
" <td>77516</td>\n",
" <td>Bachelors</td>\n",
" <td>13</td>\n",
" <td>Never-married</td>\n",
" <td>Adm-clerical</td>\n",
" <td>Not-in-family</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>2174</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" <td>&lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>50</td>\n",
" <td>Self-emp-not-inc</td>\n",
" <td>83311</td>\n",
" <td>Bachelors</td>\n",
" <td>13</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Exec-managerial</td>\n",
" <td>Husband</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>13</td>\n",
" <td>United-States</td>\n",
" <td>&lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>38</td>\n",
" <td>Private</td>\n",
" <td>215646</td>\n",
" <td>HS-grad</td>\n",
" <td>9</td>\n",
" <td>Divorced</td>\n",
" <td>Handlers-cleaners</td>\n",
" <td>Not-in-family</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" <td>&lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>53</td>\n",
" <td>Private</td>\n",
" <td>234721</td>\n",
" <td>11th</td>\n",
" <td>7</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Handlers-cleaners</td>\n",
" <td>Husband</td>\n",
" <td>Black</td>\n",
" <td>Male</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>United-States</td>\n",
" <td>&lt;=50K</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>28</td>\n",
" <td>Private</td>\n",
" <td>338409</td>\n",
" <td>Bachelors</td>\n",
" <td>13</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Prof-specialty</td>\n",
" <td>Wife</td>\n",
" <td>Black</td>\n",
" <td>Female</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>40</td>\n",
" <td>Cuba</td>\n",
" <td>&lt;=50K</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 5 \\\n",
"0 39 State-gov 77516 Bachelors 13 Never-married \n",
"1 50 Self-emp-not-inc 83311 Bachelors 13 Married-civ-spouse \n",
"2 38 Private 215646 HS-grad 9 Divorced \n",
"3 53 Private 234721 11th 7 Married-civ-spouse \n",
"4 28 Private 338409 Bachelors 13 Married-civ-spouse \n",
"\n",
" 6 7 8 9 10 11 12 \\\n",
"0 Adm-clerical Not-in-family White Male 2174 0 40 \n",
"1 Exec-managerial Husband White Male 0 0 13 \n",
"2 Handlers-cleaners Not-in-family White Male 0 0 40 \n",
"3 Handlers-cleaners Husband Black Male 0 0 40 \n",
"4 Prof-specialty Wife Black Female 0 0 40 \n",
"\n",
" 13 14 \n",
"0 United-States <=50K \n",
"1 United-States <=50K \n",
"2 United-States <=50K \n",
"3 United-States <=50K \n",
"4 Cuba <=50K "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the UCI Adult data set straight from the archive.\n",
"# header=None: the file has no header row, so columns get integer labels.\n",
"# skipinitialspace=True: fields are separated by a comma followed by a space;\n",
"# without it every string value would keep a leading blank (' State-gov').\n",
"dataset = pd.read_csv(\n",
"    'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data',\n",
"    header=None,\n",
"    index_col=None,\n",
"    skipinitialspace=True,\n",
")\n",
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>1</th>\n",
" <th>3</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>13</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>State-gov</td>\n",
" <td>Bachelors</td>\n",
" <td>Never-married</td>\n",
" <td>Adm-clerical</td>\n",
" <td>Not-in-family</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>United-States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Self-emp-not-inc</td>\n",
" <td>Bachelors</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Exec-managerial</td>\n",
" <td>Husband</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>United-States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Private</td>\n",
" <td>HS-grad</td>\n",
" <td>Divorced</td>\n",
" <td>Handlers-cleaners</td>\n",
" <td>Not-in-family</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>United-States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Private</td>\n",
" <td>11th</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Handlers-cleaners</td>\n",
" <td>Husband</td>\n",
" <td>Black</td>\n",
" <td>Male</td>\n",
" <td>United-States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Private</td>\n",
" <td>Bachelors</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Prof-specialty</td>\n",
" <td>Wife</td>\n",
" <td>Black</td>\n",
" <td>Female</td>\n",
" <td>Cuba</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 1 3 5 6 \\\n",
"0 State-gov Bachelors Never-married Adm-clerical \n",
"1 Self-emp-not-inc Bachelors Married-civ-spouse Exec-managerial \n",
"2 Private HS-grad Divorced Handlers-cleaners \n",
"3 Private 11th Married-civ-spouse Handlers-cleaners \n",
"4 Private Bachelors Married-civ-spouse Prof-specialty \n",
"\n",
" 7 8 9 13 \n",
"0 Not-in-family White Male United-States \n",
"1 Husband White Male United-States \n",
"2 Not-in-family White Male United-States \n",
"3 Husband Black Male United-States \n",
"4 Wife Black Female Cuba "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Integer labels of the columns to keep; in the preview above these are the\n",
"# ones that hold text labels.\n",
"columns = [1, 3, 5, 6, 7, 8, 9, 13]\n",
"dataset.loc[:, columns].head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Employment</th>\n",
" <th>Degree</th>\n",
" <th>Status</th>\n",
" <th>Designation</th>\n",
" <th>Family_job</th>\n",
" <th>Race</th>\n",
" <th>Sex</th>\n",
" <th>Country</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>State-gov</td>\n",
" <td>Bachelors</td>\n",
" <td>Never-married</td>\n",
" <td>Adm-clerical</td>\n",
" <td>Not-in-family</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>United-States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Self-emp-not-inc</td>\n",
" <td>Bachelors</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Exec-managerial</td>\n",
" <td>Husband</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>United-States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Private</td>\n",
" <td>HS-grad</td>\n",
" <td>Divorced</td>\n",
" <td>Handlers-cleaners</td>\n",
" <td>Not-in-family</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>United-States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Private</td>\n",
" <td>11th</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Handlers-cleaners</td>\n",
" <td>Husband</td>\n",
" <td>Black</td>\n",
" <td>Male</td>\n",
" <td>United-States</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Private</td>\n",
" <td>Bachelors</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Prof-specialty</td>\n",
" <td>Wife</td>\n",
" <td>Black</td>\n",
" <td>Female</td>\n",
" <td>Cuba</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Employment Degree Status Designation \\\n",
"0 State-gov Bachelors Never-married Adm-clerical \n",
"1 Self-emp-not-inc Bachelors Married-civ-spouse Exec-managerial \n",
"2 Private HS-grad Divorced Handlers-cleaners \n",
"3 Private 11th Married-civ-spouse Handlers-cleaners \n",
"4 Private Bachelors Married-civ-spouse Prof-specialty \n",
"\n",
" Family_job Race Sex Country \n",
"0 Not-in-family White Male United-States \n",
"1 Husband White Male United-States \n",
"2 Not-in-family White Male United-States \n",
"3 Husband Black Male United-States \n",
"4 Wife Black Female Cuba "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Narrow the frame to the selected columns and give each one a descriptive\n",
"# name; names are paired with the integer labels in `columns` by position.\n",
"dataset = dataset[columns].rename(\n",
"    columns=dict(zip(columns, [\n",
"        'Employment', 'Degree', 'Status', 'Designation',\n",
"        'Family_job', 'Race', 'Sex', 'Country',\n",
"    ]))\n",
")\n",
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Employment : 9 labels\n",
"Degree : 16 labels\n",
"Status : 7 labels\n",
"Designation : 15 labels\n",
"Family_job : 6 labels\n",
"Race : 5 labels\n",
"Sex : 2 labels\n",
"Country : 42 labels\n"
]
}
],
"source": [
"# Print how many distinct labels each column contains.\n",
"# dropna=False makes a missing value count as a label of its own.\n",
"for col, n_labels in dataset.nunique(dropna=False).items():\n",
"    print(f'{col} : {n_labels} labels')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Build a lookup from each Country label to the number of rows carrying it.\n",
"country_map = (\n",
"    dataset['Country']\n",
"    .value_counts()\n",
"    .to_dict()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Employment</th>\n",
" <th>Degree</th>\n",
" <th>Status</th>\n",
" <th>Designation</th>\n",
" <th>Family_job</th>\n",
" <th>Race</th>\n",
" <th>Sex</th>\n",
" <th>Country</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>State-gov</td>\n",
" <td>Bachelors</td>\n",
" <td>Never-married</td>\n",
" <td>Adm-clerical</td>\n",
" <td>Not-in-family</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>29170</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Self-emp-not-inc</td>\n",
" <td>Bachelors</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Exec-managerial</td>\n",
" <td>Husband</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>29170</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Private</td>\n",
" <td>HS-grad</td>\n",
" <td>Divorced</td>\n",
" <td>Handlers-cleaners</td>\n",
" <td>Not-in-family</td>\n",
" <td>White</td>\n",
" <td>Male</td>\n",
" <td>29170</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Private</td>\n",
" <td>11th</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Handlers-cleaners</td>\n",
" <td>Husband</td>\n",
" <td>Black</td>\n",
" <td>Male</td>\n",
" <td>29170</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Private</td>\n",
" <td>Bachelors</td>\n",
" <td>Married-civ-spouse</td>\n",
" <td>Prof-specialty</td>\n",
" <td>Wife</td>\n",
" <td>Black</td>\n",
" <td>Female</td>\n",
" <td>95</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Employment Degree Status Designation \\\n",
"0 State-gov Bachelors Never-married Adm-clerical \n",
"1 Self-emp-not-inc Bachelors Married-civ-spouse Exec-managerial \n",
"2 Private HS-grad Divorced Handlers-cleaners \n",
"3 Private 11th Married-civ-spouse Handlers-cleaners \n",
"4 Private Bachelors Married-civ-spouse Prof-specialty \n",
"\n",
" Family_job Race Sex Country \n",
"0 Not-in-family White Male 29170 \n",
"1 Husband White Male 29170 \n",
"2 Not-in-family White Male 29170 \n",
"3 Husband Black Male 29170 \n",
"4 Wife Black Female 95 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Replace every Country label with its row count taken from country_map.\n",
"# NOTE: this overwrites the column, so running the cell a second time would\n",
"# look up the counts themselves in country_map instead of the original labels.\n",
"country_frequency = dataset['Country'].map(country_map)\n",
"dataset['Country'] = country_frequency\n",
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment