Created
March 20, 2021 07:48
-
-
Save ashutoshsahu2015/17b24a43e6c3636ae2ca45e2e91198d1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import matplotlib.pyplot as plt" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>X0</th>\n", | |
" <th>X1</th>\n", | |
" <th>X2</th>\n", | |
" <th>X3</th>\n", | |
" <th>X4</th>\n", | |
" <th>X5</th>\n", | |
" <th>X6</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>k</td>\n", | |
" <td>v</td>\n", | |
" <td>at</td>\n", | |
" <td>a</td>\n", | |
" <td>d</td>\n", | |
" <td>u</td>\n", | |
" <td>j</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>k</td>\n", | |
" <td>t</td>\n", | |
" <td>av</td>\n", | |
" <td>e</td>\n", | |
" <td>d</td>\n", | |
" <td>y</td>\n", | |
" <td>l</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>az</td>\n", | |
" <td>w</td>\n", | |
" <td>n</td>\n", | |
" <td>c</td>\n", | |
" <td>d</td>\n", | |
" <td>x</td>\n", | |
" <td>j</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>az</td>\n", | |
" <td>t</td>\n", | |
" <td>n</td>\n", | |
" <td>f</td>\n", | |
" <td>d</td>\n", | |
" <td>x</td>\n", | |
" <td>l</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>az</td>\n", | |
" <td>v</td>\n", | |
" <td>n</td>\n", | |
" <td>f</td>\n", | |
" <td>d</td>\n", | |
" <td>h</td>\n", | |
" <td>d</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" X0 X1 X2 X3 X4 X5 X6\n", | |
"0 k v at a d u j\n", | |
"1 k t av e d y l\n", | |
"2 az w n c d x j\n", | |
"3 az t n f d x l\n", | |
"4 az v n f d h d" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Read only columns X0 through X6 from the CSV, then preview the first rows.\n", | |
"selected_columns = ['X0', 'X1', 'X2', 'X3', 'X4', 'X5', 'X6']\n", | |
"dataset = pd.read_csv('mercendez.csv', usecols=selected_columns)\n", | |
"dataset.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"47\n", | |
"27\n", | |
"44\n", | |
"7\n", | |
"4\n", | |
"29\n", | |
"12\n" | |
] | |
} | |
], | |
"source": [ | |
"# Print the number of distinct values found in each column, one count per line.\n", | |
"for column_name in dataset.columns:\n", | |
"    distinct_values = dataset[column_name].unique()\n", | |
"    print(len(distinct_values))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Index(['aa', 's', 'b', 'l', 'v', 'r', 'i', 'a', 'c', 'o'], dtype='object')" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Count how often each X1 label occurs, order the counts from largest to\n", | |
"# smallest, and keep the labels of the first ten entries.\n", | |
"x1_counts = dataset['X1'].value_counts()\n", | |
"list_top_10 = x1_counts.sort_values(ascending=False).index[:10]\n", | |
"list_top_10" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>X0</th>\n", | |
" <th>X1</th>\n", | |
" <th>X2</th>\n", | |
" <th>X3</th>\n", | |
" <th>X4</th>\n", | |
" <th>X5</th>\n", | |
" <th>X6</th>\n", | |
" <th>aa</th>\n", | |
" <th>s</th>\n", | |
" <th>b</th>\n", | |
" <th>l</th>\n", | |
" <th>v</th>\n", | |
" <th>r</th>\n", | |
" <th>i</th>\n", | |
" <th>a</th>\n", | |
" <th>c</th>\n", | |
" <th>o</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>k</td>\n", | |
" <td>v</td>\n", | |
" <td>at</td>\n", | |
" <td>a</td>\n", | |
" <td>d</td>\n", | |
" <td>u</td>\n", | |
" <td>j</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>k</td>\n", | |
" <td>t</td>\n", | |
" <td>av</td>\n", | |
" <td>e</td>\n", | |
" <td>d</td>\n", | |
" <td>y</td>\n", | |
" <td>l</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>az</td>\n", | |
" <td>w</td>\n", | |
" <td>n</td>\n", | |
" <td>c</td>\n", | |
" <td>d</td>\n", | |
" <td>x</td>\n", | |
" <td>j</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>az</td>\n", | |
" <td>t</td>\n", | |
" <td>n</td>\n", | |
" <td>f</td>\n", | |
" <td>d</td>\n", | |
" <td>x</td>\n", | |
" <td>l</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>az</td>\n", | |
" <td>v</td>\n", | |
" <td>n</td>\n", | |
" <td>f</td>\n", | |
" <td>d</td>\n", | |
" <td>h</td>\n", | |
" <td>d</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" X0 X1 X2 X3 X4 X5 X6 aa s b l v r i a c o\n", | |
"0 k v at a d u j 0 0 0 0 1 0 0 0 0 0\n", | |
"1 k t av e d y l 0 0 0 0 0 0 0 0 0 0\n", | |
"2 az w n c d x j 0 0 0 0 0 0 0 0 0 0\n", | |
"3 az t n f d x l 0 0 0 0 0 0 0 0 0 0\n", | |
"4 az v n f d h d 0 0 0 0 1 0 0 0 0 0" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# For every label in list_top_10, add a column named after that label that\n", | |
"# holds 1 where X1 equals the label and 0 elsewhere. Rows whose X1 value is\n", | |
"# not in list_top_10 therefore get 0 in all of the new columns.\n", | |
"for label in list_top_10:\n", | |
"    matches_label = dataset['X1'] == label\n", | |
"    dataset[label] = np.where(matches_label, 1, 0)\n", | |
"dataset.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>X0</th>\n", | |
" <th>X2</th>\n", | |
" <th>X3</th>\n", | |
" <th>X4</th>\n", | |
" <th>X5</th>\n", | |
" <th>X6</th>\n", | |
" <th>aa</th>\n", | |
" <th>s</th>\n", | |
" <th>b</th>\n", | |
" <th>l</th>\n", | |
" <th>v</th>\n", | |
" <th>r</th>\n", | |
" <th>i</th>\n", | |
" <th>a</th>\n", | |
" <th>c</th>\n", | |
" <th>o</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>k</td>\n", | |
" <td>at</td>\n", | |
" <td>a</td>\n", | |
" <td>d</td>\n", | |
" <td>u</td>\n", | |
" <td>j</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>k</td>\n", | |
" <td>av</td>\n", | |
" <td>e</td>\n", | |
" <td>d</td>\n", | |
" <td>y</td>\n", | |
" <td>l</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>az</td>\n", | |
" <td>n</td>\n", | |
" <td>c</td>\n", | |
" <td>d</td>\n", | |
" <td>x</td>\n", | |
" <td>j</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>az</td>\n", | |
" <td>n</td>\n", | |
" <td>f</td>\n", | |
" <td>d</td>\n", | |
" <td>x</td>\n", | |
" <td>l</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>az</td>\n", | |
" <td>n</td>\n", | |
" <td>f</td>\n", | |
" <td>d</td>\n", | |
" <td>h</td>\n", | |
" <td>d</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" X0 X2 X3 X4 X5 X6 aa s b l v r i a c o\n", | |
"0 k at a d u j 0 0 0 0 1 0 0 0 0 0\n", | |
"1 k av e d y l 0 0 0 0 0 0 0 0 0 0\n", | |
"2 az n c d x j 0 0 0 0 0 0 0 0 0 0\n", | |
"3 az n f d x l 0 0 0 0 0 0 0 0 0 0\n", | |
"4 az n f d h d 0 0 0 0 1 0 0 0 0 0" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Remove the X1 column from dataset in place, then preview the result.\n", | |
"# NOTE: running this cell a second time raises KeyError because X1 is\n", | |
"# already gone from the frame.\n", | |
"dataset.drop(columns=['X1'], inplace=True)\n", | |
"dataset.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment