Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save ashutoshsahu2015/17b24a43e6c3636ae2ca45e2e91198d1 to your computer and use it in GitHub Desktop.
Save ashutoshsahu2015/17b24a43e6c3636ae2ca45e2e91198d1 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>X0</th>\n",
" <th>X1</th>\n",
" <th>X2</th>\n",
" <th>X3</th>\n",
" <th>X4</th>\n",
" <th>X5</th>\n",
" <th>X6</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>k</td>\n",
" <td>v</td>\n",
" <td>at</td>\n",
" <td>a</td>\n",
" <td>d</td>\n",
" <td>u</td>\n",
" <td>j</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>k</td>\n",
" <td>t</td>\n",
" <td>av</td>\n",
" <td>e</td>\n",
" <td>d</td>\n",
" <td>y</td>\n",
" <td>l</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>az</td>\n",
" <td>w</td>\n",
" <td>n</td>\n",
" <td>c</td>\n",
" <td>d</td>\n",
" <td>x</td>\n",
" <td>j</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>az</td>\n",
" <td>t</td>\n",
" <td>n</td>\n",
" <td>f</td>\n",
" <td>d</td>\n",
" <td>x</td>\n",
" <td>l</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>az</td>\n",
" <td>v</td>\n",
" <td>n</td>\n",
" <td>f</td>\n",
" <td>d</td>\n",
" <td>h</td>\n",
" <td>d</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" X0 X1 X2 X3 X4 X5 X6\n",
"0 k v at a d u j\n",
"1 k t av e d y l\n",
"2 az w n c d x j\n",
"3 az t n f d x l\n",
"4 az v n f d h d"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset=pd.read_csv('mercendez.csv',usecols=['X0','X1','X2','X3','X4','X5','X6'])\n",
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"47\n",
"27\n",
"44\n",
"7\n",
"4\n",
"29\n",
"12\n"
]
}
],
"source": [
"## Check for unique labels in each column\n",
"for col in dataset.columns:\n",
" print(len(dataset[col].unique()))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['aa', 's', 'b', 'l', 'v', 'r', 'i', 'a', 'c', 'o'], dtype='object')"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list_top_10=dataset.X1.value_counts().sort_values(ascending=False).head(10).index\n",
"list_top_10"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>X0</th>\n",
" <th>X1</th>\n",
" <th>X2</th>\n",
" <th>X3</th>\n",
" <th>X4</th>\n",
" <th>X5</th>\n",
" <th>X6</th>\n",
" <th>aa</th>\n",
" <th>s</th>\n",
" <th>b</th>\n",
" <th>l</th>\n",
" <th>v</th>\n",
" <th>r</th>\n",
" <th>i</th>\n",
" <th>a</th>\n",
" <th>c</th>\n",
" <th>o</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>k</td>\n",
" <td>v</td>\n",
" <td>at</td>\n",
" <td>a</td>\n",
" <td>d</td>\n",
" <td>u</td>\n",
" <td>j</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>k</td>\n",
" <td>t</td>\n",
" <td>av</td>\n",
" <td>e</td>\n",
" <td>d</td>\n",
" <td>y</td>\n",
" <td>l</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>az</td>\n",
" <td>w</td>\n",
" <td>n</td>\n",
" <td>c</td>\n",
" <td>d</td>\n",
" <td>x</td>\n",
" <td>j</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>az</td>\n",
" <td>t</td>\n",
" <td>n</td>\n",
" <td>f</td>\n",
" <td>d</td>\n",
" <td>x</td>\n",
" <td>l</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>az</td>\n",
" <td>v</td>\n",
" <td>n</td>\n",
" <td>f</td>\n",
" <td>d</td>\n",
" <td>h</td>\n",
" <td>d</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" X0 X1 X2 X3 X4 X5 X6 aa s b l v r i a c o\n",
"0 k v at a d u j 0 0 0 0 1 0 0 0 0 0\n",
"1 k t av e d y l 0 0 0 0 0 0 0 0 0 0\n",
"2 az w n c d x j 0 0 0 0 0 0 0 0 0 0\n",
"3 az t n f d x l 0 0 0 0 0 0 0 0 0 0\n",
"4 az v n f d h d 0 0 0 0 1 0 0 0 0 0"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"for category in list_top_10:\n",
" dataset[category]=np.where(dataset['X1']==category,1,0)\n",
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>X0</th>\n",
" <th>X2</th>\n",
" <th>X3</th>\n",
" <th>X4</th>\n",
" <th>X5</th>\n",
" <th>X6</th>\n",
" <th>aa</th>\n",
" <th>s</th>\n",
" <th>b</th>\n",
" <th>l</th>\n",
" <th>v</th>\n",
" <th>r</th>\n",
" <th>i</th>\n",
" <th>a</th>\n",
" <th>c</th>\n",
" <th>o</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>k</td>\n",
" <td>at</td>\n",
" <td>a</td>\n",
" <td>d</td>\n",
" <td>u</td>\n",
" <td>j</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>k</td>\n",
" <td>av</td>\n",
" <td>e</td>\n",
" <td>d</td>\n",
" <td>y</td>\n",
" <td>l</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>az</td>\n",
" <td>n</td>\n",
" <td>c</td>\n",
" <td>d</td>\n",
" <td>x</td>\n",
" <td>j</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>az</td>\n",
" <td>n</td>\n",
" <td>f</td>\n",
" <td>d</td>\n",
" <td>x</td>\n",
" <td>l</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>az</td>\n",
" <td>n</td>\n",
" <td>f</td>\n",
" <td>d</td>\n",
" <td>h</td>\n",
" <td>d</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" X0 X2 X3 X4 X5 X6 aa s b l v r i a c o\n",
"0 k at a d u j 0 0 0 0 1 0 0 0 0 0\n",
"1 k av e d y l 0 0 0 0 0 0 0 0 0 0\n",
"2 az n c d x j 0 0 0 0 0 0 0 0 0 0\n",
"3 az n f d x l 0 0 0 0 0 0 0 0 0 0\n",
"4 az n f d h d 0 0 0 0 1 0 0 0 0 0"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset.drop('X1',axis=1,inplace=True)\n",
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment