Created
March 20, 2021 08:07
-
-
Save ashutoshsahu2015/e706aa5ab88194443ae12c0b74054057 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Survived</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Cabin</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>A</th>\n", | |
" <td>0.466667</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>B</th>\n", | |
" <td>0.744681</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>C</th>\n", | |
" <td>0.593220</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>D</th>\n", | |
" <td>0.757576</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>E</th>\n", | |
" <td>0.750000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>F</th>\n", | |
" <td>0.615385</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>G</th>\n", | |
" <td>0.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>M</th>\n", | |
" <td>0.299854</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>T</th>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Survived\n", | |
"Cabin \n", | |
"A 0.466667\n", | |
"B 0.744681\n", | |
"C 0.593220\n", | |
"D 0.757576\n", | |
"E 0.750000\n", | |
"F 0.615385\n", | |
"G 0.500000\n", | |
"M 0.299854\n", | |
"T 0.000000" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"prob=dataset.groupby(['Cabin'])['Survived'].mean()\n", | |
"prob_df=pd.DataFrame(prob)\n", | |
"prob_df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Survived</th>\n", | |
" <th>Died</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Cabin</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>A</th>\n", | |
" <td>0.466667</td>\n", | |
" <td>0.533333</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>B</th>\n", | |
" <td>0.744681</td>\n", | |
" <td>0.255319</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>C</th>\n", | |
" <td>0.593220</td>\n", | |
" <td>0.406780</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>D</th>\n", | |
" <td>0.757576</td>\n", | |
" <td>0.242424</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>E</th>\n", | |
" <td>0.750000</td>\n", | |
" <td>0.250000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>F</th>\n", | |
" <td>0.615385</td>\n", | |
" <td>0.384615</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>G</th>\n", | |
" <td>0.500000</td>\n", | |
" <td>0.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>M</th>\n", | |
" <td>0.299854</td>\n", | |
" <td>0.700146</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>T</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Survived Died\n", | |
"Cabin \n", | |
"A 0.466667 0.533333\n", | |
"B 0.744681 0.255319\n", | |
"C 0.593220 0.406780\n", | |
"D 0.757576 0.242424\n", | |
"E 0.750000 0.250000\n", | |
"F 0.615385 0.384615\n", | |
"G 0.500000 0.500000\n", | |
"M 0.299854 0.700146\n", | |
"T 0.000000 1.000000" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"prob_df['Died']=1-prob_df['Survived']\n", | |
"prob_df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Survived</th>\n", | |
" <th>Died</th>\n", | |
" <th>Probability Ratio</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Cabin</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>A</th>\n", | |
" <td>0.466667</td>\n", | |
" <td>0.533333</td>\n", | |
" <td>0.875000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>B</th>\n", | |
" <td>0.744681</td>\n", | |
" <td>0.255319</td>\n", | |
" <td>2.916667</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>C</th>\n", | |
" <td>0.593220</td>\n", | |
" <td>0.406780</td>\n", | |
" <td>1.458333</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>D</th>\n", | |
" <td>0.757576</td>\n", | |
" <td>0.242424</td>\n", | |
" <td>3.125000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>E</th>\n", | |
" <td>0.750000</td>\n", | |
" <td>0.250000</td>\n", | |
" <td>3.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>F</th>\n", | |
" <td>0.615385</td>\n", | |
" <td>0.384615</td>\n", | |
" <td>1.600000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>G</th>\n", | |
" <td>0.500000</td>\n", | |
" <td>0.500000</td>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>M</th>\n", | |
" <td>0.299854</td>\n", | |
" <td>0.700146</td>\n", | |
" <td>0.428274</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>T</th>\n", | |
" <td>0.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Survived Died Probability Ratio\n", | |
"Cabin \n", | |
"A 0.466667 0.533333 0.875000\n", | |
"B 0.744681 0.255319 2.916667\n", | |
"C 0.593220 0.406780 1.458333\n", | |
"D 0.757576 0.242424 3.125000\n", | |
"E 0.750000 0.250000 3.000000\n", | |
"F 0.615385 0.384615 1.600000\n", | |
"G 0.500000 0.500000 1.000000\n", | |
"M 0.299854 0.700146 0.428274\n", | |
"T 0.000000 1.000000 0.000000" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"prob_df['Probability Ratio']=prob_df['Survived']/prob_df['Died']\n", | |
"prob_df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'A': 0.875,\n", | |
" 'B': 2.916666666666666,\n", | |
" 'C': 1.4583333333333333,\n", | |
" 'D': 3.125,\n", | |
" 'E': 3.0,\n", | |
" 'F': 1.6000000000000003,\n", | |
" 'G': 1.0,\n", | |
" 'M': 0.42827442827442824,\n", | |
" 'T': 0.0}" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"prob_encod_dictionary=prob_df['Probability Ratio'].to_dict()\n", | |
"prob_encod_dictionary" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Survived</th>\n", | |
" <th>Cabin</th>\n", | |
" <th>Cabin_probabilty_ratio</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>M</td>\n", | |
" <td>0.428274</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>C</td>\n", | |
" <td>1.458333</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>M</td>\n", | |
" <td>0.428274</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>C</td>\n", | |
" <td>1.458333</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0</td>\n", | |
" <td>M</td>\n", | |
" <td>0.428274</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Survived Cabin Cabin_probabilty_ratio\n", | |
"0 0 M 0.428274\n", | |
"1 1 C 1.458333\n", | |
"2 1 M 0.428274\n", | |
"3 1 C 1.458333\n", | |
"4 0 M 0.428274" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dataset['Cabin_probabilty_ratio']=dataset['Cabin'].map(prob_encod_dictionary)\n", | |
"dataset.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment