Skip to content

Instantly share code, notes, and snippets.

@ashutoshsahu2015
Created March 20, 2021 08:07
Show Gist options
  • Save ashutoshsahu2015/e706aa5ab88194443ae12c0b74054057 to your computer and use it in GitHub Desktop.
Save ashutoshsahu2015/e706aa5ab88194443ae12c0b74054057 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Cabin</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>0.466667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.744681</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>0.593220</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>0.757576</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>0.750000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>0.615385</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>0.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>M</th>\n",
" <td>0.299854</td>\n",
" </tr>\n",
" <tr>\n",
" <th>T</th>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"       Survived\n",
"Cabin          \n",
"A      0.466667\n",
"B      0.744681\n",
"C      0.593220\n",
"D      0.757576\n",
"E      0.750000\n",
"F      0.615385\n",
"G      0.500000\n",
"M      0.299854\n",
"T      0.000000"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean of the 'Survived' column within each 'Cabin' group.\n",
"prob = dataset.groupby('Cabin')['Survived'].mean()\n",
"# Turn the resulting Series into a one-column DataFrame so that further\n",
"# columns can be attached to it.\n",
"prob_df = prob.to_frame()\n",
"prob_df"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Died</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Cabin</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>0.466667</td>\n",
" <td>0.533333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.744681</td>\n",
" <td>0.255319</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>0.593220</td>\n",
" <td>0.406780</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>0.757576</td>\n",
" <td>0.242424</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>0.750000</td>\n",
" <td>0.250000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>0.615385</td>\n",
" <td>0.384615</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>0.500000</td>\n",
" <td>0.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>M</th>\n",
" <td>0.299854</td>\n",
" <td>0.700146</td>\n",
" </tr>\n",
" <tr>\n",
" <th>T</th>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"       Survived      Died\n",
"Cabin                    \n",
"A      0.466667  0.533333\n",
"B      0.744681  0.255319\n",
"C      0.593220  0.406780\n",
"D      0.757576  0.242424\n",
"E      0.750000  0.250000\n",
"F      0.615385  0.384615\n",
"G      0.500000  0.500000\n",
"M      0.299854  0.700146\n",
"T      0.000000  1.000000"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 'Died' is the complement of the per-cabin 'Survived' value.\n",
"prob_df['Died'] = 1 - prob_df['Survived']\n",
"prob_df"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Died</th>\n",
" <th>Probability Ratio</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Cabin</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>0.466667</td>\n",
" <td>0.533333</td>\n",
" <td>0.875000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>0.744681</td>\n",
" <td>0.255319</td>\n",
" <td>2.916667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>0.593220</td>\n",
" <td>0.406780</td>\n",
" <td>1.458333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>0.757576</td>\n",
" <td>0.242424</td>\n",
" <td>3.125000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>0.750000</td>\n",
" <td>0.250000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>0.615385</td>\n",
" <td>0.384615</td>\n",
" <td>1.600000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>0.500000</td>\n",
" <td>0.500000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>M</th>\n",
" <td>0.299854</td>\n",
" <td>0.700146</td>\n",
" <td>0.428274</td>\n",
" </tr>\n",
" <tr>\n",
" <th>T</th>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"       Survived      Died  Probability Ratio\n",
"Cabin                                       \n",
"A      0.466667  0.533333           0.875000\n",
"B      0.744681  0.255319           2.916667\n",
"C      0.593220  0.406780           1.458333\n",
"D      0.757576  0.242424           3.125000\n",
"E      0.750000  0.250000           3.000000\n",
"F      0.615385  0.384615           1.600000\n",
"G      0.500000  0.500000           1.000000\n",
"M      0.299854  0.700146           0.428274\n",
"T      0.000000  1.000000           0.000000"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Ratio of the 'Survived' column to the 'Died' column, per cabin.\n",
"# A 'Died' value of exactly 0 would make the division produce inf, so zero\n",
"# denominators are swapped for a small positive constant before dividing.\n",
"prob_df['Probability Ratio'] = prob_df['Survived'] / prob_df['Died'].replace(0, 1e-5)\n",
"prob_df"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'A': 0.875,\n",
" 'B': 2.916666666666666,\n",
" 'C': 1.4583333333333333,\n",
" 'D': 3.125,\n",
" 'E': 3.0,\n",
" 'F': 1.6000000000000003,\n",
" 'G': 1.0,\n",
" 'M': 0.42827442827442824,\n",
" 'T': 0.0}"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build a lookup keyed by cabin label whose values are the entries of the\n",
"# 'Probability Ratio' column.\n",
"prob_encod_dictionary = prob_df['Probability Ratio'].to_dict()\n",
"prob_encod_dictionary"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Cabin</th>\n",
" <th>Cabin_probabilty_ratio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>M</td>\n",
" <td>0.428274</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>C</td>\n",
" <td>1.458333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>M</td>\n",
" <td>0.428274</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>C</td>\n",
" <td>1.458333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>M</td>\n",
" <td>0.428274</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"   Survived  Cabin  Cabin_probabilty_ratio\n",
"0         0      M                0.428274\n",
"1         1      C                1.458333\n",
"2         1      M                0.428274\n",
"3         1      C                1.458333\n",
"4         0      M                0.428274"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Encode each row's 'Cabin' label with its value from prob_encod_dictionary\n",
"# and store the result in a new column.\n",
"dataset['Cabin_probabilty_ratio'] = dataset['Cabin'].map(prob_encod_dictionary)\n",
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment