Created
April 11, 2016 19:44
-
-
Save aegorenkov/6a78fd0074664318a32b13c7ed0bc781 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>ri</th>\n", | |
" <th>na</th>\n", | |
" <th>mg</th>\n", | |
" <th>al</th>\n", | |
" <th>si</th>\n", | |
" <th>k</th>\n", | |
" <th>ca</th>\n", | |
" <th>ba</th>\n", | |
" <th>fe</th>\n", | |
" <th>glass_type</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>id</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>22</th>\n", | |
" <td>1.51966</td>\n", | |
" <td>14.77</td>\n", | |
" <td>3.75</td>\n", | |
" <td>0.29</td>\n", | |
" <td>72.02</td>\n", | |
" <td>0.03</td>\n", | |
" <td>9.00</td>\n", | |
" <td>0</td>\n", | |
" <td>0.00</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>185</th>\n", | |
" <td>1.51115</td>\n", | |
" <td>17.38</td>\n", | |
" <td>0.00</td>\n", | |
" <td>0.34</td>\n", | |
" <td>75.41</td>\n", | |
" <td>0.00</td>\n", | |
" <td>6.65</td>\n", | |
" <td>0</td>\n", | |
" <td>0.00</td>\n", | |
" <td>6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>40</th>\n", | |
" <td>1.52213</td>\n", | |
" <td>14.21</td>\n", | |
" <td>3.82</td>\n", | |
" <td>0.47</td>\n", | |
" <td>71.77</td>\n", | |
" <td>0.11</td>\n", | |
" <td>9.57</td>\n", | |
" <td>0</td>\n", | |
" <td>0.00</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>39</th>\n", | |
" <td>1.52213</td>\n", | |
" <td>14.21</td>\n", | |
" <td>3.82</td>\n", | |
" <td>0.47</td>\n", | |
" <td>71.77</td>\n", | |
" <td>0.11</td>\n", | |
" <td>9.57</td>\n", | |
" <td>0</td>\n", | |
" <td>0.00</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>51</th>\n", | |
" <td>1.52320</td>\n", | |
" <td>13.72</td>\n", | |
" <td>3.72</td>\n", | |
" <td>0.51</td>\n", | |
" <td>71.75</td>\n", | |
" <td>0.09</td>\n", | |
" <td>10.06</td>\n", | |
" <td>0</td>\n", | |
" <td>0.16</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" ri na mg al si k ca ba fe glass_type\n", | |
"id \n", | |
"22 1.51966 14.77 3.75 0.29 72.02 0.03 9.00 0 0.00 1\n", | |
"185 1.51115 17.38 0.00 0.34 75.41 0.00 6.65 0 0.00 6\n", | |
"40 1.52213 14.21 3.82 0.47 71.77 0.11 9.57 0 0.00 1\n", | |
"39 1.52213 14.21 3.82 0.47 71.77 0.11 9.57 0 0.00 1\n", | |
"51 1.52320 13.72 3.72 0.51 71.75 0.09 10.06 0 0.16 1" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"from sklearn.linear_model import LogisticRegression\n", | |
"\n", | |
"url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/glass/glass.data'\n", | |
"col_names = ['id','ri','na','mg','al','si','k','ca','ba','fe','glass_type']\n", | |
"glass = pd.read_csv(url, names=col_names, index_col='id')\n", | |
"glass.sort_values('al', inplace=True)\n", | |
"glass.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"glass['household'] = glass.glass_type.map({1:0, 2:0, 3:0, 5:1, 6:1, 7:1})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"glass_normal = glass" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"glass_doubled = pd.concat([glass_normal, glass[glass.household == 1]], axis=0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 163\n", | |
"1 51\n", | |
"Name: household, dtype: int64" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"glass_normal.household.value_counts()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0 163\n", | |
"1 102\n", | |
"Name: household, dtype: int64" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"glass_doubled.household.value_counts()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[ 4.18040386]])" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#Run normal regression on normal data\n", | |
"\n", | |
"logreg = LogisticRegression(C=1e9)\n", | |
"feature_cols = ['al']\n", | |
"X = glass_normal[feature_cols]\n", | |
"y = glass_normal.household\n", | |
"logreg.fit(X, y)\n", | |
"logreg.coef_" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[ 3.85191349]])" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#Run weighted regression on normal data\n", | |
"\n", | |
"logreg = LogisticRegression(C=1e9, class_weight={0:1, 1:2})\n", | |
"feature_cols = ['al']\n", | |
"X = glass_normal[feature_cols]\n", | |
"y = glass_normal.household\n", | |
"logreg.fit(X, y)\n", | |
"logreg.coef_" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[ 3.85191349]])" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#Run normal regression on oversampled data\n", | |
"\n", | |
"logreg = LogisticRegression(C=1e9)\n", | |
"feature_cols = ['al']\n", | |
"X = glass_doubled[feature_cols]\n", | |
"y = glass_doubled.household\n", | |
"logreg.fit(X, y)\n", | |
"logreg.coef_" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.11" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment