Created
February 16, 2017 14:55
-
-
Save mritchie712/7c854bbd58feee1a9c054c7270852903 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import random" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"i = 0\n", | |
"while i < 10:\n", | |
" df['col' + str(i)] = [random.randrange(1,100) for x in range(1,1000)]\n", | |
" i+=1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df['colAB'] = 'A'\n", | |
"df.loc[df.col0 > 75, 'colAB'] = 'B'\n", | |
"df.loc[df.col8 > 75, 'colAB'] = None" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"df['caseWhen'] = 'X'\n", | |
"df.loc[(df.col1 > 75)&(df.col2 < 75) , 'caseWhen'] = df[(df.col1 > 75)&(df.col2<25)].colAB\n", | |
"df.loc[(df.col8 > 50)|(df.col9 > 50) , 'caseWhen'] = df.loc[(df.col8 > 50)|(df.col9>50)].colAB\n", | |
"df.loc[(df.col6 > 20)&(df.colAB == 'A') , 'caseWhen'] = 'C'\n", | |
"df.loc[~df.col4 > 70 , 'caseWhen'] = 'D'\n", | |
"df.loc[df.colAB.isnull(), 'caseWhen'] = 'E'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"for col in df.columns[:5]:\n", | |
" colMean = df[col].mean()\n", | |
" newCol = col + '_GT' + str(int(colMean))\n", | |
" df[newCol] = df[col] > colMean" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"C 445\n", | |
"E 248\n", | |
"B 126\n", | |
"X 89\n", | |
"A 76\n", | |
"Name: caseWhen, dtype: int64" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.caseWhen.value_counts()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>col0</th>\n", | |
" <th>col1</th>\n", | |
" <th>col2</th>\n", | |
" <th>col3</th>\n", | |
" <th>col4</th>\n", | |
" <th>col5</th>\n", | |
" <th>col6</th>\n", | |
" <th>col7</th>\n", | |
" <th>col8</th>\n", | |
" <th>col9</th>\n", | |
" <th>colAB</th>\n", | |
" <th>caseWhen</th>\n", | |
" <th>col0_GT49</th>\n", | |
" <th>col1_GT50</th>\n", | |
" <th>col2_GT50</th>\n", | |
" <th>col3_GT49</th>\n", | |
" <th>col4_GT50</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>83</td>\n", | |
" <td>38</td>\n", | |
" <td>47</td>\n", | |
" <td>55</td>\n", | |
" <td>44</td>\n", | |
" <td>14</td>\n", | |
" <td>6</td>\n", | |
" <td>76</td>\n", | |
" <td>66</td>\n", | |
" <td>77</td>\n", | |
" <td>B</td>\n", | |
" <td>B</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>42</td>\n", | |
" <td>36</td>\n", | |
" <td>39</td>\n", | |
" <td>33</td>\n", | |
" <td>98</td>\n", | |
" <td>58</td>\n", | |
" <td>14</td>\n", | |
" <td>29</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>A</td>\n", | |
" <td>X</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>51</td>\n", | |
" <td>76</td>\n", | |
" <td>1</td>\n", | |
" <td>93</td>\n", | |
" <td>66</td>\n", | |
" <td>20</td>\n", | |
" <td>75</td>\n", | |
" <td>29</td>\n", | |
" <td>16</td>\n", | |
" <td>11</td>\n", | |
" <td>A</td>\n", | |
" <td>C</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>99</td>\n", | |
" <td>50</td>\n", | |
" <td>45</td>\n", | |
" <td>8</td>\n", | |
" <td>82</td>\n", | |
" <td>87</td>\n", | |
" <td>1</td>\n", | |
" <td>98</td>\n", | |
" <td>47</td>\n", | |
" <td>1</td>\n", | |
" <td>B</td>\n", | |
" <td>X</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>90</td>\n", | |
" <td>16</td>\n", | |
" <td>92</td>\n", | |
" <td>93</td>\n", | |
" <td>28</td>\n", | |
" <td>55</td>\n", | |
" <td>39</td>\n", | |
" <td>67</td>\n", | |
" <td>27</td>\n", | |
" <td>77</td>\n", | |
" <td>B</td>\n", | |
" <td>B</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>53</td>\n", | |
" <td>42</td>\n", | |
" <td>89</td>\n", | |
" <td>56</td>\n", | |
" <td>91</td>\n", | |
" <td>31</td>\n", | |
" <td>55</td>\n", | |
" <td>40</td>\n", | |
" <td>90</td>\n", | |
" <td>50</td>\n", | |
" <td>None</td>\n", | |
" <td>E</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>81</td>\n", | |
" <td>94</td>\n", | |
" <td>95</td>\n", | |
" <td>36</td>\n", | |
" <td>27</td>\n", | |
" <td>84</td>\n", | |
" <td>12</td>\n", | |
" <td>57</td>\n", | |
" <td>21</td>\n", | |
" <td>59</td>\n", | |
" <td>B</td>\n", | |
" <td>B</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>53</td>\n", | |
" <td>85</td>\n", | |
" <td>28</td>\n", | |
" <td>28</td>\n", | |
" <td>1</td>\n", | |
" <td>42</td>\n", | |
" <td>74</td>\n", | |
" <td>16</td>\n", | |
" <td>91</td>\n", | |
" <td>82</td>\n", | |
" <td>None</td>\n", | |
" <td>E</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>19</td>\n", | |
" <td>27</td>\n", | |
" <td>45</td>\n", | |
" <td>86</td>\n", | |
" <td>79</td>\n", | |
" <td>65</td>\n", | |
" <td>92</td>\n", | |
" <td>28</td>\n", | |
" <td>96</td>\n", | |
" <td>26</td>\n", | |
" <td>None</td>\n", | |
" <td>E</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>90</td>\n", | |
" <td>25</td>\n", | |
" <td>45</td>\n", | |
" <td>17</td>\n", | |
" <td>34</td>\n", | |
" <td>67</td>\n", | |
" <td>7</td>\n", | |
" <td>3</td>\n", | |
" <td>69</td>\n", | |
" <td>83</td>\n", | |
" <td>B</td>\n", | |
" <td>B</td>\n", | |
" <td>True</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" col0 col1 col2 col3 col4 col5 col6 col7 col8 col9 colAB caseWhen \\\n", | |
"0 83 38 47 55 44 14 6 76 66 77 B B \n", | |
"1 42 36 39 33 98 58 14 29 3 2 A X \n", | |
"2 51 76 1 93 66 20 75 29 16 11 A C \n", | |
"3 99 50 45 8 82 87 1 98 47 1 B X \n", | |
"4 90 16 92 93 28 55 39 67 27 77 B B \n", | |
"5 53 42 89 56 91 31 55 40 90 50 None E \n", | |
"6 81 94 95 36 27 84 12 57 21 59 B B \n", | |
"7 53 85 28 28 1 42 74 16 91 82 None E \n", | |
"8 19 27 45 86 79 65 92 28 96 26 None E \n", | |
"9 90 25 45 17 34 67 7 3 69 83 B B \n", | |
"\n", | |
" col0_GT49 col1_GT50 col2_GT50 col3_GT49 col4_GT50 \n", | |
"0 True False False True False \n", | |
"1 False False False False True \n", | |
"2 True True False True True \n", | |
"3 True False False False True \n", | |
"4 True False True True False \n", | |
"5 True False True True True \n", | |
"6 True True True False False \n", | |
"7 True True False False False \n", | |
"8 False False False True True \n", | |
"9 True False False False False " | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.head(10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [default]", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment