zwkcoding · April 19, 2019 07:46 · Dec 6, 2018 · Dec 6, 2018 · Dec 6, 2018 · Nov 7, 2018
diff --git a/useful_pandas_snippets.md b/useful_pandas_snippets.md
@@ -56,15 +56,19 @@ df = df.loc[df.column == 'somevalue']
 Grab DataFrame rows where column value is present in a list
 
 ```
-valuelist = ['value1', 'value2', 'value3']
-df = df[df.column.isin(valuelist)]
+test_data = {'hi': 'yo', 'bye': 'later'}
+df = pd.DataFrame(list(d.items()), columns=['col1', 'col2'])
+valuelist = ['yo', 'heya']
+df[df.col2.isin(valuelist)]
 ```
 
 Grab DataFrame rows where column value is not present in a list
 
 ```
-valuelist = ['value1', 'value2', 'value3']
-df = df[~df.column.isin(value_list)]
+test_data = {'hi': 'yo', 'bye': 'later'}
+df = pd.DataFrame(list(d.items()), columns=['col1', 'col2'])
+valuelist = ['yo', 'heya']
+df[~df.col2.isin(valuelist)]
 ```
 
 Select from DataFrame using criteria from multiple columns  

diff --git a/useful_pandas_snippets.md b/useful_pandas_snippets.md
@@ -1,19 +1,9 @@
 # Useful Pandas Snippets
 
-A personal diary of DataFrame munging from over the years.
+A personal diary of DataFrame munging over the years.
 
-List unique values in a DataFrame column  
-(h/t [@makmanalp](https://github.com/makmanalp) for the updated syntax!)
-
-```
-df['Column Name'].unique()
-```
 
-For each unique value in a DataFrame column, get a frequency count
-
-```
-df['Column Name'].value_counts()
-```
+## Data Types and Conversion
 
 Convert Series datatype to numeric (will error if column has non-numeric values)  
 (h/t [@makmanalp](https://github.com/makmanalp))
@@ -29,62 +19,60 @@ Convert Series datatype to numeric, changing  non-numeric values to NaN
 pd.to_numeric(df['Column Name'], errors='coerce')
 ```
 
-Grab DataFrame rows where column = a specific value
+Change data type of DataFrame column
 
 ```
-df = df.loc[df.column == 'somevalue']
+df.column_name = df.column_name.astype(np.int64)
 ```
 
-Grab DataFrame rows where column value is present in a list
+## Exploring and Finding Data
+
+Get a report of all duplicate records in a DataFrame, based on specific columns
 
 ```
-valuelist = ['value1', 'value2', 'value3']
-df = df[df.column.isin(valuelist)]
+dupes = df[df.duplicated(
+    ['col1', 'col2', 'col3'], keep=False)]
 ```
 
-Grab DataFrame rows where column value is not present in a list
+List unique values in a DataFrame column  
+(h/t [@makmanalp](https://github.com/makmanalp) for the updated syntax!)
 
 ```
-valuelist = ['value1', 'value2', 'value3']
-df = df[~df.column.isin(value_list)]
+df['Column Name'].unique()
 ```
 
-Delete column from DataFrame
+For each unique value in a DataFrame column, get a frequency count
 
 ```
-del df['column']
+df['Column Name'].value_counts()
 ```
 
-Select from DataFrame using criteria from multiple columns  
-(use `|` instead of `&` to do an OR)
+Grab DataFrame rows where column = a specific value
 
 ```
-newdf = df[(df['column_one']>2004) & (df['column_two']==9)]
+df = df.loc[df.column == 'somevalue']
 ```
 
-Rename several DataFrame columns
+Grab DataFrame rows where column value is present in a list
 
 ```
-df = df.rename(columns = {
-    'col1 old name':'col1 new name',
-    'col2 old name':'col2 new name',
-    'col3 old name':'col3 new name',
-})
+valuelist = ['value1', 'value2', 'value3']
+df = df[df.column.isin(valuelist)]
 ```
 
-Lower-case all DataFrame column names
+Grab DataFrame rows where column value is not present in a list
 
 ```
-df.columns = map(str.lower, df.columns)
+valuelist = ['value1', 'value2', 'value3']
+df = df[~df.column.isin(value_list)]
 ```
 
-Even more fancy DataFrame column re-naming  
-lower-case all DataFrame column names (for example)
+Select from DataFrame using criteria from multiple columns  
+(use `|` instead of `&` to do an OR)
 
 ```
-df.rename(columns=lambda x: x.split('.')[-1], inplace=True)
+newdf = df[(df['column_one']>2004) & (df['column_two']==9)]
 ```
-
 Loop through rows in a DataFrame  
 (if you must)
 
@@ -101,19 +89,39 @@ for row in df.itertuples():
     print(row)
 ```
 
-Next few examples show how to work with text data in Pandas.  
-Full list of .str functions: http://pandas.pydata.org/pandas-docs/stable/text.html
+Get top n for each group of columns in a sorted DataFrame  
+(make sure DataFrame is sorted first)
+
+```
+top5 = df.groupby(
+    ['groupingcol1',
+    'groupingcol2']).head(5)
+```
+
+Grab DataFrame rows where specific column is null/notnull
+
+```
+newdf = df[df['column'].isnull()]
+```
+
+Select from DataFrame using multiple keys of a hierarchical index
+
+```
+df.xs(
+    ('index level 1 value','index level 2 value'),
+    level=('level 1','level 2'))
+```
 
 Slice values in a DataFrame column (aka Series)
 
 ```
 df.column.str[0:2]
 ```
 
-Lower-case everything in a DataFrame column
+Get quick count of rows in a DataFrame
 
 ```
-df.column_name = df.column_name.str.lower()
+len(df.index)
 ```
 
 Get length of data in a DataFrame column
@@ -122,69 +130,61 @@ Get length of data in a DataFrame column
 df.column_name.str.len()
 ```
 
-Sort DataFrame by multiple columns
+## Updating and Cleaning Data
 
-```
-df = df.sort_values(
-    ['col1','col2','col3'],ascending=[1,1,0])
-```
-
-Get top n for each group of columns in a sorted DataFrame  
-(make sure DataFrame is sorted first)
+Delete column from DataFrame
 
 ```
-top5 = df.groupby(
-    ['groupingcol1',
-    'groupingcol2']).head(5)
+del df['column']
 ```
 
-Grab DataFrame rows where specific column is null/notnull
+Rename several DataFrame columns
 
 ```
-newdf = df[df['column'].isnull()]
+df = df.rename(columns = {
+    'col1 old name':'col1 new name',
+    'col2 old name':'col2 new name',
+    'col3 old name':'col3 new name',
+})
 ```
 
-Select from DataFrame using multiple keys of a hierarchical index
+Lower-case all DataFrame column names
 
 ```
-df.xs(
-    ('index level 1 value','index level 2 value'),
-    level=('level 1','level 2'))
+df.columns = map(str.lower, df.columns)
 ```
 
-Change all NaNs to None (useful before loading to a db)
+Even more fancy DataFrame column re-naming  
+lower-case all DataFrame column names (for example)
 
 ```
-df = df.where((pd.notnull(df)), None)
+df.rename(columns=lambda x: x.split('.')[-1], inplace=True)
 ```
 
-More pre-db insert cleanup...make a pass through the DataFrame, stripping whitespace from strings and changing any empty values to `None`  
-(not especially recommended but including here b/c I had to do this in real life one time)
+Lower-case everything in a DataFrame column
 
 ```
-df = df.applymap(lambda x: str(x).strip() if len(str(x).strip()) else None)
+df.column_name = df.column_name.str.lower()
 ```
 
-Get quick count of rows in a DataFrame
+Sort DataFrame by multiple columns
 
 ```
-len(df.index)
+df = df.sort_values(
+    ['col1','col2','col3'],ascending=[1,1,0])
 ```
 
-Pivot data (with flexibility about what what becomes a column and what stays a row).  
-Syntax works on Pandas >= .14
+Change all NaNs to None (useful before loading to a db)
 
 ```
-pd.pivot_table(
-  df,values='cell_value',
-  index=['col1', 'col2', 'col3'], #these stay as columns; will fail silently if any of these cols have null values
-  columns=['col4']) #data values in this column become their own column
- ```
+df = df.where((pd.notnull(df)), None)
+```
 
-Change data type of DataFrame column
+More pre-db insert cleanup...make a pass through the DataFrame, stripping whitespace from strings and changing any empty values to `None`  
+(not especially recommended but including here b/c I had to do this in real life once)
 
 ```
-df.column_name = df.column_name.astype(np.int64)
+df = df.applymap(lambda x: str(x).strip() if len(str(x).strip()) else None)
 ```
 
 Get rid of non-numeric values throughout a DataFrame:
@@ -214,21 +214,11 @@ df = df.fillna({
     'col6': '99'
 })
 ```
-
-Concatenate two DataFrame columns into a new, single column  
-(useful when dealing with composite keys, for example)  
-(h/t [@makmanalp](https://github.com/makmanalp) for improving this one!)
-
-```
-df['newcol'] = df['col1'].astype(str) + df['col2'].astype(str)
-```
-
-Doing calculations with DataFrame columns that have missing values  
-In example below, swap in 0 for df['col1'] cells that contain null.
+Doing calculations with DataFrame columns that have missing values. In example below, swap in 0 for df['col1'] cells that contain null.
 
 ```
 df['new_col'] = np.where(
-    pd.isnull(df['col1']),0,df['col1']) + df['col2']
+    pd.isnull(df['col1']), 0, df['col1']) + df['col2']
 ```
 
 Split delimited values in a DataFrame column into two new columns  
@@ -245,27 +235,26 @@ Collapse hierarchical column indexes
 df.columns = df.columns.get_level_values(0)
 ```
 
-Convert Django queryset to DataFrame
+## Reshaping, Concatenating, and Merging Data
 
-```
-qs = DjangoModelName.objects.all()
-q = qs.values()
-df = pd.DataFrame.from_records(q)
-```
-
-Create a DataFrame from a Python dictionary
+Pivot data (with flexibility about what what becomes a column and what stays a row).  
 
 ```
-df = pd.DataFrame(list(a_dictionary.items()), columns = ['column1', 'column2'])
-```
+pd.pivot_table(
+  df,values='cell_value',
+  index=['col1', 'col2', 'col3'], #these stay as columns; will fail silently if any of these cols have null values
+  columns=['col4']) #data values in this column become their own column
+ ```
 
-Get a report of all duplicate records in a DataFrame, based on specific columns
+Concatenate two DataFrame columns into a new, single column  
+(useful when dealing with composite keys, for example)  
+(h/t [@makmanalp](https://github.com/makmanalp) for improving this one!)
 
 ```
-dupes = df[df.duplicated(
-    ['col1', 'col2', 'col3'], keep=False)]
+df['newcol'] = df['col1'].astype(str) + df['col2'].astype(str)
 ```
 
+## Display and formatting
 Set up formatting so larger numbers aren't displayed in scientific notation  
 (h/t [@thecapacity](https://github.com/thecapacity))
 
@@ -278,3 +267,19 @@ To display with commas and no decimals
 ```
 pd.options.display.float_format = '{:,.0f}'.format
 ```
+
+## Creating DataFrames
+
+Create a DataFrame from a Python dictionary
+
+```
+df = pd.DataFrame(list(a_dictionary.items()), columns = ['column1', 'column2'])
+```
+
+Convert Django queryset to DataFrame
+
+```
+qs = DjangoModelName.objects.all()
+q = qs.values()
+df = pd.DataFrame.from_records(q)
+```
diff --git a/useful_pandas_snippets.md b/useful_pandas_snippets.md
@@ -0,0 +1,280 @@
+# Useful Pandas Snippets
+
+A personal diary of DataFrame munging from over the years.
+
+List unique values in a DataFrame column  
+(h/t [@makmanalp](https://github.com/makmanalp) for the updated syntax!)
+
+```
+df['Column Name'].unique()
+```
+
+For each unique value in a DataFrame column, get a frequency count
+
+```
+df['Column Name'].value_counts()
+```
+
+Convert Series datatype to numeric (will error if column has non-numeric values)  
+(h/t [@makmanalp](https://github.com/makmanalp))
+
+```
+pd.to_numeric(df['Column Name'])
+```
+
+Convert Series datatype to numeric, changing  non-numeric values to NaN  
+(h/t [@makmanalp](https://github.com/makmanalp) for the updated syntax!)
+
+```
+pd.to_numeric(df['Column Name'], errors='coerce')
+```
+
+Grab DataFrame rows where column = a specific value
+
+```
+df = df.loc[df.column == 'somevalue']
+```
+
+Grab DataFrame rows where column value is present in a list
+
+```
+valuelist = ['value1', 'value2', 'value3']
+df = df[df.column.isin(valuelist)]
+```
+
+Grab DataFrame rows where column value is not present in a list
+
+```
+valuelist = ['value1', 'value2', 'value3']
+df = df[~df.column.isin(value_list)]
+```
+
+Delete column from DataFrame
+
+```
+del df['column']
+```
+
+Select from DataFrame using criteria from multiple columns  
+(use `|` instead of `&` to do an OR)
+
+```
+newdf = df[(df['column_one']>2004) & (df['column_two']==9)]
+```
+
+Rename several DataFrame columns
+
+```
+df = df.rename(columns = {
+    'col1 old name':'col1 new name',
+    'col2 old name':'col2 new name',
+    'col3 old name':'col3 new name',
+})
+```
+
+Lower-case all DataFrame column names
+
+```
+df.columns = map(str.lower, df.columns)
+```
+
+Even more fancy DataFrame column re-naming  
+lower-case all DataFrame column names (for example)
+
+```
+df.rename(columns=lambda x: x.split('.')[-1], inplace=True)
+```
+
+Loop through rows in a DataFrame  
+(if you must)
+
+```
+for index, row in df.iterrows():
+    print (index, row['some column'])
+```
+
+Much faster way to loop through DataFrame rows if you can work with tuples  
+(h/t [hughamacmullaniv](https://github.com/hughamacmullaniv))
+
+```
+for row in df.itertuples():
+    print(row)
+```
+
+Next few examples show how to work with text data in Pandas.  
+Full list of .str functions: http://pandas.pydata.org/pandas-docs/stable/text.html
+
+Slice values in a DataFrame column (aka Series)
+
+```
+df.column.str[0:2]
+```
+
+Lower-case everything in a DataFrame column
+
+```
+df.column_name = df.column_name.str.lower()
+```
+
+Get length of data in a DataFrame column
+
+```
+df.column_name.str.len()
+```
+
+Sort DataFrame by multiple columns
+
+```
+df = df.sort_values(
+    ['col1','col2','col3'],ascending=[1,1,0])
+```
+
+Get top n for each group of columns in a sorted DataFrame  
+(make sure DataFrame is sorted first)
+
+```
+top5 = df.groupby(
+    ['groupingcol1',
+    'groupingcol2']).head(5)
+```
+
+Grab DataFrame rows where specific column is null/notnull
+
+```
+newdf = df[df['column'].isnull()]
+```
+
+Select from DataFrame using multiple keys of a hierarchical index
+
+```
+df.xs(
+    ('index level 1 value','index level 2 value'),
+    level=('level 1','level 2'))
+```
+
+Change all NaNs to None (useful before loading to a db)
+
+```
+df = df.where((pd.notnull(df)), None)
+```
+
+More pre-db insert cleanup...make a pass through the DataFrame, stripping whitespace from strings and changing any empty values to `None`  
+(not especially recommended but including here b/c I had to do this in real life one time)
+
+```
+df = df.applymap(lambda x: str(x).strip() if len(str(x).strip()) else None)
+```
+
+Get quick count of rows in a DataFrame
+
+```
+len(df.index)
+```
+
+Pivot data (with flexibility about what what becomes a column and what stays a row).  
+Syntax works on Pandas >= .14
+
+```
+pd.pivot_table(
+  df,values='cell_value',
+  index=['col1', 'col2', 'col3'], #these stay as columns; will fail silently if any of these cols have null values
+  columns=['col4']) #data values in this column become their own column
+ ```
+
+Change data type of DataFrame column
+
+```
+df.column_name = df.column_name.astype(np.int64)
+```
+
+Get rid of non-numeric values throughout a DataFrame:
+
+```
+for col in refunds.columns.values:
+  refunds[col] = refunds[col].replace(
+      '[^0-9]+.-', '', regex=True)
+ ```
+
+Set DataFrame column values based on other column   values  
+(h/t: [@mlevkov](https://github.com/mlevkov))
+
+```
+df.loc[(df['column1'] == some_value) & (df['column2'] == some_other_value), ['column_to_change']] = new_value
+```
+
+Clean up missing values in multiple DataFrame columns
+
+```
+df = df.fillna({
+    'col1': 'missing',
+    'col2': '99.999',
+    'col3': '999',
+    'col4': 'missing',
+    'col5': 'missing',
+    'col6': '99'
+})
+```
+
+Concatenate two DataFrame columns into a new, single column  
+(useful when dealing with composite keys, for example)  
+(h/t [@makmanalp](https://github.com/makmanalp) for improving this one!)
+
+```
+df['newcol'] = df['col1'].astype(str) + df['col2'].astype(str)
+```
+
+Doing calculations with DataFrame columns that have missing values  
+In example below, swap in 0 for df['col1'] cells that contain null.
+
+```
+df['new_col'] = np.where(
+    pd.isnull(df['col1']),0,df['col1']) + df['col2']
+```
+
+Split delimited values in a DataFrame column into two new columns  
+
+```
+df['new_col1'], df['new_col2'] = zip(
+    *df['original_col'].apply(
+        lambda x: x.split(': ', 1)))
+```
+
+Collapse hierarchical column indexes
+
+```
+df.columns = df.columns.get_level_values(0)
+```
+
+Convert Django queryset to DataFrame
+
+```
+qs = DjangoModelName.objects.all()
+q = qs.values()
+df = pd.DataFrame.from_records(q)
+```
+
+Create a DataFrame from a Python dictionary
+
+```
+df = pd.DataFrame(list(a_dictionary.items()), columns = ['column1', 'column2'])
+```
+
+Get a report of all duplicate records in a DataFrame, based on specific columns
+
+```
+dupes = df[df.duplicated(
+    ['col1', 'col2', 'col3'], keep=False)]
+```
+
+Set up formatting so larger numbers aren't displayed in scientific notation  
+(h/t [@thecapacity](https://github.com/thecapacity))
+
+```
+pd.set_option('display.float_format', lambda x: '%.3f' % x)
+```
+
+To display with commas and no decimals
+
+```
+pd.options.display.float_format = '{:,.0f}'.format
+```
diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -1,153 +0,0 @@
-# List unique values in a DataFrame column
-# h/t @makmanalp for the updated syntax!
-df['Column Name'].unique()
-
-# For each unique value in a DataFrame column, get a frequency count
-df['Column Name'].value_counts()
-
-# Convert Series datatype to numeric (will error if column has non-numeric values)
-# h/t @makmanalp
-pd.to_numeric(df['Column Name'])
-
-# Convert Series datatype to numeric, changing non-numeric values to NaN
-# h/t @makmanalp for the updated syntax!
-pd.to_numeric(df['Column Name'], errors='coerce')
-
-# Grab DataFrame rows where column = a specific value
-df = df.loc[df.column == 'somevalue']
-
-# Grab DataFrame rows where column value is present in a list
-valuelist = ['value1', 'value2', 'value3']
-df = df[df.column.isin(valuelist)]
-
-# Grab DataFrame rows where column value is not present in a list
-valuelist = ['value1', 'value2', 'value3']
-df = df[~df.column.isin(value_list)]
-
-# Delete column from DataFrame
-del df['column']
-
-# Select from DataFrame using criteria from multiple columns
-# (use `|` instead of `&` to do an OR)
-newdf = df[(df['column_one']>2004) & (df['column_two']==9)]
-
-# Rename several DataFrame columns
-df = df.rename(columns = {
-    'col1 old name':'col1 new name',
-    'col2 old name':'col2 new name',
-    'col3 old name':'col3 new name',
-})
-
-# Lower-case all DataFrame column names
-df.columns = map(str.lower, df.columns)
-
-# Even more fancy DataFrame column re-naming
-# lower-case all DataFrame column names (for example)
-df.rename(columns=lambda x: x.split('.')[-1], inplace=True)
-
-# Loop through rows in a DataFrame
-# (if you must)
-for index, row in df.iterrows():
-    print index, row['some column']
-
-# Much faster way to loop through DataFrame rows
-# if you can work with tuples
-# (h/t hughamacmullaniv)
-for row in df.itertuples():
-    print(row)
-
-# Next few examples show how to work with text data in Pandas.
-# Full list of .str functions: http://pandas.pydata.org/pandas-docs/stable/text.html
-
-# Slice values in a DataFrame column (aka Series)
-df.column.str[0:2]
-
-# Lower-case everything in a DataFrame column
-df.column_name = df.column_name.str.lower()
-
-# Get length of data in a DataFrame column
-df.column_name.str.len()
-
-# Sort dataframe by multiple columns
-df = df.sort_values(['col1','col2','col3'],ascending=[1,1,0])
-
-# Get top n for each group of columns in a sorted dataframe
-# (make sure dataframe is sorted first)
-top5 = df.groupby(['groupingcol1', 'groupingcol2']).head(5)
-
-# Grab DataFrame rows where specific column is null/notnull
-newdf = df[df['column'].isnull()]
-
-# Select from DataFrame using multiple keys of a hierarchical index
-df.xs(('index level 1 value','index level 2 value'), level=('level 1','level 2'))
-
-# Change all NaNs to None (useful before
-# loading to a db)
-df = df.where((pd.notnull(df)), None)
-
-# More pre-db insert cleanup...make a pass through the dataframe, stripping whitespace
-# from strings and changing any empty values to None
-# (not especially recommended but including here b/c I had to do this in real life one time)
-df = df.applymap(lambda x: str(x).strip() if len(str(x).strip()) else None)
-
-# Get quick count of rows in a DataFrame
-len(df.index)
-
-# Pivot data (with flexibility about what what
-# becomes a column and what stays a row).
-# Syntax works on Pandas >= .14
-pd.pivot_table(
-  df,values='cell_value',
-  index=['col1', 'col2', 'col3'], #these stay as columns; will fail silently if any of these cols have null values
-  columns=['col4']) #data values in this column become their own column
-
-# Change data type of DataFrame column
-df.column_name = df.column_name.astype(np.int64)
-
-# Get rid of non-numeric values throughout a DataFrame:
-for col in refunds.columns.values:
-  refunds[col] = refunds[col].replace('[^0-9]+.-', '', regex=True)
-
-# Set DataFrame column values based on other column values (h/t: @mlevkov)
-df.loc[(df['column1'] == some_value) & (df['column2'] == some_other_value), ['column_to_change']] = new_value
-
-# Clean up missing values in multiple DataFrame columns
-df = df.fillna({
-    'col1': 'missing',
-    'col2': '99.999',
-    'col3': '999',
-    'col4': 'missing',
-    'col5': 'missing',
-    'col6': '99'
-})
-
-# Concatenate two DataFrame columns into a new, single column
-# (useful when dealing with composite keys, for example)
-# (h/t @makmanalp for improving this one!)
-df['newcol'] = df['col1'].astype(str) + df['col2'].astype(str)
-
-# Doing calculations with DataFrame columns that have missing values
-# In example below, swap in 0 for df['col1'] cells that contain null
-df['new_col'] = np.where(pd.isnull(df['col1']),0,df['col1']) + df['col2']
-
-# Split delimited values in a DataFrame column into two new columns
-df['new_col1'], df['new_col2'] = zip(*df['original_col'].apply(lambda x: x.split(': ', 1)))
-
-# Collapse hierarchical column indexes
-df.columns = df.columns.get_level_values(0)
-
-# Convert Django queryset to DataFrame
-qs = DjangoModelName.objects.all()
-q = qs.values()
-df = pd.DataFrame.from_records(q)
-
-# Create a DataFrame from a Python dictionary
-df = pd.DataFrame(list(a_dictionary.items()), columns = ['column1', 'column2'])
-
-# Get a report of all duplicate records in a dataframe, based on specific columns
-dupes = df[df.duplicated(['col1', 'col2', 'col3'], keep=False)]
-
-# Set up formatting so larger numbers aren't displayed in scientific notation (h/t @thecapacity)
-pd.set_option('display.float_format', lambda x: '%.3f' % x)
-# To display with commas and no decimals
-pd.options.display.float_format = '{:,.0f}'.format

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -2,6 +2,9 @@
 # h/t @makmanalp for the updated syntax!
 df['Column Name'].unique()
 
+# For each unique value in a DataFrame column, get a frequency count
+df['Column Name'].value_counts()
+
 # Convert Series datatype to numeric (will error if column has non-numeric values)
 # h/t @makmanalp
 pd.to_numeric(df['Column Name'])

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -10,11 +10,14 @@
 # h/t @makmanalp for the updated syntax!
 pd.to_numeric(df['Column Name'], errors='coerce')
 
-# Grab DataFrame rows where column has certain values
+# Grab DataFrame rows where column = a specific value
+df = df.loc[df.column == 'somevalue']
+
+# Grab DataFrame rows where column value is present in a list
 valuelist = ['value1', 'value2', 'value3']
 df = df[df.column.isin(valuelist)]
 
-# Grab DataFrame rows where column doesn't have certain values
+# Grab DataFrame rows where column value is not present in a list
 valuelist = ['value1', 'value2', 'value3']
 df = df[~df.column.isin(value_list)]
 

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -142,4 +142,6 @@
 dupes = df[df.duplicated(['col1', 'col2', 'col3'], keep=False)]
 
 # Set up formatting so larger numbers aren't displayed in scientific notation (h/t @thecapacity)
-pd.set_option('display.float_format', lambda x: '%.3f' % x)
+pd.set_option('display.float_format', lambda x: '%.3f' % x)
+# To display with commas and no decimals
+pd.options.display.float_format = '{:,.0f}'.format
diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -63,7 +63,7 @@
 df.column_name.str.len()
 
 # Sort dataframe by multiple columns
-df = df.sort(['col1','col2','col3'],ascending=[1,1,0])
+df = df.sort_values(['col1','col2','col3'],ascending=[1,1,0])
 
 # Get top n for each group of columns in a sorted dataframe
 # (make sure dataframe is sorted first)

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -2,8 +2,13 @@
 # h/t @makmanalp for the updated syntax!
 df['Column Name'].unique()
 
-# Convert Series datatype to numeric, getting rid of any non-numeric values
-df['col'] = df['col'].astype(str).convert_objects(convert_numeric=True)
+# Convert Series datatype to numeric (will error if column has non-numeric values)
+# h/t @makmanalp
+pd.to_numeric(df['Column Name'])
+
+# Convert Series datatype to numeric, changing non-numeric values to NaN
+# h/t @makmanalp for the updated syntax!
+pd.to_numeric(df['Column Name'], errors='coerce')
 
 # Grab DataFrame rows where column has certain values
 valuelist = ['value1', 'value2', 'value3']

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -1,5 +1,6 @@
 # List unique values in a DataFrame column
-pd.unique(df.column_name.ravel())
+# h/t @makmanalp for the updated syntax!
+df['Column Name'].unique()
 
 # Convert Series datatype to numeric, getting rid of any non-numeric values
 df['col'] = df['col'].astype(str).convert_objects(convert_numeric=True)

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -111,7 +111,8 @@
 
 # Concatenate two DataFrame columns into a new, single column
 # (useful when dealing with composite keys, for example)
-df['newcol'] = df['col1'].map(str) + df['col2'].map(str)
+# (h/t @makmanalp for improving this one!)
+df['newcol'] = df['col1'].astype(str) + df['col2'].astype(str)
 
 # Doing calculations with DataFrame columns that have missing values
 # In example below, swap in 0 for df['col1'] cells that contain null

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -36,7 +36,13 @@
 # Loop through rows in a DataFrame
 # (if you must)
 for index, row in df.iterrows():
-    print index, row['some column']  
+    print index, row['some column']
+
+# Much faster way to loop through DataFrame rows
+# if you can work with tuples
+# (h/t hughamacmullaniv)
+for row in df.itertuples():
+    print(row)
 
 # Next few examples show how to work with text data in Pandas.
 # Full list of .str functions: http://pandas.pydata.org/pandas-docs/stable/text.html

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -67,6 +67,11 @@
 # loading to a db)
 df = df.where((pd.notnull(df)), None)
 
+# More pre-db insert cleanup...make a pass through the dataframe, stripping whitespace
+# from strings and changing any empty values to None
+# (not especially recommended but including here b/c I had to do this in real life one time)
+df = df.applymap(lambda x: str(x).strip() if len(str(x).strip()) else None)
+
 # Get quick count of rows in a DataFrame
 len(df.index)
 

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -121,4 +121,7 @@
 df = pd.DataFrame(list(a_dictionary.items()), columns = ['column1', 'column2'])
 
 # Get a report of all duplicate records in a dataframe, based on specific columns
-dupes = df[df.duplicated(['col1', 'col2', 'col3'], keep=False)]
+dupes = df[df.duplicated(['col1', 'col2', 'col3'], keep=False)]
+
+# Set up formatting so larger numbers aren't displayed in scientific notation (h/t @thecapacity)
+pd.set_option('display.float_format', lambda x: '%.3f' % x)
diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -75,7 +75,7 @@
 # Syntax works on Pandas >= .14
 pd.pivot_table(
   df,values='cell_value',
-  index=['col1', 'col2', 'col3'], #these stay as columns
+  index=['col1', 'col2', 'col3'], #these stay as columns; will fail silently if any of these cols have null values
   columns=['col4']) #data values in this column become their own column
 
 # Change data type of DataFrame column

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -1,93 +1,94 @@
-#List unique values in a DataFrame column
+# List unique values in a DataFrame column
 pd.unique(df.column_name.ravel())
 
-#Convert Series datatype to numeric, getting rid of any non-numeric values
+# Convert Series datatype to numeric, getting rid of any non-numeric values
 df['col'] = df['col'].astype(str).convert_objects(convert_numeric=True)
 
-#Grab DataFrame rows where column has certain values
+# Grab DataFrame rows where column has certain values
 valuelist = ['value1', 'value2', 'value3']
 df = df[df.column.isin(valuelist)]
 
-#Grab DataFrame rows where column doesn't have certain values
+# Grab DataFrame rows where column doesn't have certain values
 valuelist = ['value1', 'value2', 'value3']
 df = df[~df.column.isin(value_list)]
 
-#Delete column from DataFrame
+# Delete column from DataFrame
 del df['column']
 
-#Select from DataFrame using criteria from multiple columns
+# Select from DataFrame using criteria from multiple columns
+# (use `|` instead of `&` to do an OR)
 newdf = df[(df['column_one']>2004) & (df['column_two']==9)]
 
-#Rename several DataFrame columns
+# Rename several DataFrame columns
 df = df.rename(columns = {
     'col1 old name':'col1 new name',
     'col2 old name':'col2 new name',
     'col3 old name':'col3 new name',
 })
 
-#lower-case all DataFrame column names
+# Lower-case all DataFrame column names
 df.columns = map(str.lower, df.columns)
 
-#even more fancy DataFrame column re-naming
-#lower-case all DataFrame column names (for example)
+# Even more fancy DataFrame column re-naming
+# lower-case all DataFrame column names (for example)
 df.rename(columns=lambda x: x.split('.')[-1], inplace=True)
 
-#Loop through rows in a DataFrame
-#(if you must)
+# Loop through rows in a DataFrame
+# (if you must)
 for index, row in df.iterrows():
     print index, row['some column']  
 
-#Next few examples show how to work with text data in Pandas.
-#Full list of .str functions: http://pandas.pydata.org/pandas-docs/stable/text.html
+# Next few examples show how to work with text data in Pandas.
+# Full list of .str functions: http://pandas.pydata.org/pandas-docs/stable/text.html
 
-#Slice values in a DataFrame column (aka Series)
+# Slice values in a DataFrame column (aka Series)
 df.column.str[0:2]
 
-#Lower-case everything in a DataFrame column
+# Lower-case everything in a DataFrame column
 df.column_name = df.column_name.str.lower()
 
-#Get length of data in a DataFrame column
+# Get length of data in a DataFrame column
 df.column_name.str.len()
 
-#Sort dataframe by multiple columns
+# Sort dataframe by multiple columns
 df = df.sort(['col1','col2','col3'],ascending=[1,1,0])
 
-#get top n for each group of columns in a sorted dataframe
-#(make sure dataframe is sorted first)
+# Get top n for each group of columns in a sorted dataframe
+# (make sure dataframe is sorted first)
 top5 = df.groupby(['groupingcol1', 'groupingcol2']).head(5)
 
-#Grab DataFrame rows where specific column is null/notnull
+# Grab DataFrame rows where specific column is null/notnull
 newdf = df[df['column'].isnull()]
 
-#select from DataFrame using multiple keys of a hierarchical index
+# Select from DataFrame using multiple keys of a hierarchical index
 df.xs(('index level 1 value','index level 2 value'), level=('level 1','level 2'))
 
-#Change all NaNs to None (useful before
-#loading to a db)
+# Change all NaNs to None (useful before
+# loading to a db)
 df = df.where((pd.notnull(df)), None)
 
-#Get quick count of rows in a DataFrame
+# Get quick count of rows in a DataFrame
 len(df.index)
 
-#Pivot data (with flexibility about what what
-#becomes a column and what stays a row).
-#Syntax works on Pandas >= .14
+# Pivot data (with flexibility about what what
+# becomes a column and what stays a row).
+# Syntax works on Pandas >= .14
 pd.pivot_table(
   df,values='cell_value',
   index=['col1', 'col2', 'col3'], #these stay as columns
   columns=['col4']) #data values in this column become their own column
 
-#change data type of DataFrame column
+# Change data type of DataFrame column
 df.column_name = df.column_name.astype(np.int64)
 
 # Get rid of non-numeric values throughout a DataFrame:
 for col in refunds.columns.values:
   refunds[col] = refunds[col].replace('[^0-9]+.-', '', regex=True)
 
-#Set DataFrame column values based on other column values (h/t: @mlevkov)
+# Set DataFrame column values based on other column values (h/t: @mlevkov)
 df.loc[(df['column1'] == some_value) & (df['column2'] == some_other_value), ['column_to_change']] = new_value
 
-#Clean up missing values in multiple DataFrame columns
+# Clean up missing values in multiple DataFrame columns
 df = df.fillna({
     'col1': 'missing',
     'col2': '99.999',
@@ -97,12 +98,12 @@
     'col6': '99'
 })
 
-#Concatenate two DataFrame columns into a new, single column
-#(useful when dealing with composite keys, for example)
+# Concatenate two DataFrame columns into a new, single column
+# (useful when dealing with composite keys, for example)
 df['newcol'] = df['col1'].map(str) + df['col2'].map(str)
 
-#Doing calculations with DataFrame columns that have missing values
-#In example below, swap in 0 for df['col1'] cells that contain null
+# Doing calculations with DataFrame columns that have missing values
+# In example below, swap in 0 for df['col1'] cells that contain null
 df['new_col'] = np.where(pd.isnull(df['col1']),0,df['col1']) + df['col2']
 
 # Split delimited values in a DataFrame column into two new columns
@@ -111,13 +112,13 @@
 # Collapse hierarchical column indexes
 df.columns = df.columns.get_level_values(0)
 
-#Convert Django queryset to DataFrame
+# Convert Django queryset to DataFrame
 qs = DjangoModelName.objects.all()
 q = qs.values()
 df = pd.DataFrame.from_records(q)
 
-#Create a DataFrame from a Python dictionary
+# Create a DataFrame from a Python dictionary
 df = pd.DataFrame(list(a_dictionary.items()), columns = ['column1', 'column2'])
 
-#Get a report of all duplicate records in a dataframe, based on specific columns
+# Get a report of all duplicate records in a dataframe, based on specific columns
 dupes = df[df.duplicated(['col1', 'col2', 'col3'], keep=False)]
diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -117,4 +117,7 @@
 df = pd.DataFrame.from_records(q)
 
 #Create a DataFrame from a Python dictionary
-df = pd.DataFrame(list(a_dictionary.items()), columns = ['column1', 'column2'])
+df = pd.DataFrame(list(a_dictionary.items()), columns = ['column1', 'column2'])
+
+#Get a report of all duplicate records in a dataframe, based on specific columns
+dupes = df[df.duplicated(['col1', 'col2', 'col3'], keep=False)]
diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -84,8 +84,8 @@
 for col in refunds.columns.values:
   refunds[col] = refunds[col].replace('[^0-9]+.-', '', regex=True)
 
-#Set DataFrame column values based on other column values
-df['column_to_change'][(df['column1'] == some_value) & (df['column2'] == some_other_value)] = new_value
+#Set DataFrame column values based on other column values (h/t: @mlevkov)
+df.loc[(df['column1'] == some_value) & (df['column2'] == some_other_value), ['column_to_change']] = new_value
 
 #Clean up missing values in multiple DataFrame columns
 df = df.fillna({

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -6,7 +6,7 @@
 
 #Grab DataFrame rows where column has certain values
 valuelist = ['value1', 'value2', 'value3']
-df = df[df.column.isin(value_list)]
+df = df[df.column.isin(valuelist)]
 
 #Grab DataFrame rows where column doesn't have certain values
 valuelist = ['value1', 'value2', 'value3']

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -37,9 +37,18 @@
 for index, row in df.iterrows():
     print index, row['some column']  
 
+#Next few examples show how to work with text data in Pandas.
+#Full list of .str functions: http://pandas.pydata.org/pandas-docs/stable/text.html
+
+#Slice values in a DataFrame column (aka Series)
+df.column.str[0:2]
+
 #Lower-case everything in a DataFrame column
 df.column_name = df.column_name.str.lower()
 
+#Get length of data in a DataFrame column
+df.column_name.str.len()
+
 #Sort dataframe by multiple columns
 df = df.sort(['col1','col2','col3'],ascending=[1,1,0])
 
@@ -49,7 +58,6 @@
 
 #Grab DataFrame rows where specific column is null/notnull
 newdf = df[df['column'].isnull()]
-len(newdf)
 
 #select from DataFrame using multiple keys of a hierarchical index
 df.xs(('index level 1 value','index level 2 value'), level=('level 1','level 2'))
@@ -58,9 +66,6 @@
 #loading to a db)
 df = df.where((pd.notnull(df)), None)
 
-#Slice values in a DataFrame column (aka Series)
-df.column.str[0:2]
-
 #Get quick count of rows in a DataFrame
 len(df.index)
 

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -34,7 +34,7 @@
 
 #Loop through rows in a DataFrame
 #(if you must)
-for index, row in df:
+for index, row in df.iterrows():
     print index, row['some column']  
 
 #Lower-case everything in a DataFrame column

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -107,7 +107,9 @@
 df.columns = df.columns.get_level_values(0)
 
 #Convert Django queryset to DataFrame
-#convert queryset to dataframe
 qs = DjangoModelName.objects.all()
 q = qs.values()
-df = pd.DataFrame.from_records(q)
+df = pd.DataFrame.from_records(q)
+
+#Create a DataFrame from a Python dictionary
+df = pd.DataFrame(list(a_dictionary.items()), columns = ['column1', 'column2'])
diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -92,6 +92,10 @@
     'col6': '99'
 })
 
+#Concatenate two DataFrame columns into a new, single column
+#(useful when dealing with composite keys, for example)
+df['newcol'] = df['col1'].map(str) + df['col2'].map(str)
+
 #Doing calculations with DataFrame columns that have missing values
 #In example below, swap in 0 for df['col1'] cells that contain null
 df['new_col'] = np.where(pd.isnull(df['col1']),0,df['col1']) + df['col2']

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -99,6 +99,9 @@
 # Split delimited values in a DataFrame column into two new columns
 df['new_col1'], df['new_col2'] = zip(*df['original_col'].apply(lambda x: x.split(': ', 1)))
 
+# Collapse hierarchical column indexes
+df.columns = df.columns.get_level_values(0)
+
 #Convert Django queryset to DataFrame
 #convert queryset to dataframe
 qs = DjangoModelName.objects.all()

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -32,10 +32,6 @@
 #lower-case all DataFrame column names (for example)
 df.rename(columns=lambda x: x.split('.')[-1], inplace=True)
 
-#set entire column values in a way that avoids the 
-#SettingWithCopy Warning
-df.loc[:, 'column_name'] = 'new value'
-
 #Loop through rows in a DataFrame
 #(if you must)
 for index, row in df:

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -34,7 +34,7 @@
 
 #set entire column values in a way that avoids the 
 #SettingWithCopy Warning
-df.loc[df.index, 'column_name'] = 'new value'
+df.loc[:, 'column_name'] = 'new value'
 
 #Loop through rows in a DataFrame
 #(if you must)

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -32,6 +32,10 @@
 #lower-case all DataFrame column names (for example)
 df.rename(columns=lambda x: x.split('.')[-1], inplace=True)
 
+#set entire column values in a way that avoids the 
+#SettingWithCopy Warning
+df.loc[df.index, 'column_name'] = 'new value'
+
 #Loop through rows in a DataFrame
 #(if you must)
 for index, row in df:

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -12,6 +12,9 @@
 valuelist = ['value1', 'value2', 'value3']
 df = df[~df.column.isin(value_list)]
 
+#Delete column from DataFrame
+del df['column']
+
 #Select from DataFrame using criteria from multiple columns
 newdf = df[(df['column_one']>2004) & (df['column_two']==9)]
 

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -55,6 +55,9 @@
 #loading to a db)
 df = df.where((pd.notnull(df)), None)
 
+#Slice values in a DataFrame column (aka Series)
+df.column.str[0:2]
+
 #Get quick count of rows in a DataFrame
 len(df.index)
 

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -22,14 +22,18 @@
     'col3 old name':'col3 new name',
 })
 
+#lower-case all DataFrame column names
+df.columns = map(str.lower, df.columns)
+
+#even more fancy DataFrame column re-naming
+#lower-case all DataFrame column names (for example)
+df.rename(columns=lambda x: x.split('.')[-1], inplace=True)
+
 #Loop through rows in a DataFrame
 #(if you must)
 for index, row in df:
     print index, row['some column']  
 
-#lower-case all DataFrame column names
-df.columns = map(str.lower, df.columns)
-
 #Lower-case everything in a DataFrame column
 df.column_name = df.column_name.str.lower()
 

diff --git a/useful_pandas_snippets.py b/useful_pandas_snippets.py
@@ -69,6 +69,9 @@
 for col in refunds.columns.values:
   refunds[col] = refunds[col].replace('[^0-9]+.-', '', regex=True)
 
+#Set DataFrame column values based on other column values
+df['column_to_change'][(df['column1'] == some_value) & (df['column2'] == some_other_value)] = new_value
+
 #Clean up missing values in multiple DataFrame columns
 df = df.fillna({
     'col1': 'missing',