henryroe · June 20, 2014 21:37
diff --git a/import_nsf_ast_csv.py b/import_nsf_ast_csv.py
 import pandas
 import numpy as np
 # Load the NSF AST awards dump into a DataFrame.
 # Only the three genuine date columns go in parse_dates.  AwardedAmountToDate
 # is a dollar string (the converter strips '$' and ','); listing it as a date
 # column is most likely why it previously did not come back as a float.
 # No dtype is passed for that column: pandas applies a converter INSTEAD of
 # dtype conversion, so giving both only triggers a ParserWarning.
 awards = pandas.read_csv("awards_dump_2014-06-20.csv",
                          parse_dates=['StartDate', 'LastAmendmentDate', 'ExpirationDate'],
                          converters={'AwardedAmountToDate': lambda x:
                                      float(x.replace('$', '').replace(',', ''))})
 # Award length in years of 365.25 days.  The span is turned into float days
 # first and only then divided by 365.25: multiplying np.timedelta64(1, 'D')
 # by 365.25 truncates to a whole 365 days and silently shortens the year.
 awards['DurationYears'] = ((awards['ExpirationDate'] - awards['StartDate']) /
                            np.timedelta64(1, 'D')) / 365.25
 # Safeguard kept from the original script; a no-op when the converter has
 # already produced a float64 column.
 awards['AwardedAmountToDate'] = awards['AwardedAmountToDate'].astype(float)
 # A few awards have ExpirationDate < StartDate or implausibly long durations.
 # Let's eliminate those. (The original author counted 17 out of ~6500.)
 awards = awards[(awards['DurationYears'] > 0) & (awards['DurationYears'] <= 11.)]
	import pandas
	import numpy as np
	# Read the NSF AST awards dump into a DataFrame.
	# parse_dates names just the real date columns.  AwardedAmountToDate holds
	# dollar strings (its converter removes '$' and ','), so it must not be
	# date-parsed -- that is the probable reason it used to need a manual cast.
	# dtype is omitted for it because pandas uses the converter instead of
	# dtype when both are given (and warns about the conflict).
	awards = pandas.read_csv("awards_dump_2014-06-20.csv",
	    parse_dates=['StartDate', 'LastAmendmentDate', 'ExpirationDate'],
	    converters={'AwardedAmountToDate': lambda x:
	        float(x.replace('$', '').replace(',', ''))})
	# Duration in years of 365.25 days.  Divide by one day to get float days,
	# then by 365.25; 365.25 * np.timedelta64(1, 'D') would be truncated by
	# numpy to exactly 365 days.
	awards['DurationYears'] = ((awards['ExpirationDate'] - awards['StartDate']) /
	    np.timedelta64(1, 'D')) / 365.25
	# Retained as a cheap safeguard; does nothing if the column is already float64.
	awards['AwardedAmountToDate'] = awards['AwardedAmountToDate'].astype(float)
	# A few awards have ExpirationDate < StartDate or implausibly long durations.
	# Let's eliminate those. (The original author counted 17 out of ~6500.)
	awards = awards[(awards['DurationYears'] > 0) & (awards['DurationYears'] <= 11.)]