Created
March 7, 2012 14:22
-
-
Save hodbby/1993415 to your computer and use it in GitHub Desktop.
regex
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def find_year(filename):
    """Return the years named in the file's 'Popularity in <year>' headings.

    Args:
        filename: Path of the HTML file to scan.

    Returns:
        A list of digit strings, one for each '>Popularity in NNNN<'
        occurrence in the file, in the order they appear. The list is empty
        when the file contains no such heading.
    """
    # The context manager closes the file even when read() raises. Plain 'r'
    # replaces 'rU': the 'U' flag is deprecated and rejected outright by
    # Python 3.11+, and the pattern never spans a line break anyway.
    with open(filename, 'r') as html_file:
        return re.findall(r'>Popularity in ([\d]+)<', html_file.read())
def find_name_and_rank(filename):
    """Extract every ranked table row from a baby-names HTML file.

    Args:
        filename: Path of the HTML file to scan.

    Returns:
        A list of 3-tuples of strings, one per
        '<tr align="right"><td>..</td><td>..</td><td>..</td>' row, holding
        the rank digits followed by the text of the two name cells. The list
        is empty when no row matches.
    """
    # The rank group is (\d+). The former [\d+] was a character class that
    # matched exactly one character, so every rank of 10 or above failed to
    # match and its row was silently dropped.
    row_pattern = r'<tr align="right"><td>(\d+)</td><td>(\w+)</td><td>(\w+)</td>'
    # The context manager guarantees the file is closed; the original wrote
    # `file2.close` without parentheses, which never called the method.
    with open(filename, 'r') as html_file:
        return re.findall(row_pattern, html_file.read())
def convert_tuple_to_unisex_list(tuple_name_and_rank):
    """Flatten (rank, men's name, women's name) rows into one sorted list.

    Args:
        tuple_name_and_rank: Sequence of rows where index 0 is the rank,
            index 1 the men's name and index 2 the women's name.

    Returns:
        A list of (name, rank) tuples covering both name columns, sorted in
        ascending tuple order. A name repeated within one column keeps the
        rank of its last row; a name present in both columns appears twice.
    """
    # Mapping by name first means later rows overwrite earlier ones for the
    # same name within a column.
    rank_by_mens_name = {row[1]: row[0] for row in tuple_name_and_rank}
    rank_by_womens_name = {row[2]: row[0] for row in tuple_name_and_rank}

    combined = list(rank_by_mens_name.items()) + list(rank_by_womens_name.items())
    return sorted(combined)
def extract_names(filename):
    """
    Given a file name for baby.html, returns the (name, rank) string tuples
    found in its ranked table rows, men's and women's names merged into a
    single sorted list, e.g. [('Ashley', '2'), ('Michael', '1'), ...].
    The year is not part of the result; find_year() reports it separately.
    """
    # Parse the rows, then hand them straight to the merge-and-sort step.
    return convert_tuple_to_unisex_list(find_name_and_rank(filename))
def main():
    """Command-line entry point.

    Usage: [--summaryfile] file [file ...]

    For every file named on the command line, looks up the year(s) with
    find_year() and the sorted name/rank list with extract_names(). Without
    the flag both are written to standard output; with '--summaryfile' they
    are appended to 'result.txt' in the current directory instead.

    Exits with status 1 after printing the usage text when no file is given.
    """
    args = sys.argv[1:]

    summary = False
    if args and args[0] == '--summaryfile':
        summary = True
        del args[0]

    # Checked after the flag is stripped so that a lone '--summaryfile'
    # prints the usage text instead of failing with an IndexError.
    if not args:
        print('usage: [--summaryfile] file [file ...]')
        sys.exit(1)

    # Every listed file is handled, as the usage text promises; previously
    # only the first one was read and the rest were ignored.
    for filename in args:
        the_year_is = find_year(filename)
        list_sorted = extract_names(filename)
        if summary:
            # The context manager closes (and flushes) the summary file; the
            # original `file_output.close` lacked the call parentheses.
            with open('result.txt', 'a') as file_output:
                file_output.write('%s\n%s\n' % (the_year_is, list_sorted))
        else:
            # Same layout the print statements produced: a blank line, then
            # the year list and the name list separated by a single space.
            sys.stdout.write('\n%s %s\n' % (the_year_is, list_sorted))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment