Created
February 13, 2012 07:21
-
-
Save hodbby/1814587 to your computer and use it in GitHub Desktop.
wordcount
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
def calculate_data(line_data):
    """Count how many times each item occurs in *line_data*.

    Args:
        line_data: Iterable of hashable items (the words to tally).

    Returns:
        A plain ``dict`` mapping each distinct item to its occurrence count.
        An empty iterable yields an empty dict.
    """
    # Named ``counts`` rather than ``dict`` so the builtin type is not
    # shadowed inside this function.
    counts = {}
    for word in line_data:
        # get() supplies 0 the first time a word is seen.
        counts[word] = counts.get(word, 0) + 1
    return counts
def file_to_dict(file1):
    """Read an open text file and return a word-count dictionary.

    The whole file is read, lower-cased and split into words, and the
    resulting list is handed to ``calculate_data``.

    Args:
        file1: Readable file object whose ``read()`` returns the text.

    Returns:
        Whatever ``calculate_data`` returns for the list of lower-cased
        words; an empty file produces an empty word list.
    """
    text = file1.read().lower()
    # split() with no separator breaks on any run of whitespace, so
    # newlines, tabs and repeated spaces never yield empty-string "words",
    # and an empty file simply gives [] instead of leaving the word list
    # undefined. The split is done exactly once, not once per character.
    words = text.split()
    return calculate_data(words)
def print_top(filename):
    """Print the five most frequent words in *filename*, one per line.

    Each printed line is a ``(word, count)`` tuple. Entries are ordered by
    count, highest first; words with equal counts are ordered by the word
    itself, also descending. If the file has fewer than five distinct
    words, all of them are printed.

    Args:
        filename: Path of the text file to analyse.

    Returns:
        The *filename* argument, unchanged.
    """
    # ``with`` closes the file even if reading raises. Plain 'r' is used
    # because the 'U' mode flag was removed in Python 3.11; Python 3 text
    # mode already translates line endings by default.
    with open(filename, 'r') as file1:
        counts = file_to_dict(file1)

    # Index-based key instead of a tuple-unpacking lambda, which is a
    # syntax error on Python 3.
    ranked = sorted(
        counts.items(),
        key=lambda item: (item[1], item[0]),
        reverse=True,
    )
    # Slicing (rather than indexing 0..4) cannot raise IndexError when
    # fewer than five entries exist.
    for pair in ranked[:5]:
        print(pair)
    return filename
def print_words(filename):
    """Print every word in *filename* with its count, sorted by word.

    The output is a single line: the list of ``(word, count)`` tuples in
    ascending order.

    Args:
        filename: Path of the text file to analyse.

    Returns:
        The *filename* argument, unchanged.
    """
    # ``with`` closes the file even if reading raises. Plain 'r' is used
    # because the 'U' mode flag was removed in Python 3.11; Python 3 text
    # mode already translates line endings by default.
    with open(filename, 'r') as file1:
        counts = file_to_dict(file1)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(sorted(counts.items()))
    return filename
def main():
    """Command-line entry point: parse ``sys.argv`` and dispatch.

    Expects exactly two arguments after the script name: an option and a
    file name. ``--count`` calls ``print_words`` and ``--topcount`` calls
    ``print_top``.

    Raises:
        SystemExit: With status 1 when the argument count is wrong or the
            option is not recognised (after printing a message).
    """
    if len(sys.argv) != 3:
        # Single-argument print(...) is valid on both Python 2 and 3.
        print('usage: ./wordcount.py {--count | --topcount} file')
        sys.exit(1)

    option, filename = sys.argv[1:]
    if option == '--count':
        print_words(filename)
    elif option == '--topcount':
        print_top(filename)
    else:
        print('unknown option: ' + option)
        sys.exit(1)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment