Created
January 30, 2014 07:27
-
-
Save jirivrany/8704099 to your computer and use it in GitHub Desktop.
Python - speed comparison of dictionary filling
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import functools
import re
import sys
import timeit
from collections import defaultdict
def defdic(words):
    """Count occurrences of each word using ``collections.defaultdict``.

    Benchmark variant: a missing key is created by the ``int`` default
    factory, so the loop body is a single in-place increment.

    Args:
        words: Iterable of hashable items to count.

    Returns:
        A plain ``dict`` mapping each distinct item to its count.
    """
    freq = defaultdict(int)
    for word in words:
        freq[word] += 1
    # Hand back a plain dict rather than the defaultdict itself.
    return dict(freq)
def clasic(words):
    """Count occurrences of each word with an explicit membership test.

    Benchmark variant: checks ``word in freq`` before every update and
    either increments the existing count or starts a new one at 1.

    Args:
        words: Iterable of hashable items to count.

    Returns:
        A ``dict`` mapping each distinct item to its count.
    """
    freq = {}
    for word in words:
        if word in freq:
            freq[word] += 1
        else:
            freq[word] = 1
    return freq
def clasic_try(words):
    """Count occurrences of each word using try/except on ``KeyError``.

    Benchmark variant: attempts the increment first and only falls back
    to initialising the count at 1 when the key is not yet present.

    Args:
        words: Iterable of hashable items to count.

    Returns:
        A ``dict`` mapping each distinct item to its count.
    """
    freq = {}
    for word in words:
        try:
            freq[word] += 1
        except KeyError:
            # First time this word is seen.
            freq[word] = 1
    return freq
def dictget_loc(words):
    """Count occurrences of each word via a locally bound ``dict.get``.

    Benchmark variant: same as ``dict.get(word, 0) + 1`` but the bound
    method is looked up once before the loop and reused as a local name.

    Args:
        words: Iterable of hashable items to count.

    Returns:
        A ``dict`` mapping each distinct item to its count.
    """
    freq = {}
    # Bind the method once so the loop skips the attribute lookup.
    myget = freq.get
    for word in words:
        freq[word] = myget(word, 0) + 1
    return freq
def dictget(words):
    """Count occurrences of each word using ``dict.get`` with a default.

    Benchmark variant: every update reads the current count (0 when the
    key is missing) through ``freq.get`` and stores it back plus one.

    Args:
        words: Iterable of hashable items to count.

    Returns:
        A ``dict`` mapping each distinct item to its count.
    """
    freq = {}
    for word in words:
        freq[word] = freq.get(word, 0) + 1
    return freq
def time_function(fname, param, rep=100):
    """Time ``rep`` calls of a function on ``param`` and print the result.

    Args:
        fname: Name of a function defined in ``__main__``, or the
            callable itself.
        param: The single argument handed to the function on every call.
        rep: Number of timed calls (default 100).

    Returns:
        Total elapsed seconds for all ``rep`` calls, as measured by
        ``timeit.timeit``.

    Raises:
        ImportError: If ``fname`` is a string that names nothing in
            ``__main__``.
    """
    if callable(fname):
        func = fname
        label = getattr(func, "__name__", repr(func))
    else:
        label = fname
        try:
            func = getattr(sys.modules["__main__"], fname)
        except AttributeError:
            raise ImportError(
                "cannot import name {!r} from __main__".format(fname))
    print(label)
    # Pass the data by reference. Formatting ``param`` into the statement
    # text would embed its repr as a literal that is rebuilt on every timed
    # call, which distorts the measurement and breaks for arguments whose
    # repr is not valid source code.
    elapsed = timeit.timeit(functools.partial(func, param), number=rep)
    print("{} calls of function {} took {} seconds".format(
        rep, label, elapsed))
    return elapsed
def get_data(path='data/txt/les_miserables.txt', encoding=None):
    """Read a text file and return its lower-cased words.

    The characters ``. , ? ; " ! :`` are replaced by spaces, the text is
    lower-cased, and the result is split on whitespace. Other characters
    (apostrophes, hyphens, ...) are left inside the words.

    Args:
        path: File to read. Defaults to the corpus path the benchmark
            was written against.
        encoding: Text encoding passed to ``open``. ``None`` (default)
            keeps the platform default.

    Returns:
        A list of word strings in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    punctuation = re.compile(r'[.,?;"!:]')
    with open(path, encoding=encoding) as source:
        text = source.read()
    return punctuation.sub(' ', text).lower().split()
if __name__ == '__main__':
    # Load the corpus once so every variant is timed on the same word list.
    DATA = get_data()
    # Variants are timed one after another in this fixed order.
    for variant in ('defdic', 'clasic', 'clasic_try', 'dictget',
                    'dictget_loc'):
        time_function(variant, DATA)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment