Created
May 4, 2011 16:24
-
-
Save originell/955513 to your computer and use it in GitHub Desktop.
Fastes way to search for something in a list
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pytz | |
import re | |
""" | |
Trying to find a fast way to search through a list. | |
As example we use pytz's all_timezones list and set, which | |
are both pretty nicely filled with strings. | |
OSX 10.6.7, 2.53ghz Core2Duo: | |
Regex Map: 1595.96 usec/pass | |
Regex Set: 1613.65 usec/pass | |
Cached Regex Map: 1576.19 usec/pass | |
Cached Regex Set: 1627.34 usec/pass | |
Search Through: 42.85 usec/pass | |
Cached Search Through: 45.37 usec/pass | |
Set Search Through: 33.18 usec/pass | |
Cached Set Search Through: 32.97 usec/pass | |
Not that much of a surprise that the easiest method to write | |
is also the fastest that I could find :-) | |
Feel free to correct me in any way! :) | |
""" | |
city = 'Zulu' # worst case, last list item | |
regex = re.compile('\w+/%s$' % city) | |
tz = pytz.all_timezones | |
tzs = pytz.all_timezones_set | |
def regex_map(): | |
result = map(lambda s: re.match(regex, s), pytz.all_timezones) | |
for x in result: | |
if x is not None: | |
return x.string | |
def regex_set_map(): | |
result = map(lambda s: re.match(regex, s), pytz.all_timezones_set) | |
for x in result: | |
if x is not None: | |
return x.string | |
def cached_regex_map(): | |
result = map(lambda s: re.match(regex, s), tz) | |
for x in result: | |
if x is not None: | |
return x.string | |
def cached_regex_set_map(): | |
result = map(lambda s: re.match(regex, s), tzs) | |
for x in result: | |
if x is not None: | |
return x.string | |
def search_through(): | |
for x in pytz.all_timezones: | |
if city in x: | |
return x | |
def cached_search_through(): | |
for x in tz: | |
if city in x: | |
return x | |
def set_search_through(): | |
for x in pytz.all_timezones_set: | |
if city in x: | |
return x | |
def cached_set_search_through(): | |
for x in tzs: | |
if city in x: | |
return x | |
if __name__ == '__main__': | |
from timeit import Timer | |
print "Running Tests..." | |
t1 = Timer('regex_map()', 'from __main__ import regex_map') | |
print "Regex Map: %.2f usec/pass" % (1000000 * t1.timeit(number=1000)/1000) | |
t2 = Timer('regex_set_map()', 'from __main__ import regex_set_map') | |
print "Regex Set: %.2f usec/pass" % (1000000 * t2.timeit(number=1000)/1000) | |
t3 = Timer('cached_regex_map()', 'from __main__ import cached_regex_map') | |
print "Cached Regex Map: %.2f usec/pass" % (1000000 * t3.timeit(number=1000)/1000) | |
t4 = Timer('cached_regex_set_map()', 'from __main__ import cached_regex_set_map') | |
print "Cached Regex Set: %.2f usec/pass" % (1000000 * t4.timeit(number=1000)/1000) | |
t5 = Timer('search_through()', 'from __main__ import search_through') | |
print "Search Through: %.2f usec/pass" % (1000000 * t5.timeit(number=1000)/1000) | |
t6 = Timer('cached_search_through()', 'from __main__ import cached_search_through') | |
print "Cached Search Through: %.2f usec/pass" % (1000000 * t6.timeit(number=1000)/1000) | |
t7 = Timer('set_search_through()', 'from __main__ import set_search_through') | |
print "Set Search Through: %.2f usec/pass" % (1000000 * t7.timeit(number=100000)/100000) | |
t8 = Timer('cached_set_search_through()', 'from __main__ import cached_set_search_through') | |
print "Cached Set Search Through: %.2f usec/pass" % (1000000 * t8.timeit(number=100000)/100000) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment