Skip to content

Instantly share code, notes, and snippets.

@originell
Created May 4, 2011 16:24
Show Gist options
  • Save originell/955513 to your computer and use it in GitHub Desktop.
Save originell/955513 to your computer and use it in GitHub Desktop.
Fastes way to search for something in a list
import pytz
import re
"""
Trying to find a fast way to search through a list.
As example we use pytz's all_timezones list and set, which
are both pretty nicely filled with strings.
OSX 10.6.7, 2.53ghz Core2Duo:
Regex Map: 1595.96 usec/pass
Regex Set: 1613.65 usec/pass
Cached Regex Map: 1576.19 usec/pass
Cached Regex Set: 1627.34 usec/pass
Search Through: 42.85 usec/pass
Cached Search Through: 45.37 usec/pass
Set Search Through: 33.18 usec/pass
Cached Set Search Through: 32.97 usec/pass
Not that much of a surprise that the easiest method to write
is also the fastest that I could find :-)
Feel free to correct me in any way! :)
"""
city = 'Zulu' # worst case, last list item
regex = re.compile('\w+/%s$' % city)
tz = pytz.all_timezones
tzs = pytz.all_timezones_set
def regex_map():
result = map(lambda s: re.match(regex, s), pytz.all_timezones)
for x in result:
if x is not None:
return x.string
def regex_set_map():
result = map(lambda s: re.match(regex, s), pytz.all_timezones_set)
for x in result:
if x is not None:
return x.string
def cached_regex_map():
result = map(lambda s: re.match(regex, s), tz)
for x in result:
if x is not None:
return x.string
def cached_regex_set_map():
result = map(lambda s: re.match(regex, s), tzs)
for x in result:
if x is not None:
return x.string
def search_through():
for x in pytz.all_timezones:
if city in x:
return x
def cached_search_through():
for x in tz:
if city in x:
return x
def set_search_through():
for x in pytz.all_timezones_set:
if city in x:
return x
def cached_set_search_through():
for x in tzs:
if city in x:
return x
if __name__ == '__main__':
from timeit import Timer
print "Running Tests..."
t1 = Timer('regex_map()', 'from __main__ import regex_map')
print "Regex Map: %.2f usec/pass" % (1000000 * t1.timeit(number=1000)/1000)
t2 = Timer('regex_set_map()', 'from __main__ import regex_set_map')
print "Regex Set: %.2f usec/pass" % (1000000 * t2.timeit(number=1000)/1000)
t3 = Timer('cached_regex_map()', 'from __main__ import cached_regex_map')
print "Cached Regex Map: %.2f usec/pass" % (1000000 * t3.timeit(number=1000)/1000)
t4 = Timer('cached_regex_set_map()', 'from __main__ import cached_regex_set_map')
print "Cached Regex Set: %.2f usec/pass" % (1000000 * t4.timeit(number=1000)/1000)
t5 = Timer('search_through()', 'from __main__ import search_through')
print "Search Through: %.2f usec/pass" % (1000000 * t5.timeit(number=1000)/1000)
t6 = Timer('cached_search_through()', 'from __main__ import cached_search_through')
print "Cached Search Through: %.2f usec/pass" % (1000000 * t6.timeit(number=1000)/1000)
t7 = Timer('set_search_through()', 'from __main__ import set_search_through')
print "Set Search Through: %.2f usec/pass" % (1000000 * t7.timeit(number=100000)/100000)
t8 = Timer('cached_set_search_through()', 'from __main__ import cached_set_search_through')
print "Cached Set Search Through: %.2f usec/pass" % (1000000 * t8.timeit(number=100000)/100000)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment