Last active
June 19, 2017 19:30
-
-
Save coffeemancy/9ed8b681655bd7aca34b8fe59dac4728 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itertools import chain | |
def lcs(s, t): | |
"""Find longest common substring in strings. | |
>>> lcs('abcdxyz', 'xyzabcd') | |
(4, 'abcd') | |
>>> lcs('abcdxyzabcyzabcdzabcdefghijkllm', 'xyzabcdefghijklm') | |
(13, 'zabcdefghijkl') | |
""" | |
# determine which string is longer | |
shorter, longer = (s, iter(t)) if len(s) < len(t) else (t, iter(s)) | |
# get max one | |
all_lcs = list(gen_lcs(shorter, longer)) | |
length, longest_substring = max(all_lcs or [(0, '')]) | |
return (length, longest_substring) | |
def gen_lcs(string, itr): | |
"""Generate longest common substrings. | |
>>> list(gen_lcs('abcdxyz', iter('xyzabcdabc'))) | |
[(3, 'xyz'), (4, 'abcd'), (3, 'abc')] | |
""" | |
first_miss = '' | |
while itr: | |
# advance itr along until it hits a match of any char in string | |
first_match = next( | |
c for c in chain(first_miss, itr) | |
if c in string) | |
# get first substring which starts with the first match | |
first_substring = find_first_substring(string, first_match) | |
# get match, advancing itr | |
match, first_miss = next_match( | |
chain(first_match, itr), first_substring) | |
if match: | |
yield (len(match), match) | |
def find_first_substring(string, char): | |
"""Find first substring from string starting with char. | |
>>> find_first_substring('abcdefg', 'd') | |
'defg' | |
>>> find_first_substring('abcabcdab', 'b') | |
'bcabcdab' | |
""" | |
parts = string.partition(char) | |
return ''.join(parts[1:]) | |
def next_match(itr, match): | |
"""Get characters from consuming itr as it matches. | |
>>> next_match(iter('bcdefg'), 'bcdxyz') | |
('bcd', 'e') | |
>> next_match(iter('abcd'), 'abcdxyz') | |
('abcd', '') | |
>>> next_match(iter('abcd'), 'axyz') | |
('a', 'b') | |
""" | |
def _gen_match(): | |
for m, i in zip(match, itr): | |
yield i | |
if m != i: | |
break | |
else: | |
yield '' | |
g = list(_gen_match()) | |
chars = ''.join(g[:-1]) | |
miss = g[-1] | |
return (chars, miss) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment