from itertools import chain def lcs(s, t): """Find longest common substring in strings. >>> lcs('abcdxyz', 'xyzabcd') (4, 'abcd') >>> lcs('abcdxyzabcyzabcdzabcdefghijkllm', 'xyzabcdefghijklm') (13, 'zabcdefghijkl') """ # determine which string is longer shorter, longer = (s, iter(t)) if len(s) < len(t) else (t, iter(s)) # get max one all_lcs = list(gen_lcs(shorter, longer)) length, longest_substring = max(all_lcs or [(0, '')]) return (length, longest_substring) def gen_lcs(string, itr): """Generate longest common substrings. >>> list(gen_lcs('abcdxyz', iter('xyzabcdabc'))) [(3, 'xyz'), (4, 'abcd'), (3, 'abc')] """ first_miss = '' while itr: # advance itr along until it hits a match of any char in string first_match = next( c for c in chain(first_miss, itr) if c in string) # get first substring which starts with the first match first_substring = find_first_substring(string, first_match) # get match, advancing itr match, first_miss = next_match( chain(first_match, itr), first_substring) if match: yield (len(match), match) def find_first_substring(string, char): """Find first substring from string starting with char. >>> find_first_substring('abcdefg', 'd') 'defg' >>> find_first_substring('abcabcdab', 'b') 'bcabcdab' """ parts = string.partition(char) return ''.join(parts[1:]) def next_match(itr, match): """Get characters from consuming itr as it matches. >>> next_match(iter('bcdefg'), 'bcdxyz') ('bcd', 'e') >> next_match(iter('abcd'), 'abcdxyz') ('abcd', '') >>> next_match(iter('abcd'), 'axyz') ('a', 'b') """ def _gen_match(): for m, i in zip(match, itr): yield i if m != i: break else: yield '' g = list(_gen_match()) chars = ''.join(g[:-1]) miss = g[-1] return (chars, miss)