hdf · February 5, 2021 23:17
diff --git a/index.html b/index.html
 <!DOCTYPE html>
 <html lang="en">
 <head>
  <meta charset="utf-8">
  <title>Redundancy checker</title>
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <!--<script src="lrs.min.js"></script>-->
 <style>
 html {
  font-family: 'Verdana', sans-serif;
  margin: 0;
  padding: 0;
 }
 pre {
  margin: 0px;
 }
 #main {
  background-color: #fff;
  border: 1px solid #999;
  border-spacing: 5px;
  width: 100%;
 }
 #main>div {
  display: flex;
 }
 #main>div>div {
  background-color: #eee;
  padding: 3px 6px 3px 6px;
  margin: 1px;
  flex: 0 0 110px;
 }
 #main>div>div:nth-child(2n) {
  flex: 1 0 auto;
 }
 #main>div>div:nth-child(3n) {
  flex: 0 1 145px;
 }
 .header {
  font-size: 1.2em;
  font-weight: bold;
 }
 center {
  display: flex;
  align-items: top;
  justify-content: center;
 }
 #file {
  display: none;
 }
 #btn {
  margin-bottom: 16px;
  padding: 0px 5px 0px 5px;
  height: 72px;
  flex: 1 1 0%;
  max-width: 256px;
  text-align: center;
  font-size: 36px;
  line-height: 72px;
  background: rgb(42, 128, 225) none repeat scroll 0% 0%;
  color: rgb(255, 255, 255);
  border-radius: 12px;
  box-shadow: inset 0 5px 5px rgba(0,0,0,0.05),0 0 5px rgba(0,0,0,0.8);
 }
 #btn:hover {
  background: rgb(82, 64, 255) none repeat scroll 0% 0%;
 }
 #chk {
  display: flex;
  margin: 0px 0px 16px 5px;
  align-items: center;
  width: 400px;
  padding: 0px 6px 0px 6px;
  background-color: #efefef;
  border-radius: 9px;
 }
 </style>
 </head>
 <body>
 <center>
  <input type="file" name="file" id="file">
  <label id="btn" for="file" tabindex="0">Browse</label>
  &nbsp;
  <div id="chk">
    <input type="checkbox" id="str" checked="checked">
    <label for="str" tabindex="1" style="text-align: left; padding-left: 5px;">
    Read file as string, rather than binary. This may result in incorrect "Occurs at:" numbers, but will increase search speed significantly.
    </label>
  </div>
 </center>
 <div id="main">
 </div>
 <script>
 if(function(){var r=Math.floor,t=function(r){return r};function n(r,n){var e,o,f=r.length,s=[],i=f,u=-1,c=0;for(n=n||t;i--;)u=Math.max(n(r[i]),u);for(o=(u>>24?32:u>>16&&24)||u>>8&&16||8;c<o;c+=4){for(i=16;i--;)s[i]=[];for(i=f;i--;)s[n(r[i])>>c&15].push(r[i]);for(e=0;e<16;e++)for(u=s[e].length;u--;)r[++i]=s[e][u]}return r}function e(r){return"function"==typeof r?r:function(r){return"[object String]"==Object.prototype.toString.call(r)}(r)?function(t){return r.charCodeAt(t)}:function(t){return r[t]}}this.suffixArray=function(t,o){var f;return o="number"==typeof(f=o)||f instanceof Number?o:t.length,function t(e,o,f){var s,i,u,c=[],a=[],l=r(2*o/3),g=o-l,d=l+1>>1,b=l,h=0,p=[],y=[];if(1==o)return[[0],[0]];u=function(r){return r>=o?0:e(r)};for(;b--;)c[b]=1+(3*b>>1);for(b=3;b--;)n(c,function(r){return u(b+r)});h=a[r(c[0]/3)+(c[0]%3==1?0:d)]=1;for(b=1;b<l;b++)u(c[b])==u(c[b-1])&&u(c[b]+1)==u(c[b-1]+1)&&u(c[b]+2)==u(c[b-1]+2)||h++,a[r(c[b]/3)+(c[b]%3==1?0:d)]=h;if(h<l)for(a=t(function(r){return a[r]},l,!0),b=l;b--;)c[b]=a[b]<d?3*a[b]+1:3*(a[b]-d)+2;for(b=l;b--;)p[c[b]]=b;p[o]=-1;p[o+1]=-2;i=function(r,t){return u(r)-u(t)||(r%3==2?u(r+1)-u(t+1)||p[r+2]-p[t+2]:p[r+1]-p[t+1])};a=o%3==1?[o-1]:[];for(b=0;b<l;b++)c[b]%3==1&&a.push(c[b]-1);n(a,function(r){return u(r)});if(f){for(b=0,h=0,s=0;b<l&&h<g;)y[s++]=i(c[b],a[h])<0?c[b++]:a[h++];for(;b<l;)y[s++]=c[b++];for(;h<g;)y[s++]=a[h++];return y}var v,S,x=[],j=[0],m=0;for(b=0,h=0,s=0;b<l&&h<g;)if(v=s,y[s++]=i(c[b],a[h])<0?c[b++]:a[h++],x[y[v]]=v,x[v]>0){for(S=y[x[v]-1];v!=o-m&&S!=o-m&&u(v+m)==u(S+m);)m++;j[x[v]]=m,m>0&&m--}for(;b<l;)if(v=s,y[s++]=c[b++],x[y[v]]=v,x[v]>0){for(S=y[x[v]-1];v!=o-m&&S!=o-m&&u(v+m)==u(S+m);)m++;j[x[v]]=m,m>0&&m--}for(;h<g;)if(v=s,y[s++]=a[h++],x[y[v]]=v,x[v]>0){for(S=y[x[v]-1];v!=o-m&&S!=o-m&&u(v+m)==u(S+m);)m++;j[x[v]]=m,m>0&&m--}return[y,j]}(e(t),o)},this.LRS=function(r,t,n){t=t||3,n=n||10,nots=r.constructor===Uint8Array,decoder=new TextDecoder;for(var e,o,f,[i,u]=suffixArray(r),c={},a=nots?decoder.decode(r):r,l=0;l<i.length;l++)if(!((e=u[l])<2||(substring=nots?r.subarray(i[l],i[l]+e):r.substring(i[l],i[l]+e),substring=nots?decoder.decode(substring):substring,(o=a.split(substring).length-1)<t))){for(s in f=!1,c)if(s.length>=e){if(s.includes(substring)){f=!0;break}}else substring.includes(s)&&delete c[s];f||(c[substring]=[o,i[l]])}return Object.entries(c).sort((r,t)=>t[1][0]*t[0].length**2-r[1][0]*r[0].length**2).slice(0,n).reduce((r,t)=>(r[t[0]]=c[t[0]],r),{})},this.suffixArray.bsort=n}.call(),"object"==typeof process){var fs=require("fs"),file=fs.readFileSync(process.argv.length>2?process.argv[2]:"suffixarray.js","utf8");console.log(LRS(file))}else"function"==typeof importScripts&&addEventListener("message",r=>postMessage(LRS(r.data)));
 </script>
 <script>
 (function() {
  var msgs = ['Browse', 'Browse again'], b;
  function makeWorker(f) {
    //var blob = new Blob(['(', f.toString(), ')();'], { type: 'text/javascript'});
    var blob = new Blob([f.toString()], {type: 'text/javascript'});
    var blobUrl = URL.createObjectURL(blob);
    var worker = new Worker(blobUrl);
    URL.revokeObjectURL(blobUrl);
    return worker;
  }
  //var w = makeWorker(e => postMessage(LRS(e.data)));
  var w = makeWorker(document.getElementsByTagName('script')[0].innerHTML.trim());
  w.addEventListener('message', function(e) {
    //var res = LRS(e.target.result);
    var res = e.data;
    var m = document.getElementById('main');
    m.innerHTML = '  <div class="header"><div>Occurs at:</div><div>Substring:</div><div>Found count:</div></div>\n';
    for (r in res)
      m.innerHTML += '  <div><div>' + res[r][1] + '</div><div><pre>' + r + '</pre></div><div>' + res[r][0] + '</div></div>\n';
    //document.getElementById('btn').textContent = msgs[Math.floor(Math.random() * msgs.length)];
    document.getElementById('btn').textContent = msgs[b=b?0:1];
  });

  function selectHandler(e) {
    var reader = new FileReader();
    var d = new TextDecoder();
    reader.onload = function(e) {
      document.getElementById('btn').textContent = 'Processing...';
      w.postMessage((typeof e.target.result==='string')?e.target.result:(new Uint8Array(e.target.result)));
    };
    if (document.getElementById('str').checked)
      reader.readAsText(e.target.files[0]);
    else
      reader.readAsArrayBuffer(e.target.files[0]);
  }

  document.getElementById('file').addEventListener('change', selectHandler, false);
 })();
 </script>
 </body>
 </html>
diff --git a/maxsubstring.py b/maxsubstring.py
 #!/usr/bin/env python

 # From: https://gist.github.com/hdf/5f0deb03e1b4fc52425ce3d9cd67cf1c
 # Original from: https://gist.github.com/hynekcer/fa340f3b63826168ffc0c4b33310ae9c

 """Find the longest repeated substring.
 "Efficient way to find longest duplicate string for Python (From Programming Pearls)"
 http://stackoverflow.com/questions/13560037/
 The algorithm is based on "Prefix doubling".
 The worst time complexity is O(n (log n)^2). Memory requirements are linear.
 """

 import time

 from random import randint
 import itertools
 import sys
 import unittest
 from itertools import groupby
 from operator import itemgetter
 import logging

 log = logging.getLogger(__name__)
 log.setLevel(logging.INFO)
 try:
    log.addHandler(logging.NullHandler())
 except AttributeError:
    pass

 cache = False


 def run():
    if sys.argv[1:] == ['-']:
        print("(Return/Enter and CTRL+D or CTRL+Z" +
               " and Return/Enter again to finish.)")
        text = sys.stdin.read()
    elif sys.argv[1:]:
        print('Reading data...')
        text = open(sys.argv[1]).read()
    else:
        text = 'banana\nabababab'
    print('Sorting...')
    global cache
    cache = False
    result = longest_common_substring(text)
    print('Longest common substrings in "{0}..." are:\n{1}'.format(
          text[:20], result))
    result = LRS(text)
    print('Top (at most) 10, atleast 3 times repeated longest' +
          ' non-overlapping substrings are:\n{0}'.format(result))


 def LRS(text, m=3, n=10): # Longest repeated non-overlapping substrings
    """
    Returns longest (maximum n number of) non-overlapping (minimum m times)
    repeated strings and how many times they are found in the text in
    descending order of count*len(substring)^2.
    """
    sa, rsa, lcp = suffix_array(text)
    result = {}
    for i in range(0, len(sa)):
        l = lcp[i]
        if l < 2: # Don't bother with individual characters.
            continue
        substring = text[sa[i]:sa[i] + l]
        c = text.count(substring) # This is the bottleneck.
        if c < m: # Not found enough times.
            continue
        halt = False
        for s in list(result.keys()): # Filter out overlaps.
            if len(s) >= l:
                if substring in s:
                    halt = True
                    break
            elif s in substring:
                del result[s]
        if not halt:
            result[substring] = c
    return dict(sorted(result.items(),
                key=lambda x: x[1]*(len(x[0])**2), reverse=True)[:n])


 def longest_common_substring(text):
    """Get the longest common substrings and their positions.
    >>> longest_common_substring('banana')
    {'ana': [1, 3]}
    >>> text = "not so Agamemnon, who spoke fiercely to "
    >>> sorted(longest_common_substring(text).items())
    [(' s', [3, 21]), ('no', [0, 13]), ('o ', [5, 20, 38])]
    This function can be easy modified for any criteria, e.g. for searching ten
    longest non overlapping repeated substrings.
    """
    sa, rsa, lcp = suffix_array(text)
    maxlen = max(lcp)
    result = {}
    for i in range(1, len(text)):
        if lcp[i] == maxlen:
            j1, j2, h = sa[i - 1], sa[i], lcp[i]
            assert text[j1:j1 + h] == text[j2:j2 + h]
            substring = text[j1:j1 + h]
            if substring not in result:
                result[substring] = [j1]
            result[substring].append(j2)
    return dict((k, sorted(v)) for k, v in result.items())


 def suffix_array(text, _step=16):
    global cache
    if cache and "unittest" not in sys.modules:
        return cache
    """Analyze all common strings in the text.
    Short substrings of the length _step a are first pre-sorted. The are the
    results repeatedly merged so that the garanteed number of compared
    characters bytes is doubled in every iteration until all substrings are
    sorted exactly.
    Arguments:
        text:  The text to be analyzed.
        _step: Is only for optimization and testing. It is the optimal length
               of substrings used for initial pre-sorting. The bigger value is
               faster if there is enough memory. Memory requirements are
               approximately (estimate for 32 bit Python 3.3):
                   len(text) * (29 + (_size + 20 if _size > 2 else 0)) + 1MB
    Return value:      (tuple)
      (sa, rsa, lcp)
        sa:  Suffix array                  for i in range(1, size):
               assert text[sa[i-1]:] < text[sa[i]:]
        rsa: Reverse suffix array          for i in range(size):
               assert rsa[sa[i]] == i
        lcp: Longest common prefix         for i in range(1, size):
               assert text[sa[i-1]:sa[i-1]+lcp[i]] == text[sa[i]:sa[i]+lcp[i]]
               if sa[i-1] + lcp[i] < len(text):
                   assert text[sa[i-1] + lcp[i]] < text[sa[i] + lcp[i]]
    >>> suffix_array(text='banana')
    ([5, 3, 1, 0, 4, 2], [3, 2, 5, 1, 4, 0], [0, 1, 3, 0, 0, 2])
    Explanation: 'a' < 'ana' < 'anana' < 'banana' < 'na' < 'nana'
    The Longest Common String is 'ana': lcp[2] == 3 == len('ana')
    It is between  tx[sa[1]:] == 'ana' < 'anana' == tx[sa[2]:]
    """
    tx = text
    t0 = time.time()
    size = len(tx)
    step = min(max(_step, 1), len(tx))
    sa = list(range(len(tx)))
    log.debug('%6.3f pre sort', time.time() - t0)
    sa.sort(key=lambda i: tx[i:i + step])
    log.debug('%6.3f after sort', time.time() - t0)
    grpstart = size * [False] + [True]  # a boolean map for iteration speedup.
    # It helps to skip yet resolved values. The last value True is a sentinel.
    rsa = size * [None]
    stgrp, igrp = '', 0
    for i, pos in enumerate(sa):
        st = tx[pos:pos + step]
        if st != stgrp:
            grpstart[igrp] = (igrp < i - 1)
            stgrp = st
            igrp = i
        rsa[pos] = igrp
        sa[i] = pos
    grpstart[igrp] = (igrp < size - 1 or size == 0)
    log.debug('%6.3f after group', time.time() - t0)
    while grpstart.index(True) < size:
        # assert step <= size
        nmerge = 0
        nextgr = grpstart.index(True)
        while nextgr < size:
            igrp = nextgr
            nextgr = grpstart.index(True, igrp + 1)
            glist = []
            for ig in range(igrp, nextgr):
                pos = sa[ig]
                if rsa[pos] != igrp:
                    break
                newgr = rsa[pos + step] if pos + step < size else -1
                glist.append((newgr, pos))
            glist.sort()
            for ig, g in groupby(glist, key=itemgetter(0)):
                g = [x[1] for x in g]
                sa[igrp:igrp + len(g)] = g
                grpstart[igrp] = (len(g) > 1)
                for pos in g:
                    rsa[pos] = igrp
                igrp += len(g)
            nmerge += len(glist)
        log.debug('%6.3f for step=%d nmerge=%d', time.time() - t0, step, nmerge)
        step *= 2
    del grpstart
    # create LCP array
    lcp = size * [0]
    h = 0
    for i in range(size):
        if rsa[i] > 0:
            j = sa[rsa[i] - 1]
            while i != size - h and j != size - h and tx[i + h] == tx[j + h]:
                h += 1
            lcp[rsa[i]] = h
            if h > 0:
                h -= 1
    log.debug('%6.3f end', time.time() - t0)
    cache = (sa, rsa, lcp)
    return cache

 # ---


 class TestMixin(object):
    def suffix_verify(self, text, step=16):
        tx = text
        sa, rsa, lcp = suffix_array(text=tx, _step=step)
        self.assertEqual(set(sa), set(range(len(tx))))
        ok = True
        for i0, i1, h in zip(sa[:-1], sa[1:], lcp[1:]):
            self.assertEqual(tx[i1:i1 + h], tx[i0:i0 + h], "Verify LCP characters equal on text '%s...'" % text[:20])
            self.assertGreater(tx[i1 + h:i1 + h + 1], tx[i0 + h:i0 + h + 1],
                               "Verify LCP+1 char is different '%s...'" % text[:20])
            self.assertLessEqual(max(i0, i1), len(tx) - h,
                                 "Verify LCP is not more than length of string '%s...'" % text[:20])
        self.assertTrue(ok)


 class SuffixArrayTest(unittest.TestCase, TestMixin):
    def test_16(self):
        # 'a' < 'ana' < 'anana' < 'banana' < 'na' < 'nana'
        expect = ([5, 3, 1, 0, 4, 2], [3, 2, 5, 1, 4, 0], [0, 1, 3, 0, 0, 2])
        self.assertEqual(suffix_array(text='banana', _step=16), expect)

    def test_1(self):
        expect = ([5, 3, 1, 0, 4, 2], [3, 2, 5, 1, 4, 0], [0, 1, 3, 0, 0, 2])
        self.assertEqual(suffix_array(text='banana', _step=1), expect)

    def test_mini(self):
        self.assertEqual(suffix_array(text='', _step=1), ([], [], []))
        self.assertEqual(suffix_array(text='a', _step=1), ([0], [0], [0]))
        self.assertEqual(suffix_array(text='aa', _step=1), ([1, 0], [1, 0], [0, 1]))
        self.assertEqual(suffix_array(text='aaa', _step=1), ([2, 1, 0], [2, 1, 0], [0, 1, 2]))

    def test_example(self):
        self.suffix_verify('abracadabra')

    def test_cartesian(self):
        """Test all combinations of alphabet "ABC" up to length 4 characters"""
        for size in range(7):
            for cartesian in itertools.product(*(size * ['ABC'])):
                text = ''.join(cartesian)
                log.debug('Testing "%s"', text)
                self.suffix_verify(text, 1)

    def test_lcp(self):
        expect = {'ana': [1, 3]}
        self.assertDictEqual(longest_common_substring('banana'), expect)
        expect = {' s': [3, 21], 'no': [0, 13], 'o ': [5, 20, 38]}
        self.assertDictEqual(longest_common_substring(
             "not so Agamemnon, who spoke fiercely to "), expect)


 class SlowTests(unittest.TestCase, TestMixin):
    """Slow development tests running many minutes.
    It can be run only by an EXPLICIT command!
        e.g.: python -m unittest maxsubstring.SlowTests._test_random
    """
    def _test_random(self):
        for power in range(2, 21, 2):
            size = randint(2 ** (power - 1), 2 ** power)
            for alphabet in (2, 4, 16, 256):
                text = ''.join(chr(65 + randint(0, alphabet - 1)) for _ in range(size))
                log.debug('%s %s %s', size, alphabet, 1)
                self.suffix_verify(text, 1)
                log.debug('%s %s %s', size, alphabet, 16)
                self.suffix_verify(text, 16)


 if __name__ == '__main__':
    run()
diff --git a/suffixarray.js b/suffixarray.js
 // From: https://gist.github.com/hdf/5f0deb03e1b4fc52425ce3d9cd67cf1c
 // (Minify with: https://javascript-minifier.com/)
 // Original from: https://github.com/tixxit/suffixarray/blob/master/suffixarray.js

 /**
 * An implementation of the linear time suffix array construction of 
 * Karkkainen & Sanders:
 *
 * "Simple Linear Work Suffix Array Construction", Karkainen and Sanders.
 *
 * Creating a suffix array is very simple; just call suffixArray(...) with
 * either a string or a function that returns integers and its length. For
 * example,
 *
 * var s = "Sort this!";
 * suffixArray(s);                  // Returns [4, 9, 0, 6, 7, 1, 2, 8, 3, 5].
 *
 * function reverse(i) { return s.charCodeAt(s.length - 1 - i) }
 * suffixArray(reverse, s.length);  // Returns [5, 0, 9, 3, 2, 8, 7, 1, 4, 6].
 *
 * @author Thomas Switzer
 */
 (function() {

 var global = this,
    floor = Math.floor,
    identity = function(x) { return x },
    undefined;

 /**
 * Sorts an array of (unsigned) integers in linear time. The values of the
 * array (a) act as keys, which are passed to the key function which returns an
 * integer value.
 *
 * @param a An array of keys to sort.
 * @param key A function that maps keys to integer values.
 * @return The array a.
 */
 function bsort(a, key) {
    var len = a.length,
        buckets = [],
        i = len, j = -1, b, d = 0,
        keys = 0,
        bits;
    key = key || identity;
    while (i--)
        j = Math.max(key(a[i]), j);
    bits = j >> 24 && 32 || j >> 16 && 24 || j >> 8 && 16 || 8;
    for (; d < bits; d += 4) {
        for (i = 16; i--;)
            buckets[i] = [];
        for (i = len; i--;)
            buckets[(key(a[i]) >> d) & 15].push(a[i]);
        for (b = 0; b < 16; b++)
            for (j = buckets[b].length; j--;)
                a[++i] = buckets[b][j];
    }
    return a;
 }


 function isInt(n) {
    return typeof n == "number" || n instanceof Number;
 }

 function isStr(s) {
    return Object.prototype.toString.call(s) == "[object String]";
 }


 function wrap(s) {
    return typeof s == "function" ? s : (isStr(s)
            ? function(i) { return s.charCodeAt(i) }
            : function(i) { return s[i] });
 }


 /**
 * Returns the suffix array of the string s. The suffix array is constructed
 * in linear time. Also returns Longest common prefix array.
 *
 * The string s can either be an Unicode string (ie. JavaScript String object)
 * or a function that takes an index (integer >= 0) and returns another
 * integer (a "symbol"). If a function is provided, then another argument
 * specifying its length (integer >= 0) must be provided.
 *
 * This also takes a 3rd optional parameter that dictates how to treat the end
 * of the string. This can be either "min" or "wrap". If it is "min", then 
 * characters after the end of the string are treated as 0's (the minimum).
 * If "wrap" is given, then the end of the string wraps back around to the
 * beginning. If this parameter is omitted, then "wrap" is assumed.
 *
 * In the case of strings, you can omit the 2nd paramter (length) and still
 * provide the 3rd paramter. For instance, suffixArray(str, "min").
 *
 * The returned array contains the indexes of the string in the lexicographical
 * order of the suffixes that start at those indexes.
 *
 * @param s A string or function that maps ints between [0, len) to integers.
 * @param len The length of s (optional if s is a string, required otherwise).
 * @param end Either "min", "wrap" or leave out (defaults to "wrap").
 * @return An array of indexes into s.
 */
 global.suffixArray = function(s, len, end) {
    end = end || len;
    len = isInt(len) ? len : s.length;

    if (end == "wrap")
        return wrappedSuffixArray(s, len);
    else
        return _suffixArray(wrap(s), len);
 }


 /**
  * Longest repeated non-overlapping substrings:
  * Returns longest (maximum n number of) non-overlapping (minimum m times)
  * repeated strings and how many times they are found in the text in
  * descending order of count*len(substring)^2.
 */
 global.LRS = function(text, m, n) {
    m = m || 3;
    n = n || 10;
    nots = (text.constructor===Uint8Array);
    decoder = new TextDecoder();

    var [sa, lcp] = suffixArray(text);
    var result = {}, l, c, halt,
        t = nots?decoder.decode(text):text;
    for (var i = 0; i < sa.length; i++) {
        l = lcp[i];
        if (l < 2) continue; // Don't bother with individual characters.
        substring = nots?text.subarray(sa[i], sa[i] + l):text.substring(sa[i], sa[i] + l); // Subarray is very slow. :(
        substring = nots?decoder.decode(substring):substring;
        c = t.split(substring).length - 1; // This is slow too.
        if (c < m) continue; // Not found enough times.
        halt = false;
        for (s in result) // Filter out overlaps.
            if (s.length >= l) {
                if (s.includes(substring)) {
                    halt = true;
                    break;
                }
            } else if (substring.includes(s))
                delete result[s];
        if (!halt) result[substring] = [c, sa[i]];
    }

    return Object.entries(result).sort((a, b) =>
               (b[1][0]*(b[0].length**2))-(a[1][0]*(a[0].length**2)))
               .slice(0, n).reduce((o, k) => {
                   o[k[0]] = result[k[0]];
                   return o;
               }, {});
 }


 // Export the Bucket Sort.
 global.suffixArray.bsort = bsort;


 /**
 * Constructs the suffix array of s. It takes either a string, an array, or a
 * function that takes an integer and returns a unsigned integer. It also takes
 * an optional 2nd paramter, the length. This is required if the first
 * parameter is a function.
 *
 * This uses the nice idea from Karkkainen & Sander's paper of replacing each
 * letter with the equivalent k-letter version (3 in their paper, 2 in this
 * algorithm). This is repeated recursively until all the letters are
 * different. This doesn't have the nice 1/3 pruning / merge step of their
 * algorithm, but still performs relatively fast, running in O(n log n).
 *
 * @param s A string, array, or function.
 * @param len The length of s.
 * @return The order of the suffixes.
 */
 function wrappedSuffixArray(s, len) {
    len = isInt(len) ? len : s.length;
    s = wrap(s);

    var array = [],
        swap = [],
        order = [],
        span,
        sym,
        i = len;

    while (i--)
        array[i] = s(order[i] = i);

    for (span = 1; sym != len && span < len; span *= 2) {
        bsort(order, function(i) { return array[(i + span) % len] });
        bsort(order, function(i) { return array[i] });

        sym = swap[order[0]] = 1;
        for (i = 1; i < len; i++) {
            if (array[order[i]] != array[order[i - 1]] || array[(order[i] + span) % len] != array[(order[i - 1] + span) % len])
                sym++;
            swap[order[i]] = sym;
        }

        tmp = array;
        array = swap;
        swap = tmp;
    }

    return order;
 }


 /* Constructs the suffix array of s. In this case, s must be a function that
 * maps integers between 0 and len - 1 to "symbols" (unsigned integers). It
 * returns the suffixes in lexicographical order as an array of indexes where
 * those suffixes start. Also returns Longest common prefix array.
 *
 * I have tried to keep the code reasonably well commented. Both for my sake,
 * and yours. That said, my code was not written with pedagogy in mind, but
 * to be relatively fast and have a small minified size.
 *
 * The description of the algorithm in the paper is very concise and is well
 * worth a read.
 *
 * The C code accompanying the paper is very terse and, IMHO, creates more
 * confusion than clarity. While the algorithm itself is fairly simple (simple
 * and fast, who wants more?), it does deal with quite a bit of abstraction.
 * That is, you are dealing with a lot of placeholders, rather than concrete
 * objects; indexes into the string to represent suffixes, lexical names
 * representing triplets of symbols, indexes of these lexical names, etc.
 */
 function _suffixArray(_s, len, sub) {
    var a = [],
        b = [],
        alen = floor(2 * len / 3),  // Number of indexes s.t. i % 3 != 0.
        blen = len - alen,          // Number of indexes s.t. i % 3 = 0.
        r = (alen + 1) >> 1,        // Number of indexes s.t. i % 3 = 1.
        i = alen,
        j = 0,
        k,
        lookup = [],
        result = [],
        tmp, cmp,
        s;

    if (len == 1)
        return [[0], [0]];

    s = function(i) { return i >= len ? 0 : _s(i) };

    // Sort suffixes w/ indices % 3 != 0 by their first 3 symbols (triplets).

    while (i--)
        a[i] = ((i * 3) >> 1) + 1;  // a = [1, 2, 4, 5, 7, 8, 10, 11, 13, ...]

    for (i = 3; i--;)
        bsort(a, function(j) { return s(i + j) });

    // Assign lexicographical names (j) to the triplets of consecutive symbols,
    // s.t. the order of the lex. names match the lex. order of the triplets.

    // Array b contains lex. names in the order they appear in s for i % 3 != 0

    j = b[floor(a[0] / 3) + (a[0] % 3 == 1 ? 0 : r)] = 1;
    for (i = 1; i < alen; i++) {
        if (s(a[i]) != s(a[i-1]) || s(a[i] + 1) != s(a[i-1] + 1) || s(a[i] + 2) != s(a[i-1] + 2))
            j++;
        b[floor(a[i] / 3) + (a[i] % 3 == 1 ? 0 : r)] = j;
    }

    // If all lex. names are unique, then a is already completely sorted.

    if (j < alen) {

        // Otherwise, recursively sort lex. names in b, then reconstruct the
        // indexes of the sorted array b so they are relative to a.
        
        b = _suffixArray(function(i) { return b[i] }, alen, true);

        for (i = alen; i--;)
            a[i] = b[i] < r ? b[i] * 3 + 1 : ((b[i] - r) * 3 + 2);

    }

    // Create a reverse lookup table for the indexes i, s.t. i % 3 != 0.
    // This table can be used to simply determine the sorted order of 2
    // suffixes whose indexes are both not divisible by 3.

    for (i = alen; i--;)
        lookup[a[i]] = i;
    lookup[len] = -1;
    lookup[len + 1] = -2;

    /**
     * This is a comparison function for the suffixes at indices m & n that
     * uses the lookup table to shorten the searches. It assumes that
     * n % 3 == 0 and m % 3 != 0.
     */
    cmp = function(m, n) {
        return (s(m) - s(n)) || (m % 3 == 2
            ? (s(m + 1) - s(n + 1)) || (lookup[m + 2] - lookup[n + 2])
            : (lookup[m + 1] - lookup[n + 1]))
    };
    
    // Sort remaining suffixes (i % 3 == 0) using prev result (i % 3 != 0).

    b = len % 3 == 1 ? [ len - 1 ] : [];
    for (i = 0; i < alen; i++)
        if (a[i] % 3 == 1)
            b.push(a[i] - 1);
    bsort(b, function(j) { return s(j) });

    // Merge a (i % 3 != 0) and b (i % 3 == 0) together. We only need to
    // compare, at most, 2 symbols before we end up comparing 2 suffixes whose
    // indices are both not divisible by 3. At this point, we can use the
    // reverse lookup array to order them.
    
    
    if (sub) { // No need for extra rsa and lcp processing in sub calls, only in the top level one.
        for (i = 0, j = 0, k = 0; i < alen && j < blen;)
            result[k++] = cmp(a[i], b[j]) < 0 ? a[i++] : b[j++];
        while (i < alen)
            result[k++] = a[i++];
        while (j < blen)
            result[k++] = b[j++];

        return result;
    }

    var rsa = [], lcp = [0], h = 0, i2, j2;
    for (i = 0, j = 0, k = 0; i < alen && j < blen;) {
        i2 = k;
        result[k++] = cmp(a[i], b[j]) < 0 ? a[i++] : b[j++];
        rsa[result[i2]] = i2;
        if (rsa[i2] > 0) {
            j2 = result[rsa[i2] - 1];
            while (i2 != len - h && j2 != len - h && s(i2 + h) == s(j2 + h)) h++;
            lcp[rsa[i2]] = h;
            if (h > 0) h--;
        }
    }
    while (i < alen) {
        i2 = k;
        result[k++] = a[i++];
        rsa[result[i2]] = i2;
        if (rsa[i2] > 0) {
            j2 = result[rsa[i2] - 1];
            while (i2 != len - h && j2 != len - h && s(i2 + h) == s(j2 + h)) h++;
            lcp[rsa[i2]] = h;
            if (h > 0) h--;
        }
    }
    while (j < blen) {
        i2 = k;
        result[k++] = b[j++];
        rsa[result[i2]] = i2;
        if (rsa[i2] > 0) {
            j2 = result[rsa[i2] - 1];
            while (i2 != len - h && j2 != len - h && s(i2 + h) == s(j2 + h)) h++;
            lcp[rsa[i2]] = h;
            if (h > 0) h--;
        }
    }

    return [result, lcp];
 }

 }).call();

 if (typeof process === 'object') {
    var fs = require('fs');
    var file = fs.readFileSync((process.argv.length > 2) ? process.argv[2] : 'suffixarray.js', 'utf8');
    console.log(LRS(file));
 } else {
    if (typeof importScripts === 'function') addEventListener('message', e => postMessage(LRS(e.data)));
 }
	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="utf-8">
	<title>Redundancy checker</title>
	<meta name="viewport" content="width=device-width, initial-scale=1">
	<!--<script src="lrs.min.js"></script>-->
	<style>
	html {
	font-family: 'Verdana', sans-serif;
	margin: 0;
	padding: 0;
	}
	pre {
	margin: 0px;
	}
	#main {
	background-color: #fff;
	border: 1px solid #999;
	border-spacing: 5px;
	width: 100%;
	}
	#main>div {
	display: flex;
	}
	#main>div>div {
	background-color: #eee;
	padding: 3px 6px 3px 6px;
	margin: 1px;
	flex: 0 0 110px;
	}
	#main>div>div:nth-child(2n) {
	flex: 1 0 auto;
	}
	#main>div>div:nth-child(3n) {
	flex: 0 1 145px;
	}
	.header {
	font-size: 1.2em;
	font-weight: bold;
	}
	center {
	display: flex;
	align-items: top;
	justify-content: center;
	}
	#file {
	display: none;
	}
	#btn {
	margin-bottom: 16px;
	padding: 0px 5px 0px 5px;
	height: 72px;
	flex: 1 1 0%;
	max-width: 256px;
	text-align: center;
	font-size: 36px;
	line-height: 72px;
	background: rgb(42, 128, 225) none repeat scroll 0% 0%;
	color: rgb(255, 255, 255);
	border-radius: 12px;
	box-shadow: inset 0 5px 5px rgba(0,0,0,0.05),0 0 5px rgba(0,0,0,0.8);
	}
	#btn:hover {
	background: rgb(82, 64, 255) none repeat scroll 0% 0%;
	}
	#chk {
	display: flex;
	margin: 0px 0px 16px 5px;
	align-items: center;
	width: 400px;
	padding: 0px 6px 0px 6px;
	background-color: #efefef;
	border-radius: 9px;
	}
	</style>
	</head>
	<body>
	<center>
	<input type="file" name="file" id="file">
	<label id="btn" for="file" tabindex="0">Browse</label>

	<div id="chk">
	<input type="checkbox" id="str" checked="checked">
	<label for="str" tabindex="1" style="text-align: left; padding-left: 5px;">
	Read file as string, rather than binary. This may result in incorrect "Occurs at:" numbers, but will increase search speed significantly.
	</label>
	</div>
	</center>
	<div id="main">
	</div>
	<script>
	if(function(){var r=Math.floor,t=function(r){return r};function n(r,n){var e,o,f=r.length,s=[],i=f,u=-1,c=0;for(n=n\|\|t;i--;)u=Math.max(n(r[i]),u);for(o=(u>>24?32:u>>16&&24)\|\|u>>8&&16\|\|8;c<o;c+=4){for(i=16;i--;)s[i]=[];for(i=f;i--;)s[n(r[i])>>c&15].push(r[i]);for(e=0;e<16;e++)for(u=s[e].length;u--;)r[++i]=s[e][u]}return r}function e(r){return"function"==typeof r?r:function(r){return"[object String]"==Object.prototype.toString.call(r)}(r)?function(t){return r.charCodeAt(t)}:function(t){return r[t]}}this.suffixArray=function(t,o){var f;return o="number"==typeof(f=o)\|\|f instanceof Number?o:t.length,function t(e,o,f){var s,i,u,c=[],a=[],l=r(2o/3),g=o-l,d=l+1>>1,b=l,h=0,p=[],y=[];if(1==o)return[[0],[0]];u=function(r){return r>=o?0:e(r)};for(;b--;)c[b]=1+(3b>>1);for(b=3;b--;)n(c,function(r){return u(b+r)});h=a[r(c[0]/3)+(c[0]%3==1?0:d)]=1;for(b=1;b<l;b++)u(c[b])==u(c[b-1])&&u(c[b]+1)==u(c[b-1]+1)&&u(c[b]+2)==u(c[b-1]+2)\|\|h++,a[r(c[b]/3)+(c[b]%3==1?0:d)]=h;if(h<l)for(a=t(function(r){return a[r]},l,!0),b=l;b--;)c[b]=a[b]<d?3a[b]+1:3(a[b]-d)+2;for(b=l;b--;)p[c[b]]=b;p[o]=-1;p[o+1]=-2;i=function(r,t){return u(r)-u(t)\|\|(r%3==2?u(r+1)-u(t+1)\|\|p[r+2]-p[t+2]:p[r+1]-p[t+1])};a=o%3==1?[o-1]:[];for(b=0;b<l;b++)c[b]%3==1&&a.push(c[b]-1);n(a,function(r){return u(r)});if(f){for(b=0,h=0,s=0;b<l&&h<g;)y[s++]=i(c[b],a[h])<0?c[b++]:a[h++];for(;b<l;)y[s++]=c[b++];for(;h<g;)y[s++]=a[h++];return y}var v,S,x=[],j=[0],m=0;for(b=0,h=0,s=0;b<l&&h<g;)if(v=s,y[s++]=i(c[b],a[h])<0?c[b++]:a[h++],x[y[v]]=v,x[v]>0){for(S=y[x[v]-1];v!=o-m&&S!=o-m&&u(v+m)==u(S+m);)m++;j[x[v]]=m,m>0&&m--}for(;b<l;)if(v=s,y[s++]=c[b++],x[y[v]]=v,x[v]>0){for(S=y[x[v]-1];v!=o-m&&S!=o-m&&u(v+m)==u(S+m);)m++;j[x[v]]=m,m>0&&m--}for(;h<g;)if(v=s,y[s++]=a[h++],x[y[v]]=v,x[v]>0){for(S=y[x[v]-1];v!=o-m&&S!=o-m&&u(v+m)==u(S+m);)m++;j[x[v]]=m,m>0&&m--}return[y,j]}(e(t),o)},this.LRS=function(r,t,n){t=t\|\|3,n=n\|\|10,nots=r.constructor===Uint8Array,decoder=new TextDecoder;for(var e,o,f,[i,u]=suffixArray(r),c={},a=nots?decoder.decode(r):r,l=0;l<i.length;l++)if(!((e=u[l])<2\|\|(substring=nots?r.subarray(i[l],i[l]+e):r.substring(i[l],i[l]+e),substring=nots?decoder.decode(substring):substring,(o=a.split(substring).length-1)<t))){for(s in f=!1,c)if(s.length>=e){if(s.includes(substring)){f=!0;break}}else substring.includes(s)&&delete c[s];f\|\|(c[substring]=[o,i[l]])}return Object.entries(c).sort((r,t)=>t[1][0]t[0].length2-r[1][0]r[0].length**2).slice(0,n).reduce((r,t)=>(r[t[0]]=c[t[0]],r),{})},this.suffixArray.bsort=n}.call(),"object"==typeof process){var fs=require("fs"),file=fs.readFileSync(process.argv.length>2?process.argv[2]:"suffixarray.js","utf8");console.log(LRS(file))}else"function"==typeof importScripts&&addEventListener("message",r=>postMessage(LRS(r.data)));
	</script>
	<script>
	(function() {
	var msgs = ['Browse', 'Browse again'], b;
	function makeWorker(f) {
	//var blob = new Blob(['(', f.toString(), ')();'], { type: 'text/javascript'});
	var blob = new Blob([f.toString()], {type: 'text/javascript'});
	var blobUrl = URL.createObjectURL(blob);
	var worker = new Worker(blobUrl);
	URL.revokeObjectURL(blobUrl);
	return worker;
	}
	//var w = makeWorker(e => postMessage(LRS(e.data)));
	var w = makeWorker(document.getElementsByTagName('script')[0].innerHTML.trim());
	w.addEventListener('message', function(e) {
	//var res = LRS(e.target.result);
	var res = e.data;
	var m = document.getElementById('main');
	m.innerHTML = ' <div class="header"><div>Occurs at:</div><div>Substring:</div><div>Found count:</div></div>\n';
	for (r in res)
	m.innerHTML += ' <div><div>' + res[r][1] + '</div><div><pre>' + r + '</pre></div><div>' + res[r][0] + '</div></div>\n';
	//document.getElementById('btn').textContent = msgs[Math.floor(Math.random() * msgs.length)];
	document.getElementById('btn').textContent = msgs[b=b?0:1];
	});

	function selectHandler(e) {
	var reader = new FileReader();
	var d = new TextDecoder();
	reader.onload = function(e) {
	document.getElementById('btn').textContent = 'Processing...';
	w.postMessage((typeof e.target.result==='string')?e.target.result:(new Uint8Array(e.target.result)));
	};
	if (document.getElementById('str').checked)
	reader.readAsText(e.target.files[0]);
	else
	reader.readAsArrayBuffer(e.target.files[0]);
	}

	document.getElementById('file').addEventListener('change', selectHandler, false);
	})();
	</script>
	</body>
	</html>
	#!/usr/bin/env python

	# From: https://gist.github.com/hdf/5f0deb03e1b4fc52425ce3d9cd67cf1c
	# Original from: https://gist.github.com/hynekcer/fa340f3b63826168ffc0c4b33310ae9c

	"""Find the longest repeated substring.
	"Efficient way to find longest duplicate string for Python (From Programming Pearls)"
	http://stackoverflow.com/questions/13560037/
	The algorithm is based on "Prefix doubling".
	The worst time complexity is O(n (log n)^2). Memory requirements are linear.
	"""

	import time

	from random import randint
	import itertools
	import sys
	import unittest
	from itertools import groupby
	from operator import itemgetter
	import logging

	log = logging.getLogger(__name__)
	log.setLevel(logging.INFO)
	try:
	log.addHandler(logging.NullHandler())
	except AttributeError:
	pass

	cache = False


	def run():
	if sys.argv[1:] == ['-']:
	print("(Return/Enter and CTRL+D or CTRL+Z" +
	" and Return/Enter again to finish.)")
	text = sys.stdin.read()
	elif sys.argv[1:]:
	print('Reading data...')
	text = open(sys.argv[1]).read()
	else:
	text = 'banana\nabababab'
	print('Sorting...')
	global cache
	cache = False
	result = longest_common_substring(text)
	print('Longest common substrings in "{0}..." are:\n{1}'.format(
	text[:20], result))
	result = LRS(text)
	print('Top (at most) 10, atleast 3 times repeated longest' +
	' non-overlapping substrings are:\n{0}'.format(result))


	def LRS(text, m=3, n=10): # Longest repeated non-overlapping substrings
	"""
	Returns longest (maximum n number of) non-overlapping (minimum m times)
	repeated strings and how many times they are found in the text in
	descending order of count*len(substring)^2.
	"""
	sa, rsa, lcp = suffix_array(text)
	result = {}
	for i in range(0, len(sa)):
	l = lcp[i]
	if l < 2: # Don't bother with individual characters.
	continue
	substring = text[sa[i]:sa[i] + l]
	c = text.count(substring) # This is the bottleneck.
	if c < m: # Not found enough times.
	continue
	halt = False
	for s in list(result.keys()): # Filter out overlaps.
	if len(s) >= l:
	if substring in s:
	halt = True
	break
	elif s in substring:
	del result[s]
	if not halt:
	result[substring] = c
	return dict(sorted(result.items(),
	key=lambda x: x[1](len(x[0])*2), reverse=True)[:n])


	def longest_common_substring(text):
	"""Get the longest common substrings and their positions.
	>>> longest_common_substring('banana')
	{'ana': [1, 3]}
	>>> text = "not so Agamemnon, who spoke fiercely to "
	>>> sorted(longest_common_substring(text).items())
	[(' s', [3, 21]), ('no', [0, 13]), ('o ', [5, 20, 38])]
	This function can be easy modified for any criteria, e.g. for searching ten
	longest non overlapping repeated substrings.
	"""
	sa, rsa, lcp = suffix_array(text)
	maxlen = max(lcp)
	result = {}
	for i in range(1, len(text)):
	if lcp[i] == maxlen:
	j1, j2, h = sa[i - 1], sa[i], lcp[i]
	assert text[j1:j1 + h] == text[j2:j2 + h]
	substring = text[j1:j1 + h]
	if substring not in result:
	result[substring] = [j1]
	result[substring].append(j2)
	return dict((k, sorted(v)) for k, v in result.items())


	def suffix_array(text, _step=16):
	global cache
	if cache and "unittest" not in sys.modules:
	return cache
	"""Analyze all common strings in the text.
	Short substrings of the length _step a are first pre-sorted. The are the
	results repeatedly merged so that the garanteed number of compared
	characters bytes is doubled in every iteration until all substrings are
	sorted exactly.
	Arguments:
	text: The text to be analyzed.
	_step: Is only for optimization and testing. It is the optimal length
	of substrings used for initial pre-sorting. The bigger value is
	faster if there is enough memory. Memory requirements are
	approximately (estimate for 32 bit Python 3.3):
	len(text) * (29 + (_size + 20 if _size > 2 else 0)) + 1MB
	Return value: (tuple)
	(sa, rsa, lcp)
	sa: Suffix array for i in range(1, size):
	assert text[sa[i-1]:] < text[sa[i]:]
	rsa: Reverse suffix array for i in range(size):
	assert rsa[sa[i]] == i
	lcp: Longest common prefix for i in range(1, size):
	assert text[sa[i-1]:sa[i-1]+lcp[i]] == text[sa[i]:sa[i]+lcp[i]]
	if sa[i-1] + lcp[i] < len(text):
	assert text[sa[i-1] + lcp[i]] < text[sa[i] + lcp[i]]
	>>> suffix_array(text='banana')
	([5, 3, 1, 0, 4, 2], [3, 2, 5, 1, 4, 0], [0, 1, 3, 0, 0, 2])
	Explanation: 'a' < 'ana' < 'anana' < 'banana' < 'na' < 'nana'
	The Longest Common String is 'ana': lcp[2] == 3 == len('ana')
	It is between tx[sa[1]:] == 'ana' < 'anana' == tx[sa[2]:]
	"""
	tx = text
	t0 = time.time()
	size = len(tx)
	step = min(max(_step, 1), len(tx))
	sa = list(range(len(tx)))
	log.debug('%6.3f pre sort', time.time() - t0)
	sa.sort(key=lambda i: tx[i:i + step])
	log.debug('%6.3f after sort', time.time() - t0)
	grpstart = size * [False] + [True] # a boolean map for iteration speedup.
	# It helps to skip yet resolved values. The last value True is a sentinel.
	rsa = size * [None]
	stgrp, igrp = '', 0
	for i, pos in enumerate(sa):
	st = tx[pos:pos + step]
	if st != stgrp:
	grpstart[igrp] = (igrp < i - 1)
	stgrp = st
	igrp = i
	rsa[pos] = igrp
	sa[i] = pos
	grpstart[igrp] = (igrp < size - 1 or size == 0)
	log.debug('%6.3f after group', time.time() - t0)
	while grpstart.index(True) < size:
	# assert step <= size
	nmerge = 0
	nextgr = grpstart.index(True)
	while nextgr < size:
	igrp = nextgr
	nextgr = grpstart.index(True, igrp + 1)
	glist = []
	for ig in range(igrp, nextgr):
	pos = sa[ig]
	if rsa[pos] != igrp:
	break
	newgr = rsa[pos + step] if pos + step < size else -1
	glist.append((newgr, pos))
	glist.sort()
	for ig, g in groupby(glist, key=itemgetter(0)):
	g = [x[1] for x in g]
	sa[igrp:igrp + len(g)] = g
	grpstart[igrp] = (len(g) > 1)
	for pos in g:
	rsa[pos] = igrp
	igrp += len(g)
	nmerge += len(glist)
	log.debug('%6.3f for step=%d nmerge=%d', time.time() - t0, step, nmerge)
	step *= 2
	del grpstart
	# create LCP array
	lcp = size * [0]
	h = 0
	for i in range(size):
	if rsa[i] > 0:
	j = sa[rsa[i] - 1]
	while i != size - h and j != size - h and tx[i + h] == tx[j + h]:
	h += 1
	lcp[rsa[i]] = h
	if h > 0:
	h -= 1
	log.debug('%6.3f end', time.time() - t0)
	cache = (sa, rsa, lcp)
	return cache

	# ---


	class TestMixin(object):
	def suffix_verify(self, text, step=16):
	tx = text
	sa, rsa, lcp = suffix_array(text=tx, _step=step)
	self.assertEqual(set(sa), set(range(len(tx))))
	ok = True
	for i0, i1, h in zip(sa[:-1], sa[1:], lcp[1:]):
	self.assertEqual(tx[i1:i1 + h], tx[i0:i0 + h], "Verify LCP characters equal on text '%s...'" % text[:20])
	self.assertGreater(tx[i1 + h:i1 + h + 1], tx[i0 + h:i0 + h + 1],
	"Verify LCP+1 char is different '%s...'" % text[:20])
	self.assertLessEqual(max(i0, i1), len(tx) - h,
	"Verify LCP is not more than length of string '%s...'" % text[:20])
	self.assertTrue(ok)


	class SuffixArrayTest(unittest.TestCase, TestMixin):
	def test_16(self):
	# 'a' < 'ana' < 'anana' < 'banana' < 'na' < 'nana'
	expect = ([5, 3, 1, 0, 4, 2], [3, 2, 5, 1, 4, 0], [0, 1, 3, 0, 0, 2])
	self.assertEqual(suffix_array(text='banana', _step=16), expect)

	def test_1(self):
	expect = ([5, 3, 1, 0, 4, 2], [3, 2, 5, 1, 4, 0], [0, 1, 3, 0, 0, 2])
	self.assertEqual(suffix_array(text='banana', _step=1), expect)

	def test_mini(self):
	self.assertEqual(suffix_array(text='', _step=1), ([], [], []))
	self.assertEqual(suffix_array(text='a', _step=1), ([0], [0], [0]))
	self.assertEqual(suffix_array(text='aa', _step=1), ([1, 0], [1, 0], [0, 1]))
	self.assertEqual(suffix_array(text='aaa', _step=1), ([2, 1, 0], [2, 1, 0], [0, 1, 2]))

	def test_example(self):
	self.suffix_verify('abracadabra')

	def test_cartesian(self):
	"""Test all combinations of alphabet "ABC" up to length 4 characters"""
	for size in range(7):
	for cartesian in itertools.product((size ['ABC'])):
	text = ''.join(cartesian)
	log.debug('Testing "%s"', text)
	self.suffix_verify(text, 1)

	def test_lcp(self):
	expect = {'ana': [1, 3]}
	self.assertDictEqual(longest_common_substring('banana'), expect)
	expect = {' s': [3, 21], 'no': [0, 13], 'o ': [5, 20, 38]}
	self.assertDictEqual(longest_common_substring(
	"not so Agamemnon, who spoke fiercely to "), expect)


	class SlowTests(unittest.TestCase, TestMixin):
	"""Slow development tests running many minutes.
	It can be run only by an EXPLICIT command!
	e.g.: python -m unittest maxsubstring.SlowTests._test_random
	"""
	def _test_random(self):
	for power in range(2, 21, 2):
	size = randint(2 (power - 1), 2 power)
	for alphabet in (2, 4, 16, 256):
	text = ''.join(chr(65 + randint(0, alphabet - 1)) for _ in range(size))
	log.debug('%s %s %s', size, alphabet, 1)
	self.suffix_verify(text, 1)
	log.debug('%s %s %s', size, alphabet, 16)
	self.suffix_verify(text, 16)


	if __name__ == '__main__':
	run()
	// From: https://gist.github.com/hdf/5f0deb03e1b4fc52425ce3d9cd67cf1c
	// (Minify with: https://javascript-minifier.com/)
	// Original from: https://github.com/tixxit/suffixarray/blob/master/suffixarray.js

	/**
	* An implementation of the linear time suffix array construction of
	* Karkkainen & Sanders:
	*
	* "Simple Linear Work Suffix Array Construction", Karkainen and Sanders.
	*
	* Creating a suffix array is very simple; just call suffixArray(...) with
	* either a string or a function that returns integers and its length. For
	* example,
	*
	* var s = "Sort this!";
	* suffixArray(s); // Returns [4, 9, 0, 6, 7, 1, 2, 8, 3, 5].
	*
	* function reverse(i) { return s.charCodeAt(s.length - 1 - i) }
	* suffixArray(reverse, s.length); // Returns [5, 0, 9, 3, 2, 8, 7, 1, 4, 6].
	*
	* @author Thomas Switzer
	*/
	(function() {

	var global = this,
	floor = Math.floor,
	identity = function(x) { return x },
	undefined;

	/**
	* Sorts an array of (unsigned) integers in linear time. The values of the
	* array (a) act as keys, which are passed to the key function which returns an
	* integer value.
	*
	* @param a An array of keys to sort.
	* @param key A function that maps keys to integer values.
	* @return The array a.
	*/
	function bsort(a, key) {
	var len = a.length,
	buckets = [],
	i = len, j = -1, b, d = 0,
	keys = 0,
	bits;
	key = key \|\| identity;
	while (i--)
	j = Math.max(key(a[i]), j);
	bits = j >> 24 && 32 \|\| j >> 16 && 24 \|\| j >> 8 && 16 \|\| 8;
	for (; d < bits; d += 4) {
	for (i = 16; i--;)
	buckets[i] = [];
	for (i = len; i--;)
	buckets[(key(a[i]) >> d) & 15].push(a[i]);
	for (b = 0; b < 16; b++)
	for (j = buckets[b].length; j--;)
	a[++i] = buckets[b][j];
	}
	return a;
	}


	function isInt(n) {
	return typeof n == "number" \|\| n instanceof Number;
	}

	function isStr(s) {
	return Object.prototype.toString.call(s) == "[object String]";
	}


	function wrap(s) {
	return typeof s == "function" ? s : (isStr(s)
	? function(i) { return s.charCodeAt(i) }
	: function(i) { return s[i] });
	}


	/**
	* Returns the suffix array of the string s. The suffix array is constructed
	* in linear time. Also returns Longest common prefix array.
	*
	* The string s can either be an Unicode string (ie. JavaScript String object)
	* or a function that takes an index (integer >= 0) and returns another
	* integer (a "symbol"). If a function is provided, then another argument
	* specifying its length (integer >= 0) must be provided.
	*
	* This also takes a 3rd optional parameter that dictates how to treat the end
	* of the string. This can be either "min" or "wrap". If it is "min", then
	* characters after the end of the string are treated as 0's (the minimum).
	* If "wrap" is given, then the end of the string wraps back around to the
	* beginning. If this parameter is omitted, then "wrap" is assumed.
	*
	* In the case of strings, you can omit the 2nd paramter (length) and still
	* provide the 3rd paramter. For instance, suffixArray(str, "min").
	*
	* The returned array contains the indexes of the string in the lexicographical
	* order of the suffixes that start at those indexes.
	*
	* @param s A string or function that maps ints between [0, len) to integers.
	* @param len The length of s (optional if s is a string, required otherwise).
	* @param end Either "min", "wrap" or leave out (defaults to "wrap").
	* @return An array of indexes into s.
	*/
	global.suffixArray = function(s, len, end) {
	end = end \|\| len;
	len = isInt(len) ? len : s.length;

	if (end == "wrap")
	return wrappedSuffixArray(s, len);
	else
	return _suffixArray(wrap(s), len);
	}


	/**
	* Longest repeated non-overlapping substrings:
	* Returns longest (maximum n number of) non-overlapping (minimum m times)
	* repeated strings and how many times they are found in the text in
	* descending order of count*len(substring)^2.
	*/
	global.LRS = function(text, m, n) {
	m = m \|\| 3;
	n = n \|\| 10;
	nots = (text.constructor===Uint8Array);
	decoder = new TextDecoder();

	var [sa, lcp] = suffixArray(text);
	var result = {}, l, c, halt,
	t = nots?decoder.decode(text):text;
	for (var i = 0; i < sa.length; i++) {
	l = lcp[i];
	if (l < 2) continue; // Don't bother with individual characters.
	substring = nots?text.subarray(sa[i], sa[i] + l):text.substring(sa[i], sa[i] + l); // Subarray is very slow. :(
	substring = nots?decoder.decode(substring):substring;
	c = t.split(substring).length - 1; // This is slow too.
	if (c < m) continue; // Not found enough times.
	halt = false;
	for (s in result) // Filter out overlaps.
	if (s.length >= l) {
	if (s.includes(substring)) {
	halt = true;
	break;
	}
	} else if (substring.includes(s))
	delete result[s];
	if (!halt) result[substring] = [c, sa[i]];
	}

	return Object.entries(result).sort((a, b) =>
	(b[1][0](b[0].length2))-(a[1][0](a[0].length**2)))
	.slice(0, n).reduce((o, k) => {
	o[k[0]] = result[k[0]];
	return o;
	}, {});
	}


	// Export the Bucket Sort.
	global.suffixArray.bsort = bsort;


	/**
	* Constructs the suffix array of s. It takes either a string, an array, or a
	* function that takes an integer and returns a unsigned integer. It also takes
	* an optional 2nd paramter, the length. This is required if the first
	* parameter is a function.
	*
	* This uses the nice idea from Karkkainen & Sander's paper of replacing each
	* letter with the equivalent k-letter version (3 in their paper, 2 in this
	* algorithm). This is repeated recursively until all the letters are
	* different. This doesn't have the nice 1/3 pruning / merge step of their
	* algorithm, but still performs relatively fast, running in O(n log n).
	*
	* @param s A string, array, or function.
	* @param len The length of s.
	* @return The order of the suffixes.
	*/
	function wrappedSuffixArray(s, len) {
	len = isInt(len) ? len : s.length;
	s = wrap(s);

	var array = [],
	swap = [],
	order = [],
	span,
	sym,
	i = len;

	while (i--)
	array[i] = s(order[i] = i);

	for (span = 1; sym != len && span < len; span *= 2) {
	bsort(order, function(i) { return array[(i + span) % len] });
	bsort(order, function(i) { return array[i] });

	sym = swap[order[0]] = 1;
	for (i = 1; i < len; i++) {
	if (array[order[i]] != array[order[i - 1]] \|\| array[(order[i] + span) % len] != array[(order[i - 1] + span) % len])
	sym++;
	swap[order[i]] = sym;
	}

	tmp = array;
	array = swap;
	swap = tmp;
	}

	return order;
	}


	/* Constructs the suffix array of s. In this case, s must be a function that
	* maps integers between 0 and len - 1 to "symbols" (unsigned integers). It
	* returns the suffixes in lexicographical order as an array of indexes where
	* those suffixes start. Also returns Longest common prefix array.
	*
	* I have tried to keep the code reasonably well commented. Both for my sake,
	* and yours. That said, my code was not written with pedagogy in mind, but
	* to be relatively fast and have a small minified size.
	*
	* The description of the algorithm in the paper is very concise and is well
	* worth a read.
	*
	* The C code accompanying the paper is very terse and, IMHO, creates more
	* confusion than clarity. While the algorithm itself is fairly simple (simple
	* and fast, who wants more?), it does deal with quite a bit of abstraction.
	* That is, you are dealing with a lot of placeholders, rather than concrete
	* objects; indexes into the string to represent suffixes, lexical names
	* representing triplets of symbols, indexes of these lexical names, etc.
	*/
	function _suffixArray(_s, len, sub) {
	var a = [],
	b = [],
	alen = floor(2 * len / 3), // Number of indexes s.t. i % 3 != 0.
	blen = len - alen, // Number of indexes s.t. i % 3 = 0.
	r = (alen + 1) >> 1, // Number of indexes s.t. i % 3 = 1.
	i = alen,
	j = 0,
	k,
	lookup = [],
	result = [],
	tmp, cmp,
	s;

	if (len == 1)
	return [[0], [0]];

	s = function(i) { return i >= len ? 0 : _s(i) };

	// Sort suffixes w/ indices % 3 != 0 by their first 3 symbols (triplets).

	while (i--)
	a[i] = ((i * 3) >> 1) + 1; // a = [1, 2, 4, 5, 7, 8, 10, 11, 13, ...]

	for (i = 3; i--;)
	bsort(a, function(j) { return s(i + j) });

	// Assign lexicographical names (j) to the triplets of consecutive symbols,
	// s.t. the order of the lex. names match the lex. order of the triplets.

	// Array b contains lex. names in the order they appear in s for i % 3 != 0

	j = b[floor(a[0] / 3) + (a[0] % 3 == 1 ? 0 : r)] = 1;
	for (i = 1; i < alen; i++) {
	if (s(a[i]) != s(a[i-1]) \|\| s(a[i] + 1) != s(a[i-1] + 1) \|\| s(a[i] + 2) != s(a[i-1] + 2))
	j++;
	b[floor(a[i] / 3) + (a[i] % 3 == 1 ? 0 : r)] = j;
	}

	// If all lex. names are unique, then a is already completely sorted.

	if (j < alen) {

	// Otherwise, recursively sort lex. names in b, then reconstruct the
	// indexes of the sorted array b so they are relative to a.

	b = _suffixArray(function(i) { return b[i] }, alen, true);

	for (i = alen; i--;)
	a[i] = b[i] < r ? b[i] * 3 + 1 : ((b[i] - r) * 3 + 2);

	}

	// Create a reverse lookup table for the indexes i, s.t. i % 3 != 0.
	// This table can be used to simply determine the sorted order of 2
	// suffixes whose indexes are both not divisible by 3.

	for (i = alen; i--;)
	lookup[a[i]] = i;
	lookup[len] = -1;
	lookup[len + 1] = -2;

	/**
	* This is a comparison function for the suffixes at indices m & n that
	* uses the lookup table to shorten the searches. It assumes that
	* n % 3 == 0 and m % 3 != 0.
	*/
	cmp = function(m, n) {
	return (s(m) - s(n)) \|\| (m % 3 == 2
	? (s(m + 1) - s(n + 1)) \|\| (lookup[m + 2] - lookup[n + 2])
	: (lookup[m + 1] - lookup[n + 1]))
	};

	// Sort remaining suffixes (i % 3 == 0) using prev result (i % 3 != 0).

	b = len % 3 == 1 ? [ len - 1 ] : [];
	for (i = 0; i < alen; i++)
	if (a[i] % 3 == 1)
	b.push(a[i] - 1);
	bsort(b, function(j) { return s(j) });

	// Merge a (i % 3 != 0) and b (i % 3 == 0) together. We only need to
	// compare, at most, 2 symbols before we end up comparing 2 suffixes whose
	// indices are both not divisible by 3. At this point, we can use the
	// reverse lookup array to order them.


	if (sub) { // No need for extra rsa and lcp processing in sub calls, only in the top level one.
	for (i = 0, j = 0, k = 0; i < alen && j < blen;)
	result[k++] = cmp(a[i], b[j]) < 0 ? a[i++] : b[j++];
	while (i < alen)
	result[k++] = a[i++];
	while (j < blen)
	result[k++] = b[j++];

	return result;
	}

	var rsa = [], lcp = [0], h = 0, i2, j2;
	for (i = 0, j = 0, k = 0; i < alen && j < blen;) {
	i2 = k;
	result[k++] = cmp(a[i], b[j]) < 0 ? a[i++] : b[j++];
	rsa[result[i2]] = i2;
	if (rsa[i2] > 0) {
	j2 = result[rsa[i2] - 1];
	while (i2 != len - h && j2 != len - h && s(i2 + h) == s(j2 + h)) h++;
	lcp[rsa[i2]] = h;
	if (h > 0) h--;
	}
	}
	while (i < alen) {
	i2 = k;
	result[k++] = a[i++];
	rsa[result[i2]] = i2;
	if (rsa[i2] > 0) {
	j2 = result[rsa[i2] - 1];
	while (i2 != len - h && j2 != len - h && s(i2 + h) == s(j2 + h)) h++;
	lcp[rsa[i2]] = h;
	if (h > 0) h--;
	}
	}
	while (j < blen) {
	i2 = k;
	result[k++] = b[j++];
	rsa[result[i2]] = i2;
	if (rsa[i2] > 0) {
	j2 = result[rsa[i2] - 1];
	while (i2 != len - h && j2 != len - h && s(i2 + h) == s(j2 + h)) h++;
	lcp[rsa[i2]] = h;
	if (h > 0) h--;
	}
	}

	return [result, lcp];
	}

	}).call();

	if (typeof process === 'object') {
	var fs = require('fs');
	var file = fs.readFileSync((process.argv.length > 2) ? process.argv[2] : 'suffixarray.js', 'utf8');
	console.log(LRS(file));
	} else {
	if (typeof importScripts === 'function') addEventListener('message', e => postMessage(LRS(e.data)));
	}