Skip to content

Instantly share code, notes, and snippets.

@achille
Last active October 5, 2016 19:36
Show Gist options
  • Save achille/3a4034377dbcd3b246b17c0ae6c03c00 to your computer and use it in GitHub Desktop.
Save achille/3a4034377dbcd3b246b17c0ae6c03c00 to your computer and use it in GitHub Desktop.
"""
Vitter JS (1987) 'An efficient algorithm for sequential
random sampling.' ACM T. Math. Softw. 13(1): 58--67.
Copied from: https://gist.github.com/ldoddema/bb4ba2d4ad1b948a05e0
"""
from math import exp, log
import random
import numpy as np
from numpy import arange
def vitter_A(N, n, sample_out, last=0):
"""
Select `n` samples from the integers in population [0, `N`).
The result is placed in integer array `sample_out` (convenient
when testing with Numba).
Optionally start "counting" from integer `last`, to continue
from Vitter_D. I.e. the population has range [last, N+last).
"""
# cdef:
# float V, quot, Nreal, top
# int -> all others?
idx = 0
top = float(N - n); Nreal = float(N)
while n >= 2:
V = np.random.random(); S = 0; quot = top / Nreal
while quot > V:
S += 1; top -= 1.0; Nreal -= 1.0
quot = (quot * top) / Nreal
last = last + S
sample_out[idx] = last
idx += 1
last = last + 1
Nreal -= 1.0
n -= 1
S = int(np.round(Nreal) * np.random.random())
last += S
sample_out[idx] = last
def test_python(n,k):
return random.sample(xrange(n), k)
def test_vitter(n, k):
sample_out = [0] * k
vitter_A(n, k, sample_out)
return sample_out
# timeit -n 10 test_python(10000000, 400000)
# 10 loops, best of 3: 250 ms per loop
#
# timeit -n 10 test_vitter(10000000, 400000)
# 10 loops, best of 3: 1.72 s per loop
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment