Last active
October 5, 2016 19:36
-
-
Save achille/3a4034377dbcd3b246b17c0ae6c03c00 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Vitter JS (1987) 'An efficient algorithm for sequential | |
random sampling.' ACM T. Math. Softw. 13(1): 58--67. | |
Copied from: https://gist.github.com/ldoddema/bb4ba2d4ad1b948a05e0 | |
""" | |
from math import exp, log | |
import random | |
import numpy as np | |
from numpy import arange | |
def vitter_A(N, n, sample_out, last=0): | |
""" | |
Select `n` samples from the integers in population [0, `N`). | |
The result is placed in integer array `sample_out` (convenient | |
when testing with Numba). | |
Optionally start "counting" from integer `last`, to continue | |
from Vitter_D. I.e. the population has range [last, N+last). | |
""" | |
# cdef: | |
# float V, quot, Nreal, top | |
# int -> all others? | |
idx = 0 | |
top = float(N - n); Nreal = float(N) | |
while n >= 2: | |
V = np.random.random(); S = 0; quot = top / Nreal | |
while quot > V: | |
S += 1; top -= 1.0; Nreal -= 1.0 | |
quot = (quot * top) / Nreal | |
last = last + S | |
sample_out[idx] = last | |
idx += 1 | |
last = last + 1 | |
Nreal -= 1.0 | |
n -= 1 | |
S = int(np.round(Nreal) * np.random.random()) | |
last += S | |
sample_out[idx] = last | |
def test_python(n,k): | |
return random.sample(xrange(n), k) | |
def test_vitter(n, k): | |
sample_out = [0] * k | |
vitter_A(n, k, sample_out) | |
return sample_out | |
# timeit -n 10 test_python(10000000, 400000) | |
# 10 loops, best of 3: 250 ms per loop | |
# | |
# timeit -n 10 test_vitter(10000000, 400000) | |
# 10 loops, best of 3: 1.72 s per loop |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment