Last active
April 6, 2021 07:15
-
-
Save schmengler/c29b210535b04b23e9e6 to your computer and use it in GitHub Desktop.
Draw Elements From Large PHP Array: Benchmark Script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
require 'Timer.php'; | |
$testData = array( | |
// $k | count($a) | |
// -------+--------- | |
array( 10, 1000), | |
array( 10, 10000), | |
array( 10, 100000), | |
array( 10, 1000000), | |
array( 100, 1000), | |
array( 100, 10000), | |
array( 100, 100000), | |
array( 100, 1000000), | |
array( 1000, 10000), | |
array( 5000, 10000), | |
array( 9000, 10000), | |
array( 1000, 100000), | |
array( 10000, 100000), | |
array( 50000, 100000), | |
array( 90000, 100000), | |
); | |
$functions = array( | |
'value_by_array_rand', | |
'shuffle_and_splice', | |
'random_keys', | |
'richard_durstenfeld', | |
'fisher_yates_by_value', | |
'fisher_yates_by_reference', | |
'richard_durstenfeld_fixed_array', | |
'fisher_yates_fixed_array', | |
); | |
if (isset($argv[1])) { | |
$functions = array($argv[1]); | |
} | |
echo "|------------------------------------------------------------|\n"; | |
printf($format = "| %-25.25s | %8.8s | %9.9s | %7s |\n", 'function', '$k', '$t', 'time'); | |
echo "|---------------------------+----------+-----------+---------|\n"; | |
foreach ($functions as $function) { | |
foreach ($testData as $data) { | |
list($k, $n) = $data; | |
$time = test($function, range(1, $n), $k); | |
printf($format, $function, $k, $n, PHP_Timer::secondsToTimeString($time));; | |
} | |
} | |
echo "|---------------------------+----------+-----------+---------|\n"; | |
echo "Total memory usage: ", number_format(memory_get_peak_usage()), " Bytes"; | |
function test($f, array $a, $k) | |
{ | |
PHP_Timer::start(); | |
$result = $f($a, $k); | |
return PHP_Timer::stop(); | |
} | |
function value_by_array_rand($array, $k) | |
{ | |
$keys = array_rand($array, $k); | |
$result = array(); | |
foreach ($keys as $key) { | |
$result[] = $array[$key]; | |
} | |
return $result; | |
} | |
function shuffle_and_splice(&$array, $k) | |
{ | |
shuffle($array); | |
return array_splice($array, 0, $k); | |
} | |
function random_keys($array, $k) | |
{ | |
$randomArray = []; | |
while (count($randomArray) < $k) { | |
$randomKey = mt_rand(0, count($array)-1); | |
$randomArray[$randomKey] = $array[$randomKey]; | |
} | |
return $randomArray; | |
} | |
function richard_durstenfeld($array, $n) | |
{ | |
$array_keys = array_keys($array); | |
$array_length = count($array_keys); | |
$max_index = $array_length -1; | |
$iterations = min($n, $array_length); | |
$random_array = array(); | |
while($iterations--) { | |
$index = mt_rand(0, $max_index); | |
$value = $array_keys[$index]; | |
$array_keys[$index] = $array_keys[$max_index]; | |
array_push($random_array, $array[$value]); | |
$max_index--; | |
} | |
return $random_array; | |
} | |
function fisher_yates_by_value( $a, $n ) | |
{ | |
$N = count($a); | |
$n = min($n, $N); | |
$picked = array_fill(0, $n, 0); | |
// partially shuffle the array, and generate unbiased selection simultaneously | |
// this is a variation on fisher-yates-knuth shuffle | |
for ($i=0; $i<$n; $i++) // O(n) times | |
{ | |
$selected = mt_rand( 0, --$N ); // unbiased sampling N * N-1 * N-2 * .. * N-n+1 | |
$value = $a[ $selected ]; | |
$a[ $selected ] = $a[ $N ]; | |
$a[ $N ] = $value; | |
$picked[ $i ] = $value; | |
} | |
return $picked; | |
} | |
function fisher_yates_by_reference( &$a, $n ) | |
{ | |
$N = count($a); | |
$n = min($n, $N); | |
$picked = array_fill(0, $n, 0); $backup = array_fill(0, $n, 0); | |
// partially shuffle the array, and generate unbiased selection simultaneously | |
// this is a variation on fisher-yates-knuth shuffle | |
for ($i=0; $i<$n; $i++) // O(n) times | |
{ | |
$selected = mt_rand( 0, --$N ); // unbiased sampling N * N-1 * N-2 * .. * N-n+1 | |
$value = $a[ $selected ]; | |
$a[ $selected ] = $a[ $N ]; | |
$a[ $N ] = $value; | |
$backup[ $i ] = $selected; | |
$picked[ $i ] = $value; | |
} | |
// restore partially shuffled input array from backup | |
// optional step, if needed it can be ignored, e.g $a is passed by value, hence copied | |
for ($i=$n-1; $i>=0; $i--) // O(n) times | |
{ | |
$selected = $backup[ $i ]; | |
$value = $a[ $N ]; | |
$a[ $N ] = $a[ $selected ]; | |
$a[ $selected ] = $value; | |
$N++; | |
} | |
return $picked; | |
} | |
function richard_durstenfeld_fixed_array($array, $n) | |
{ | |
$array_length = count($array); | |
$max_index = $array_length -1; | |
$iterations = min($n, $array_length); | |
$array_keys = SplFixedArray::fromArray(range(0, $max_index), false); | |
$random_array = array(); | |
while($iterations--) { | |
$index = mt_rand(0, $max_index); | |
$value = $array_keys[$index]; | |
$array_keys[$index] = $array_keys[$max_index]; | |
array_push($random_array, $array[$value]); | |
$max_index--; | |
} | |
return $random_array; | |
} | |
function fisher_yates_fixed_array( $array, $n ) | |
{ | |
$a = SplFixedArray::fromArray($array, false); | |
$N = count($a); | |
$n = min($n, $N); | |
$picked = array_fill(0, $n, 0); | |
// partially shuffle the array, and generate unbiased selection simultaneously | |
// this is a variation on fisher-yates-knuth shuffle | |
for ($i=0; $i<$n; $i++) // O(n) times | |
{ | |
$selected = mt_rand( 0, --$N ); // unbiased sampling N * N-1 * N-2 * .. * N-n+1 | |
$value = $a[ $selected ]; | |
$a[ $selected ] = $a[ $N ]; | |
$a[ $N ] = $value; | |
$picked[ $i ] = $value; | |
} | |
return $picked; | |
} |
@simtabi yes, the functions assume continuous numeric indexes. Background: https://www.schmengler-se.de/en/2015/09/efficiently-draw-random-elements-from-large-php-array/
Problem definition
- Given an array $array of $n integers, draw $k distinct elements, with $k << $n
- The array is indexed numerically from 0 to $n-1 and does not contain duplicates.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi schmengler,
Stumbled on this, great piece. But a quick question, what if an array has named keys/index keys? tried it out and it fails.