Last active
March 5, 2025 21:44
-
-
Save scottchiefbaker/e4c87de8d6c1ad4a33b1ffe5a3ce707f to your computer and use it in GitHub Desktop.
Quick benchmark to compare 32-bit and 64-bit PRNGs on an ESP32
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdint.h> | |
// ---- Generator state; every item below is seeded in setup() ----
static uint64_t s[8];   // xoshiro512++ state words
static uint32_t r[2];   // xoroshiro64** state words
static uint64_t sm;     // splitmix64 state
static uint64_t fs[4];  // xoshiro256+ state words

// PCG keeps its state and its increment together in a structure
typedef struct {
  uint64_t state;
  uint64_t inc;
} pcg32_random_t;

pcg32_random_t rng;  // generator instance advanced by pcg32()

// ---- Benchmark bookkeeping ----
uint64_t next_out = 0;  // millis() value at which the current timing window ends
uint32_t count = 0;     // number of outputs generated in the current window
void setup() { | |
Serial.begin(115200); | |
delay(1000); | |
// Init all the various global seeds for the PRNGs | |
for (int i = 0; i < 8; i++) { | |
s[i] = rdtsc_rand64(); | |
} | |
for (int i = 0; i < 2; i++) { | |
r[i] = rdtsc_rand64(); | |
} | |
for (int i = 0; i < 4; i++) { | |
fs[i] = rdtsc_rand64(); | |
} | |
sm = rdtsc_rand64(); | |
rng.state = rdtsc_rand64(); | |
rng.inc = rdtsc_rand64(); | |
next_out = millis() + 1000; | |
} | |
void loop() { | |
uint32_t md = 1000; | |
delay(5000); | |
next_out = millis() + md; | |
Serial.printf("\r\n"); | |
////////////////////////////////////////////////////////////// | |
uint32_t num = 11; | |
while (next_out > millis()) { | |
num = xoroshiro64starstar(); | |
count++; | |
} | |
Serial.printf("Generated %u x64** = %0.1f b/s\r\n", count, (count * 4.0 / (md / 1000.0))); | |
count = 0; | |
next_out = millis() + md; | |
////////////////////////////////////////////////////////////// | |
uint64_t num2 = 11; | |
while (next_out > millis()) { | |
num2 = xoshiro256plus(); | |
count++; | |
} | |
Serial.printf("Generated %u x256+ = %0.1f b/s\r\n", count, (count * 8.0 / (md / 1000.0))); | |
count = 0; | |
next_out = millis() + md; | |
////////////////////////////////////////////////////////////// | |
num2 = 11; | |
while (next_out > millis()) { | |
num2 = xoshiro512plusplus(); | |
count++; | |
} | |
Serial.printf("Generated %u x512++ = %0.1f b/s\r\n", count, (count * 8.0 / (md / 1000.0))); | |
count = 0; | |
next_out = millis() + md; | |
////////////////////////////////////////////////////////////// | |
num2 = 33; | |
while (next_out > millis()) { | |
num2 = splitmix64(); | |
count++; | |
} | |
Serial.printf("Generated %u sm64 = %0.1f b/s\r\n", count, (count * 8.0 / (md / 1000.0))); | |
count = 0; | |
next_out = millis() + md; | |
////////////////////////////////////////////////////////////// | |
while (next_out > millis()) { | |
num2 = pcg32(); | |
count++; | |
} | |
Serial.printf("Generated %u pcg32 = %0.1f b/s\r\n", count, (count * 4.0 / (md / 1000.0))); | |
count = 0; | |
next_out = millis() + md; | |
////////////////////////////////////////////////////////////// | |
num2 = 44; | |
while (next_out > millis()) { | |
num2 = pcg64(&rng); | |
count++; | |
} | |
Serial.printf("Generated %u pcg64 = %0.1f b/s\r\n", count, (count * 8.0 / (md / 1000.0))); | |
count = 0; | |
next_out = millis() + 1000; | |
} | |
/////////////////////////////////////////////////////////// | |
// rdtsc_rand | |
/////////////////////////////////////////////////////////// | |
// Return a fast-changing 64-bit counter for the current platform, used
// as the raw entropy source for seeding.
//
//   Windows          : __rdtsc()
//   AArch64          : the cntvct_el0 register
//   Arduino          : micros(), i.e. microseconds since the sketch
//                      started; this changes far more slowly than the
//                      counters above
//   GCC/Clang on x86 : the rdtsc instruction, EDX:EAX combined
//
// Any other target stops the build with an explicit #error. The rdtsc
// branch is restricted to x86 because the instruction exists nowhere
// else; without the guard a non-x86 GCC/Clang build would fail inside
// the inline assembly rather than with the intended message.
//
// NOTE(review): the Windows branch needs __rdtsc to be declared
// (normally via <intrin.h>); no such include is visible here - confirm.
uint64_t get_rdtsc() {
#if defined(_WIN32) || defined(_WIN64)
  return __rdtsc();
#elif defined(__aarch64__)
  uint64_t ticks;  // not called `count`, to avoid shadowing the global counter
  __asm__ volatile ("mrs %0, cntvct_el0" : "=r" (ticks));
  return ticks;
#elif defined(ARDUINO)
  return micros();
#elif (defined(__GNUC__) || defined(__clang__)) && (defined(__x86_64__) || defined(__i386__))
  uint32_t low, high;
  __asm__ volatile ("rdtsc" : "=a"(low), "=d"(high));
  return ((uint64_t)(high) << 32) | low;
#else
  #error "Unsupported platform"
#endif
}
// Multiply-shift hash: three xor-shift steps (30, 27, 31 bits) with a
// multiplication between them. The original author reports that it
// passes SmallCrush and PractRand up to 128GB.
//
// An input of 0 hashes to 0.
static uint64_t hash_msh(uint64_t x) {
  // Odd 64-bit multiplier. It is not prime: the value is divisible by 5.
  const uint64_t kMul = 0x9e3779b97f4a7c15;

  x = (x ^ (x >> 30)) * kMul;
  x = (x ^ (x >> 27)) * kMul;
  return x ^ (x >> 31);
}
// Get an unsigned 64bit random integer | |
static uint64_t rdtsc_rand64() { | |
// Hash the rdtsc value through hash64 | |
uint64_t rdtsc_val = get_rdtsc(); | |
uint64_t ret = hash_msh(rdtsc_val); | |
return ret; | |
} | |
/////////////////////////////////////////////////////////// | |
// PRNGs | |
/////////////////////////////////////////////////////////// | |
// Rotate the 32-bit value `x` left by `k` bits.
//
// The count is reduced modulo 32, so k == 0 (and any multiple of 32)
// returns `x` unchanged. The plain `x >> (32 - k)` form would shift a
// 32-bit value by 32 in that case, which is undefined behaviour. For
// k in 1..31 the result is identical to the plain form.
static inline uint32_t rotl(const uint32_t x, int k) {
  const unsigned n = static_cast<unsigned>(k) & 31u;
  return (x << n) | (x >> ((32u - n) & 31u));
}
// Rotate the 64-bit value `x` left by `k` bits.
//
// The count is reduced modulo 64, so k == 0 (and any multiple of 64)
// returns `x` unchanged. The plain `x >> (64 - k)` form would shift a
// 64-bit value by 64 in that case, which is undefined behaviour. For
// k in 1..63 the result is identical to the plain form.
static inline uint64_t rotl(const uint64_t x, int k) {
  const unsigned n = static_cast<unsigned>(k) & 63u;
  return (x << n) | (x >> ((64u - n) & 63u));
}
////////////////////////////////////////////////////////////////// | |
// xoroshiro64**: produce one 32-bit output from the two-word state in
// r[] and advance that state.
uint32_t xoroshiro64starstar(void) {
  const uint32_t lo = r[0];
  const uint32_t mixed = r[1] ^ lo;

  // Output is derived from the first state word as it was on entry
  const uint32_t scrambled = lo * 0x9E3779BB;

  r[0] = rotl(lo, 26) ^ mixed ^ (mixed << 9);  // a, b
  r[1] = rotl(mixed, 13);                      // c

  return rotl(scrambled, 5) * 5;
}
////////////////////////////////////////////////////////////////// | |
// xoshiro256+: produce one 64-bit output from the four-word state in
// fs[] and advance that state.
uint64_t xoshiro256plus(void) {
  // Work on local copies and write the new state back at the end
  uint64_t a = fs[0];
  uint64_t b = fs[1];
  uint64_t c = fs[2];
  uint64_t d = fs[3];

  const uint64_t out = a + d;        // output uses the state as it was on entry
  const uint64_t shifted = b << 17;  // taken before b is modified

  c ^= a;
  d ^= b;
  b ^= c;
  a ^= d;
  c ^= shifted;
  d = rotl(d, 45);

  fs[0] = a;
  fs[1] = b;
  fs[2] = c;
  fs[3] = d;

  return out;
}
////////////////////////////////////////////////////////////////// | |
// xoshiro512++: produce one 64-bit output from the eight-word state in
// s[] and advance that state.
uint64_t xoshiro512plusplus(void) {
  // Work on local copies and write the new state back at the end
  uint64_t w0 = s[0];
  uint64_t w1 = s[1];
  uint64_t w2 = s[2];
  uint64_t w3 = s[3];
  uint64_t w4 = s[4];
  uint64_t w5 = s[5];
  uint64_t w6 = s[6];
  uint64_t w7 = s[7];

  const uint64_t out = rotl(w0 + w2, 17) + w2;  // output uses the state as it was on entry
  const uint64_t shifted = w1 << 11;            // taken before w1 is modified

  // The order of these steps is significant: later lines read words
  // that earlier lines have already updated.
  w2 ^= w0;
  w5 ^= w1;
  w1 ^= w2;
  w7 ^= w3;
  w3 ^= w4;
  w4 ^= w5;
  w0 ^= w6;
  w6 ^= w7;
  w6 ^= shifted;
  w7 = rotl(w7, 21);

  s[0] = w0;
  s[1] = w1;
  s[2] = w2;
  s[3] = w3;
  s[4] = w4;
  s[5] = w5;
  s[6] = w6;
  s[7] = w7;

  return out;
}
////////////////////////////////////////////////////////////////// | |
uint64_t splitmix64() { | |
uint64_t z = (sm += 0x9e3779b97f4a7c15); | |
z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; | |
z = (z ^ (z >> 27)) * 0x94d049bb133111eb; | |
return z ^ (z >> 31); | |
} | |
////////////////////////////////////////////////////////////////// | |
// pcg32: produce one 32-bit output from the global generator `rng` and
// advance its state.
uint32_t pcg32() {
  const uint64_t prev = rng.state;

  // Advance internal state; the increment is forced odd by OR-ing in 1
  rng.state = prev * 6364136223846793005ULL + (rng.inc | 1);

  // Output function (XSH RR), computed from the state as it was on entry
  const uint32_t xorshifted = ((prev >> 18u) ^ prev) >> 27u;
  const uint32_t rot = prev >> 59u;                // top 5 bits select the rotation
  const uint32_t inverse = (0u - rot) & 31u;       // complementary left-shift amount

  return (xorshifted >> rot) | (xorshifted << inverse);
}
////////////////////////////////////////////////////////////////// | |
uint64_t pcg64(pcg32_random_t* rng) { | |
uint64_t high = pcg32(); | |
uint32_t low = pcg32(); | |
uint64_t ret = (high << 32) | low; | |
return ret; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
On my 32-bit ESP32-C3 I'm seeing:
Very little difference in throughput between PRNGs that use 64-bit operations and those that use 32-bit operations. Even on limited hardware like this it makes sense to use a 64-bit PRNG, because you get more bytes per cycle.