Skip to content

Instantly share code, notes, and snippets.

@nattoheaven
Created April 23, 2013 23:23
Show Gist options
  • Save nattoheaven/5448286 to your computer and use it in GitHub Desktop.
Save nattoheaven/5448286 to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <omp.h>
#include <sys/time.h>
#ifdef CURAND
#include <curand.h>
typedef struct {
curandGenerator_t gen;
cudaStream_t stream;
unsigned int *h_buf[2], *d_buf;
ptrdiff_t ibuf;
} gen_t;
#define CURAND_N 0x100000
static inline gen_t
myseed(unsigned int seed)
{
gen_t gen;
cudaHostAlloc((void **)(gen.h_buf + 0), CURAND_N * sizeof(unsigned int),
cudaHostAllocDefault);
cudaHostAlloc((void **)(gen.h_buf + 1), CURAND_N * sizeof(unsigned int),
cudaHostAllocDefault);
gen.ibuf = 0;
cudaMalloc((void **)(&gen.d_buf), CURAND_N * sizeof(unsigned int));
cudaStreamCreate(&gen.stream);
curandCreateGenerator(&gen.gen, CURAND_RNG_PSEUDO_DEFAULT);
curandSetStream(gen.gen, gen.stream);
curandSetPseudoRandomGeneratorSeed(gen.gen, seed);
curandGenerate(gen.gen, gen.d_buf, CURAND_N);
cudaMemcpyAsync(gen.h_buf[0], gen.d_buf, CURAND_N * sizeof(unsigned int),
cudaMemcpyDeviceToHost, gen.stream);
cudaStreamSynchronize(gen.stream);
curandGenerate(gen.gen, gen.d_buf, CURAND_N);
cudaMemcpyAsync(gen.h_buf[1], gen.d_buf, CURAND_N * sizeof(unsigned int),
cudaMemcpyDeviceToHost, gen.stream);
return gen;
}
static inline unsigned int
myrand(gen_t *gen)
{
unsigned int ret;
ret = gen->h_buf[0][gen->ibuf++];
if (gen->ibuf >= CURAND_N) {
unsigned int *h_tmp;
h_tmp = gen->h_buf[1];
gen->h_buf[1] = gen->h_buf[0];
gen->h_buf[0] = h_tmp;
gen->ibuf = 0;
cudaStreamSynchronize(gen->stream);
curandGenerate(gen->gen, gen->d_buf, CURAND_N);
cudaMemcpyAsync(gen->h_buf[1], gen->d_buf, CURAND_N * sizeof(unsigned int),
cudaMemcpyDeviceToHost, gen->stream);
}
return ret;
}
const unsigned long long max2 = 0xffffffffull * 0xffffffffull;
#else
#ifdef RDRAND
typedef char gen_t;
static inline gen_t
myseed(unsigned int seed)
{
return 0;
}
static inline unsigned int
myrand(gen_t *gen)
{
int rdrand;
__asm__ __volatile__("\n"
"0:\n"
"\trdrand %0\n\t"
"\tjnc 0b\n\t" :
"=r"(rdrand));
return rdrand;
}
const unsigned long long max2 = 0xffffffffull * 0xffffffffull;
#else
#ifdef SFMT_MEXP
#include "SFMT.h"
typedef sfmt_t gen_t;
static inline gen_t
myseed(unsigned int seed)
{
gen_t gen;
sfmt_init_gen_rand(&gen, seed);
return gen;
}
#define myrand sfmt_genrand_uint32
const unsigned long long max2 = 0xffffffffull * 0xffffffffull;
#else
#include <stdlib.h>
typedef unsigned int gen_t;
static inline gen_t
myseed(unsigned int seed)
{
return seed;
}
#define myrand rand_r
const unsigned long long max2 = (unsigned long long) RAND_MAX * RAND_MAX;
#endif
#endif
#endif
int
main(int argc, char **argv)
{
long long li, lpi;
const long long ln = 0x1000000000ll;
long double pi;
struct timeval start, end;
gettimeofday(&start, NULL);
lpi = 0;
#pragma omp parallel
{
gen_t gen;
gen = myseed(omp_get_thread_num());
#pragma omp for reduction(+:lpi)
for (li = 0; li < ln; li++) {
unsigned long long ux, uy;
ux = myrand(&gen);
uy = myrand(&gen);
if (ux * ux < (unsigned long long) (max2 - (uy * uy))) {
lpi++;
}
}
}
gettimeofday(&end, NULL);
pi = (long double) lpi / ln * 4.0l;
printf("Calc PI: %.60Lf\n", pi);
printf("Real PI: "
"3.141592653589793238462643383279502884197169399375105820974944\n");
printf("Time: %06f sec\n",
end.tv_sec - start.tv_sec + (end.tv_usec - start.tv_usec) * 1.0e-6);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment