Created
July 14, 2022 11:32
-
-
Save richarddmorey/4ae51ac16e38c058573ec485c1942342 to your computer and use it in GitHub Desktop.
Dice rolling significance test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
library(memoise) | |
# Rolls | |
k = 3 | |
# Sides | |
n = 5 | |
sumDist = function(n,k,maxn = n){ | |
do.call( | |
expand.grid,replicate(n = k,1:n,simplify = FALSE) | |
) %>% | |
mutate( | |
S = rowSums(.), | |
Pr = (1/n)^k | |
) %>% | |
group_by(S) %>% | |
summarise( | |
Pr = sum(Pr) | |
) %>% | |
mutate( | |
cPr = cumsum(Pr), | |
sides = n | |
) -> x | |
if(n<maxn){ | |
x = bind_rows( | |
x, | |
tibble(S = (3*n+1):(3*maxn),Pr = 0,cPr = 1) | |
) | |
} | |
return(x %>% relocate(sides, .before = everything())) | |
} | |
upval = Vectorize(function(obsS, sides){ | |
x = sumDist(sides,3,sides) %>% filter(S == obsS) | |
(1-x$cPr) + x$Pr | |
},'sides') | |
lpval = Vectorize(function(obsS, sides){ | |
x = sumDist(sides,3,sides) %>% filter(S == obsS) | |
x$cPr | |
},'sides') | |
pval2 = memoise(function(obsS, sides){ | |
2*pmin(upval(obsS, sides),lpval(obsS, sides)) | |
}) | |
ubound = memoise(function(obsS, alpha=0.025, max_sides = 1000){ | |
sides = 2 | |
p = 1 | |
while(TRUE){ | |
oldp = p | |
p = sumDist(sides,k,20) %>% filter(S==obsS) %>% pull(cPr) | |
if(p<alpha) return(tibble(sides=sides-1,p=oldp)) | |
if(sides == max_sides){ | |
warning(glue::glue('max_sides ({max_sides)) reached: p={p}, alpha={alpha}, obsS={obsS}')) | |
return(tibble(sides=NA_integer_,p=NA_real_)) | |
} | |
sides = sides + 1 | |
} | |
}) | |
lbound = memoise(function(obsS, alpha=0.025, max_sides = 1000){ | |
sides = 2 | |
oldp = 1 | |
while(TRUE){ | |
p = sumDist(sides,k,20) %>% filter(S==obsS) %>% | |
mutate(cPr = 1 - cPr + Pr) %>% pull(cPr) | |
if(p>alpha) return(tibble(sides=sides,p=p)) | |
if(sides == max_sides){ | |
warning(glue::glue('max_sides ({max_sides)) reached: p={p}, alpha={alpha}, obsS={obsS}')) | |
return(tibble(sides=NA_integer_,p=NA_real_)) | |
} | |
sides = sides + 1 | |
} | |
}) | |
ci = function(obsS, conf = .95,...){ | |
c(lbound(obsS, (1-conf)/2, ...)$sides, ubound(obsS, (1-conf)/2, ...)$sides) | |
} | |
ci.raw = function(x, conf = .95, ...){ | |
z = ci(sum(x), conf, ...) | |
z = z[1]:z[2] | |
excl = sapply(z, function(v) any(x>v)) | |
range(z[!excl]) | |
} | |
rnomial = Vectorize(function(r,n,k){ | |
i = 0:floor(k/r) | |
sum( | |
(-1)^i*choose(n, i)*choose(n+k-1-r*i, n-1) | |
) | |
},'k') | |
# rnomial(k+1,n-1,5) | |
sides = c(4,6,8,10,12,20) | |
plot(0,0,xlim = c(0,60),ylim=c(0,1), typ='n',las=1, | |
ylab = 'Cumulative Prob.', xlab = 'Sum') | |
sides %>% | |
purrr::map(~sumDist(.,3,20)) -> x | |
split(x, sides) %>% | |
purrr::walk( | |
~ points(.[[1]]$S,.[[1]]$cPr, pch = 19, col = .[[1]]$sides[1]) | |
) | |
ci.raw(c(2,3,5), conf = .95) | |
ci.raw(c(1,2,3), conf = .95) | |
# Adjust alpha for joint significance (square rejection) | |
alp0 = 1-sqrt(1-.05) | |
ci.raw(c(2,3,5), conf = 1-alp0) | |
# {6,8,10,12,20} | |
ci.raw(c(1,2,3), conf = 1-alp0) | |
# {4,6,8,10} | |
p1 = pval2(10, sides) | |
p1[1] = 0 # set the first one (sides = 4) to 0 because it is falsified | |
p2 = pval2(6, sides) | |
# Fisher's method of combining the p values | |
x2vals = -2 * outer(log(p1), log(p2), '+') | |
crit = qchisq(.95,4) | |
dimnames(x2vals) = list(p1 = sides, p2 = sides) | |
sig = x2vals < crit # Which ones are significant? | |
sig | |
# Compute combined p values | |
t(pchisq(x2vals,4,lower.tail = FALSE)) %>% round(3) | |
# Print in nice way | |
n = outer(paste0('(',sides), paste0(sides,')'), paste, sep = ', ') | |
paste(n[sig],collapse=', ') | |
# Simulation to check alpha, just in case | |
# Doesn't actually need a simulation - can compute | |
# analytically - but sometimes a simulation is good to check | |
# This will take a long time at first, but memoise will save | |
# the CIs for the sum, so it speeds up after a few minutes | |
# because it doesn't need to recalculate them. | |
s0 = 20 # Sides to simulate | |
M = 10000 # Simulations | |
pb <- progress::progress_bar$new(total = M) | |
purrr::map_df(1:M, function(m){ | |
x = sample.int(s0,k,replace=TRUE) | |
ci0 = ci.raw(x) | |
pb$tick() | |
bind_rows(x1 = x[1], | |
x2 = x[2], | |
x3 = x[3], | |
pval2 = pval2(sum(x), s0), | |
sum = sum(x), | |
m = m, s = s0, k = k, lo = ci0[1], up = ci0[2]) | |
}) %>% | |
mutate(too_high = s<lo, | |
too_low = s0>up, | |
outside = too_low | too_high) -> z | |
# Check coverage (should be just above .95) | |
binom.test(sum(!z$outside), M) | |
# check distribution of p values | |
# (will be conservative, so almost uniform but favoring | |
# high values. Also discreteness makes the distribution a bit | |
# strange) | |
hist(z$pval2, breaks = seq(0,1,by = .05)) | |
mean(z$pval2<.05) # alpha - should be 1-coverage |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment