richarddmorey · July 14, 2022 11:32
diff --git a/dice_rolling.R b/dice_rolling.R
 library(dplyr)
 library(memoise)

 # Number of rolls that are summed for one observation
 k <- 3
 # Number of sides on the die
 n <- 5

 # Exact distribution of the sum of k rolls of a fair n-sided die.
 #
 # Arguments:
 #   n    - number of sides (faces are 1..n)
 #   k    - number of rolls that are summed
 #   maxn - when n < maxn, the table is padded with the sums that only a
 #          maxn-sided die could reach (up to k*maxn)
 # Returns a tibble with one row per sum and the columns
 #   sides (= n), S (the sum), Pr (P(sum == S)) and cPr (P(sum <= S)).
 sumDist = function(n,k,maxn = n){
   # Enumerate all n^k equally likely outcomes, then collapse them by sum
   x = do.call(
     expand.grid, replicate(n = k, 1:n, simplify = FALSE)
   ) %>%
     mutate(
       S = rowSums(.),
       Pr = (1/n)^k
     ) %>%
     group_by(S) %>%
     summarise(
       Pr = sum(Pr)
     ) %>%
     mutate(
       cPr = cumsum(Pr),
       sides = n
     )
   if(n<maxn){
     # Sums above k*n are impossible for this die: probability 0, CDF 1.
     # The range is derived from k (it used to assume exactly 3 rolls) and
     # `sides` is filled in so the padded rows do not carry NA.
     x = bind_rows(
       x,
       tibble(S = (k*n+1):(k*maxn), Pr = 0, cPr = 1, sides = n)
     )
   }
   x %>% relocate(sides, .before = everything())
 }

 # Upper-tail p value P(sum >= obsS) for the sum of three rolls of a
 # `sides`-sided die. Vectorised over `sides`; obsS is a single sum.
 upval = Vectorize(function(obsS, sides){
   d = sumDist(sides,3,sides)
   x = d %>% filter(S == obsS)
   # obsS is not an attainable sum for this die, so the row lookup is empty.
   # Returning numeric(0) here would make Vectorize produce a list, which
   # arithmetic on the result cannot handle. Fall back to the definition of
   # the tail probability instead (0 above the support, 1 below it).
   if(nrow(x) == 0) return(sum(d$Pr[d$S >= obsS]))
   (1-x$cPr) + x$Pr
 },'sides')

 # Lower-tail p value P(sum <= obsS) for the sum of three rolls of a
 # `sides`-sided die. Vectorised over `sides`; obsS is a single sum.
 lpval = Vectorize(function(obsS, sides){
   d = sumDist(sides,3,sides)
   x = d %>% filter(S == obsS)
   # obsS is not an attainable sum for this die, so the row lookup is empty.
   # Returning numeric(0) here would make Vectorize produce a list, which
   # arithmetic on the result cannot handle. Fall back to the definition of
   # the tail probability instead (1 above the support, 0 below it).
   if(nrow(x) == 0) return(sum(d$Pr[d$S <= obsS]))
   x$cPr
 },'sides')

 # Two-sided p value: twice the smaller of the two tail probabilities.
 # Results are cached by memoise, keyed on (obsS, sides).
 pval2 = memoise(function(obsS, sides){
   upper = upval(obsS, sides)
   lower = lpval(obsS, sides)
   2 * pmin(upper, lower)
 })

 # Upper confidence bound on the number of sides, given the observed sum
 # obsS of k rolls (k is read from the enclosing environment).
 #
 # Starting at 2 sides the die is enlarged one side at a time; p is the
 # lower-tail probability P(sum <= obsS) for the current die. The search
 # stops at the first die with p < alpha and reports the previous die
 # together with its p.
 #
 # Arguments:
 #   obsS      - observed sum
 #   alpha     - one-sided error rate
 #   max_sides - largest die that is tried
 # Returns a one-row tibble (sides, p); both are NA, with a warning, when
 # max_sides is reached without p dropping below alpha.
 # Results are cached by memoise.
 ubound = memoise(function(obsS, alpha=0.025, max_sides = 1000){
   sides = 2
   p = 1
   while(TRUE){
     oldp = p
     # The table is padded to the range of a 20-sided die so that sums a
     # small die cannot reach still have a row (with cPr = 1)
     p = sumDist(sides,k,20) %>% filter(S==obsS) %>% pull(cPr)
     if(p<alpha) return(tibble(sides=sides-1,p=oldp))
     if(sides == max_sides){
       # The placeholder used to read '{max_sides)', which glue cannot parse,
       # so this branch raised an error instead of warning and returning NA
       warning(glue::glue('max_sides ({max_sides}) reached: p={p}, alpha={alpha}, obsS={obsS}'))
       return(tibble(sides=NA_integer_,p=NA_real_))
     }
     sides = sides + 1
   }
 })

 # Lower confidence bound on the number of sides, given the observed sum
 # obsS of k rolls (k is read from the enclosing environment).
 #
 # Starting at 2 sides the die is enlarged one side at a time; p is the
 # upper-tail probability P(sum >= obsS) = 1 - cPr + Pr for the current die.
 # The first die with p > alpha is returned.
 #
 # Arguments:
 #   obsS      - observed sum
 #   alpha     - one-sided error rate
 #   max_sides - largest die that is tried
 # Returns a one-row tibble (sides, p); both are NA, with a warning, when
 # max_sides is reached without p exceeding alpha.
 # Results are cached by memoise.
 lbound = memoise(function(obsS, alpha=0.025, max_sides = 1000){
   sides = 2
   while(TRUE){
     # The table is padded to the range of a 20-sided die so that sums a
     # small die cannot reach still have a row (Pr = 0, cPr = 1, hence p = 0)
     p = sumDist(sides,k,20) %>% filter(S==obsS) %>%
       mutate(cPr = 1 - cPr + Pr) %>% pull(cPr)
     if(p>alpha) return(tibble(sides=sides,p=p))
     if(sides == max_sides){
       # The placeholder used to read '{max_sides)', which glue cannot parse,
       # so this branch raised an error instead of warning and returning NA
       warning(glue::glue('max_sides ({max_sides}) reached: p={p}, alpha={alpha}, obsS={obsS}'))
       return(tibble(sides=NA_integer_,p=NA_real_))
     }
     sides = sides + 1
   }
 })

 # Confidence interval for the number of sides from an observed sum.
 # The error rate 1 - conf is split evenly between the two one-sided
 # bounds; `...` is forwarded to both lbound() and ubound().
 # Returns c(lower, upper).
 ci = function(obsS, conf = .95,...){
   tail_alpha = (1-conf)/2
   lower = lbound(obsS, tail_alpha, ...)$sides
   upper = ubound(obsS, tail_alpha, ...)$sides
   c(lower, upper)
 }

 # Confidence interval for the number of sides from the raw rolls x.
 # The interval based on sum(x) is computed first; every candidate number
 # of sides that is smaller than some observed roll is then dropped, and
 # the range of the remaining candidates is returned.
 # `...` is forwarded to ci().
 ci.raw = function(x, conf = .95, ...){
   bounds = ci(sum(x), conf, ...)
   candidates = bounds[1]:bounds[2]
   # A die with v sides is only possible if no roll exceeds v
   possible = vapply(candidates, function(v) all(x<=v), logical(1))
   range(candidates[possible])
 }

 # Alternating sum
 #   sum_{j = 0..floor(k/r)} (-1)^j * choose(n, j) * choose(n + k - 1 - r*j, n - 1)
 # vectorised over k.
 # NOTE(review): this looks like the inclusion-exclusion count of the ways
 # to write k as an ordered sum of n integers in 0..(r-1), e.g.
 # rnomial(2, 3, 0:3) gives 1 3 3 1 - confirm the intended use.
 rnomial = Vectorize(function(r,n,k){
   j = 0:floor(k/r)
   signs = (-1)^j
   picks = choose(n, j)
   placements = choose(n+k-1-r*j, n-1)
   sum(signs * picks * placements)
 },'k')

 # rnomial(k+1,n-1,5)


 # Side counts that are compared throughout the rest of the script
 sides = c(4,6,8,10,12,20)

 # Empty canvas for the cumulative distributions of the three-roll sum
 plot(0, 0, type = 'n', las = 1,
      xlim = c(0,60), ylim = c(0,1),
      xlab = 'Sum', ylab = 'Cumulative Prob.')

 # One distribution table per die, each padded to the 20-sided range
 x = purrr::map(sides, ~sumDist(.,3,20))

 # Draw each CDF as points; the colour index is the die's number of sides
 purrr::walk(x, function(d) points(d$S, d$cPr, pch = 19, col = d$sides[1]))

 # 95% intervals for two example sets of rolls
 ci.raw(c(2,3,5), conf = .95)
 ci.raw(c(1,2,3), conf = .95)

 # Adjust alpha for joint significance (square rejection)
 # alp0 is chosen so that (1 - alp0)^2 = .95: two intervals, each at
 # confidence 1 - alp0, are combined below.
 alp0 = 1-sqrt(1-.05)
 ci.raw(c(2,3,5), conf = 1-alp0)
 # {6,8,10,12,20}
 ci.raw(c(1,2,3), conf = 1-alp0)
 # {4,6,8,10}

 # Two-sided p values of the observed sums (10 and 6) for every die in `sides`
 p1 = pval2(10, sides)
 p1[1] = 0 # set the first one (sides = 4) to 0 because it is falsified
 p2 = pval2(6, sides)

 # Fisher's method of combining the p values
 # Statistic -2*(log(p1) + log(p2)) for every pair of side counts, compared
 # with the 95% quantile of a chi-square distribution with 4 df
 x2vals = -2 * outer(log(p1), log(p2), '+')
 crit = qchisq(.95,4)
 dimnames(x2vals) = list(p1 = sides, p2 = sides)
 sig = x2vals < crit # TRUE where the statistic is BELOW the critical value, i.e. the pair is not rejected
 sig

 # Compute combined p values
 t(pchisq(x2vals,4,lower.tail = FALSE)) %>% round(3)

 # Print in nice way
 # NOTE: this overwrites the global `n` with a character matrix of
 # "(a, b)" labels; the labels flagged by `sig` are printed.
 n = outer(paste0('(',sides), paste0(sides,')'), paste, sep = ', ')
 paste(n[sig],collapse=', ')

 # Simulation to check alpha, just in case
 # Doesn't actually need a simulation - can compute
 # analytically - but sometimes a simulation is good to check
 # This will take a long time at first, but memoise will save
 # the CIs for the sum, so it speeds up after a few minutes
 # because it doesn't need to recalculate them.
 s0 = 20 # Sides to simulate
 M = 10000 # Simulations
 pb <- progress::progress_bar$new(total = M)
 purrr::map_df(1:M, function(m){
   # k rolls of an s0-sided die and the interval computed from them
   x = sample.int(s0,k,replace=TRUE)
   ci0 = ci.raw(x)
   pb$tick()
   # One result row per simulation. tibble() states the intent directly;
   # bind_rows() only produced a row here through its legacy handling of
   # named non-data-frame arguments.
   tibble(x1 = x[1],
          x2 = x[2],
          x3 = x[3],
          pval2 = pval2(sum(x), s0),
          sum = sum(x),
          m = m, s = s0, k = k, lo = ci0[1], up = ci0[2])
 }) %>%
   # The interval misses when the true number of sides (column s) lies
   # below its lower end or above its upper end
   mutate(too_high = s<lo,
          too_low  = s>up,
          outside = too_low | too_high) -> z

 # Check coverage (should be just above .95)
 binom.test(sum(!z$outside), M)

 # check distribution of p values
 # (will be conservative, so almost uniform but favoring
 # high values. Also discreteness makes the distribution a bit
 # strange)
 hist(z$pval2, breaks = seq(0,1,by = .05))
 mean(z$pval2<.05) # alpha - should be 1-coverage
	library(dplyr)
	library(memoise)

	# Number of rolls that are summed for one observation
	k <- 3
	# Number of sides on the die
	n <- 5

	# Exact distribution of the sum of k rolls of a fair n-sided die.
	#
	# Arguments:
	#   n    - number of sides (faces are 1..n)
	#   k    - number of rolls that are summed
	#   maxn - when n < maxn, the table is padded with the sums that only a
	#          maxn-sided die could reach (up to k*maxn)
	# Returns a tibble with one row per sum and the columns
	#   sides (= n), S (the sum), Pr (P(sum == S)) and cPr (P(sum <= S)).
	sumDist = function(n,k,maxn = n){
	  # Enumerate all n^k equally likely outcomes, then collapse them by sum
	  x = do.call(
	    expand.grid, replicate(n = k, 1:n, simplify = FALSE)
	  ) %>%
	    mutate(
	      S = rowSums(.),
	      Pr = (1/n)^k
	    ) %>%
	    group_by(S) %>%
	    summarise(
	      Pr = sum(Pr)
	    ) %>%
	    mutate(
	      cPr = cumsum(Pr),
	      sides = n
	    )
	  if(n<maxn){
	    # Sums above k*n are impossible for this die: probability 0, CDF 1.
	    # The multiplication signs were missing here ('3n+1', '3maxn'), which
	    # does not parse; the range is now derived from k, and `sides` is
	    # filled in so the padded rows do not carry NA.
	    x = bind_rows(
	      x,
	      tibble(S = (k*n+1):(k*maxn), Pr = 0, cPr = 1, sides = n)
	    )
	  }
	  x %>% relocate(sides, .before = everything())
	}

	# Upper-tail p value P(sum >= obsS) for the sum of three rolls of a
	# `sides`-sided die. Vectorised over `sides`; obsS is a single sum.
	upval = Vectorize(function(obsS, sides){
	  d = sumDist(sides,3,sides)
	  x = d %>% filter(S == obsS)
	  # obsS is not an attainable sum for this die, so the row lookup is empty.
	  # Returning numeric(0) here would make Vectorize produce a list, which
	  # arithmetic on the result cannot handle. Fall back to the definition of
	  # the tail probability instead (0 above the support, 1 below it).
	  if(nrow(x) == 0) return(sum(d$Pr[d$S >= obsS]))
	  (1-x$cPr) + x$Pr
	},'sides')

	# Lower-tail p value P(sum <= obsS) for the sum of three rolls of a
	# `sides`-sided die. Vectorised over `sides`; obsS is a single sum.
	lpval = Vectorize(function(obsS, sides){
	  d = sumDist(sides,3,sides)
	  x = d %>% filter(S == obsS)
	  # obsS is not an attainable sum for this die, so the row lookup is empty.
	  # Returning numeric(0) here would make Vectorize produce a list, which
	  # arithmetic on the result cannot handle. Fall back to the definition of
	  # the tail probability instead (1 above the support, 0 below it).
	  if(nrow(x) == 0) return(sum(d$Pr[d$S <= obsS]))
	  x$cPr
	},'sides')

	# Two-sided p value: twice the smaller of the two tail probabilities.
	# Results are cached by memoise, keyed on (obsS, sides).
	pval2 = memoise(function(obsS, sides){
	  upper = upval(obsS, sides)
	  lower = lpval(obsS, sides)
	  2 * pmin(upper, lower)
	})

	# Upper confidence bound on the number of sides, given the observed sum
	# obsS of k rolls (k is read from the enclosing environment).
	#
	# Starting at 2 sides the die is enlarged one side at a time; p is the
	# lower-tail probability P(sum <= obsS) for the current die. The search
	# stops at the first die with p < alpha and reports the previous die
	# together with its p.
	#
	# Arguments:
	#   obsS      - observed sum
	#   alpha     - one-sided error rate
	#   max_sides - largest die that is tried
	# Returns a one-row tibble (sides, p); both are NA, with a warning, when
	# max_sides is reached without p dropping below alpha.
	# Results are cached by memoise.
	ubound = memoise(function(obsS, alpha=0.025, max_sides = 1000){
	  sides = 2
	  p = 1
	  while(TRUE){
	    oldp = p
	    # The table is padded to the range of a 20-sided die so that sums a
	    # small die cannot reach still have a row (with cPr = 1)
	    p = sumDist(sides,k,20) %>% filter(S==obsS) %>% pull(cPr)
	    if(p<alpha) return(tibble(sides=sides-1,p=oldp))
	    if(sides == max_sides){
	      # The placeholder used to read '{max_sides)', which glue cannot parse,
	      # so this branch raised an error instead of warning and returning NA
	      warning(glue::glue('max_sides ({max_sides}) reached: p={p}, alpha={alpha}, obsS={obsS}'))
	      return(tibble(sides=NA_integer_,p=NA_real_))
	    }
	    sides = sides + 1
	  }
	})

	# Lower confidence bound on the number of sides, given the observed sum
	# obsS of k rolls (k is read from the enclosing environment).
	#
	# Starting at 2 sides the die is enlarged one side at a time; p is the
	# upper-tail probability P(sum >= obsS) = 1 - cPr + Pr for the current die.
	# The first die with p > alpha is returned.
	#
	# Arguments:
	#   obsS      - observed sum
	#   alpha     - one-sided error rate
	#   max_sides - largest die that is tried
	# Returns a one-row tibble (sides, p); both are NA, with a warning, when
	# max_sides is reached without p exceeding alpha.
	# Results are cached by memoise.
	lbound = memoise(function(obsS, alpha=0.025, max_sides = 1000){
	  sides = 2
	  while(TRUE){
	    # The table is padded to the range of a 20-sided die so that sums a
	    # small die cannot reach still have a row (Pr = 0, cPr = 1, hence p = 0)
	    p = sumDist(sides,k,20) %>% filter(S==obsS) %>%
	      mutate(cPr = 1 - cPr + Pr) %>% pull(cPr)
	    if(p>alpha) return(tibble(sides=sides,p=p))
	    if(sides == max_sides){
	      # The placeholder used to read '{max_sides)', which glue cannot parse,
	      # so this branch raised an error instead of warning and returning NA
	      warning(glue::glue('max_sides ({max_sides}) reached: p={p}, alpha={alpha}, obsS={obsS}'))
	      return(tibble(sides=NA_integer_,p=NA_real_))
	    }
	    sides = sides + 1
	  }
	})

	# Confidence interval for the number of sides from an observed sum.
	# The error rate 1 - conf is split evenly between the two one-sided
	# bounds; `...` is forwarded to both lbound() and ubound().
	# Returns c(lower, upper).
	ci = function(obsS, conf = .95,...){
	  tail_alpha = (1-conf)/2
	  lower = lbound(obsS, tail_alpha, ...)$sides
	  upper = ubound(obsS, tail_alpha, ...)$sides
	  c(lower, upper)
	}

	# Confidence interval for the number of sides from the raw rolls x.
	# The interval based on sum(x) is computed first; every candidate number
	# of sides that is smaller than some observed roll is then dropped, and
	# the range of the remaining candidates is returned.
	# `...` is forwarded to ci().
	ci.raw = function(x, conf = .95, ...){
	  bounds = ci(sum(x), conf, ...)
	  candidates = bounds[1]:bounds[2]
	  # A die with v sides is only possible if no roll exceeds v
	  possible = vapply(candidates, function(v) all(x<=v), logical(1))
	  range(candidates[possible])
	}

	# Alternating sum
	#   sum_{i = 0..floor(k/r)} (-1)^i * choose(n, i) * choose(n + k - 1 - r*i, n - 1)
	# vectorised over k.
	# NOTE(review): this looks like the inclusion-exclusion count of the ways
	# to write k as an ordered sum of n integers in 0..(r-1), e.g.
	# rnomial(2, 3, 0:3) gives 1 3 3 1 - confirm the intended use.
	rnomial = Vectorize(function(r,n,k){
	  i = 0:floor(k/r)
	  # The multiplication signs between the three factors were missing
	  # ('(-1)^ichoose(n, i)choose(...)'), which does not parse
	  sum(
	    (-1)^i * choose(n, i) * choose(n+k-1-r*i, n-1)
	  )
	},'k')

	# rnomial(k+1,n-1,5)


	# Side counts that are compared throughout the rest of the script
	sides = c(4,6,8,10,12,20)

	# Empty canvas for the cumulative distributions of the three-roll sum
	plot(0, 0, type = 'n', las = 1,
	     xlim = c(0,60), ylim = c(0,1),
	     xlab = 'Sum', ylab = 'Cumulative Prob.')

	# One distribution table per die, each padded to the 20-sided range
	x = purrr::map(sides, ~sumDist(.,3,20))

	# Draw each CDF as points; the colour index is the die's number of sides
	purrr::walk(x, function(d) points(d$S, d$cPr, pch = 19, col = d$sides[1]))

	# 95% intervals for two example sets of rolls
	ci.raw(c(2,3,5), conf = .95)
	ci.raw(c(1,2,3), conf = .95)

	# Adjust alpha for joint significance (square rejection)
	# alp0 is chosen so that (1 - alp0)^2 = .95: two intervals, each at
	# confidence 1 - alp0, are combined below.
	alp0 = 1-sqrt(1-.05)
	ci.raw(c(2,3,5), conf = 1-alp0)
	# {6,8,10,12,20}
	ci.raw(c(1,2,3), conf = 1-alp0)
	# {4,6,8,10}

	# Two-sided p values of the observed sums (10 and 6) for every die in `sides`
	p1 = pval2(10, sides)
	p1[1] = 0 # set the first one (sides = 4) to 0 because it is falsified
	p2 = pval2(6, sides)

	# Fisher's method of combining the p values
	# Statistic -2*(log(p1) + log(p2)) for every pair of side counts, compared
	# with the 95% quantile of a chi-square distribution with 4 df
	x2vals = -2 * outer(log(p1), log(p2), '+')
	crit = qchisq(.95,4)
	dimnames(x2vals) = list(p1 = sides, p2 = sides)
	sig = x2vals < crit # TRUE where the statistic is BELOW the critical value, i.e. the pair is not rejected
	sig

	# Compute combined p values
	t(pchisq(x2vals,4,lower.tail = FALSE)) %>% round(3)

	# Print in nice way
	# NOTE: this overwrites the global `n` with a character matrix of
	# "(a, b)" labels; the labels flagged by `sig` are printed.
	n = outer(paste0('(',sides), paste0(sides,')'), paste, sep = ', ')
	paste(n[sig],collapse=', ')

	# Simulation to check alpha, just in case
	# Doesn't actually need a simulation - can compute
	# analytically - but sometimes a simulation is good to check
	# This will take a long time at first, but memoise will save
	# the CIs for the sum, so it speeds up after a few minutes
	# because it doesn't need to recalculate them.
	s0 = 20 # Sides to simulate
	M = 10000 # Simulations
	pb <- progress::progress_bar$new(total = M)
	purrr::map_df(1:M, function(m){
	  # k rolls of an s0-sided die and the interval computed from them
	  x = sample.int(s0,k,replace=TRUE)
	  ci0 = ci.raw(x)
	  pb$tick()
	  # One result row per simulation. tibble() states the intent directly;
	  # bind_rows() only produced a row here through its legacy handling of
	  # named non-data-frame arguments.
	  tibble(x1 = x[1],
	         x2 = x[2],
	         x3 = x[3],
	         pval2 = pval2(sum(x), s0),
	         sum = sum(x),
	         m = m, s = s0, k = k, lo = ci0[1], up = ci0[2])
	}) %>%
	  # The interval misses when the true number of sides (column s) lies
	  # below its lower end or above its upper end. The '|' was escaped as
	  # '\|' here, which does not parse.
	  mutate(too_high = s<lo,
	         too_low = s>up,
	         outside = too_low | too_high) -> z

	# Check coverage (should be just above .95)
	binom.test(sum(!z$outside), M)

	# check distribution of p values
	# (will be conservative, so almost uniform but favoring
	# high values. Also discreteness makes the distribution a bit
	# strange)
	hist(z$pval2, breaks = seq(0,1,by = .05))
	mean(z$pval2<.05) # alpha - should be 1-coverage