Last active
March 24, 2020 12:50
-
-
Save mike-lawrence/40df0d4302f3dfbfccdd9172e8da9a1e to your computer and use it in GitHub Desktop.
LMER power simulations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(broom) | |
library(lme4) | |
#set default contrasts to halfsum (this matters!) | |
halfsum_contrasts = function (...) contr.sum(...) * 0.5 | |
options(contrasts = c('halfsum_contrasts','contr.poly')) | |
run_sim = function(N,K,seed,do_checks=F){ | |
set.seed(seed) | |
coefs = c(0,0,0,1) | |
sds = c(3,3,3,3) | |
noise = 4 | |
subj_coefs = matrix( | |
rnorm(N*length(coefs),coefs,sds) | |
, nrow = N | |
, byrow = T | |
) | |
if(do_checks){ | |
#check that matrix came out right | |
print(apply(subj_coefs,2,mean)) #should be close to coef | |
print(apply(subj_coefs,2,sd)) #should be close to sds | |
print(cor(subj_coefs)) #should be close to identity | |
} | |
#create the condition tibble and contrasts | |
conditions = tidyr::expand_grid( | |
a = factor(c('a_pre','a_post')) | |
, b = factor(c('b_pre','b_post')) | |
) | |
contrasts = model.matrix( | |
data = conditions | |
, object = ~ a*b | |
) | |
#compute each subject's value-per-cell | |
subj_vals = list() | |
for(i in 1:N){ | |
subj_vals[[i]] = conditions | |
subj_vals[[i]]$subj = i | |
subj_vals[[i]]$value = as.numeric(as.matrix(contrasts) %*% subj_coefs[i,]) | |
} | |
subj_vals = dplyr::bind_rows(subj_vals) | |
subj_vals$subj = factor(subj_vals$subj) | |
if(do_checks){ | |
#compute difference between conditions | |
# (this is why we use halfsum contrasts) | |
subj_vals %>% | |
dplyr::group_by( | |
subj | |
) %>% | |
dplyr::summarise( | |
intercept = mean(value) | |
, ab = ( | |
mean(value[(a=='a_post')&(b=='b_post')]) - | |
mean(value[(a=='a_pre')&(b=='b_post')]) | |
) - ( | |
mean(value[(a=='a_post')&(b=='b_pre')]) - | |
mean(value[(a=='a_pre')&(b=='b_pre')]) | |
) | |
, a = mean(value[a=='a_post']) - mean(value[a=='a_pre']) | |
, b = mean(value[b=='b_post']) - mean(value[b=='b_pre']) | |
) %>% | |
tidyr::gather( | |
key = key | |
, value = value | |
, -subj | |
) %>% | |
dplyr::group_by( | |
key | |
) %>% | |
dplyr::summarise( | |
mean = mean(value) | |
, sd = sd(value) | |
) #should roughly match coefs and sds | |
} | |
if(do_checks){ | |
#compute an lm per participant then check the mean/sd of resulting distributions | |
subj_vals %>% | |
dplyr::group_by( | |
subj | |
) %>% | |
dplyr::summarise( | |
broom::tidy(lm(value~a*b)) | |
) %>% | |
dplyr::group_by( | |
term | |
) %>% | |
dplyr::summarise( | |
mean = mean(estimate) | |
, sd = sd(estimate) | |
) #should roughly match coefs and sds | |
} | |
#sample from each subject-by-condition mean with measurement noise | |
subj_vals %>% | |
dplyr::group_by( | |
subj | |
, a | |
, b | |
) %>% | |
dplyr::summarise( | |
value = rnorm(K,value,noise) | |
) -> | |
obs | |
if(do_checks){ | |
#compute difference between conditions | |
# (this is why we use halfsum contrasts) | |
obs %>% | |
dplyr::group_by( | |
subj | |
) %>% | |
dplyr::summarise( | |
intercept = mean(value) | |
, ab = ( | |
mean(value[(a=='a_post')&(b=='b_post')]) - | |
mean(value[(a=='a_pre')&(b=='b_post')]) | |
) - ( | |
mean(value[(a=='a_post')&(b=='b_pre')]) - | |
mean(value[(a=='a_pre')&(b=='b_pre')]) | |
) | |
, a = mean(value[a=='a_post']) - mean(value[a=='a_pre']) | |
, b = mean(value[b=='b_post']) - mean(value[b=='b_pre']) | |
) %>% | |
tidyr::gather( | |
key = key | |
, value = value | |
, -subj | |
) %>% | |
dplyr::group_by( | |
key | |
) %>% | |
dplyr::summarise( | |
mean = mean(value) | |
, sd = sd(value) | |
) #should roughly match coefs and sds | |
} | |
if(do_checks){ | |
#compute an lm per participant then check the mean/sd of resulting distributions | |
obs %>% | |
dplyr::group_by( | |
subj | |
) %>% | |
dplyr::summarise( | |
broom::tidy(lm(value~a*b)) | |
) %>% | |
dplyr::group_by( | |
term | |
) %>% | |
dplyr::summarise( | |
mean = mean(estimate) | |
, sd = sd(estimate) | |
) #should roughly match coefs and sds | |
} | |
#fit and obtain intervals | |
fit = lme4::lmer( | |
formula = value ~ a*b + (a*b|subj) | |
, data = obs | |
) | |
ci = confint(fit,method='Wald') | |
if(do_checks){ | |
print( | |
tibble( | |
N = N | |
, K = K | |
, seed = seed | |
, lo = ci[nrow(ci),1] | |
, hi = ci[nrow(ci),2] | |
) | |
) | |
} | |
return(tibble( | |
bound = c('lo','hi') | |
, value = ci[nrow(ci),] | |
)) | |
} | |
#running once | |
run_sim(N=100,K=5,seed=3) | |
#running across a grid | |
tidyr::expand_grid( | |
N = c(,20,40,80,160) | |
, K = c(5,10,20) | |
, iteration = 1:1e3 | |
) %>% | |
dplyr::mutate( | |
seed=1:n() | |
) %>% | |
dplyr::group_by( | |
N | |
, K | |
, iteration | |
) %>% | |
dplyr::summarize( | |
run_sim( | |
N = N | |
, K = K | |
, seed = seed | |
) | |
) -> | |
out | |
out %>% | |
tidyr::spread( | |
key = bound | |
, value = value | |
) %>% | |
dplyr::group_by( | |
N | |
, K | |
) %>% | |
dplyr::summarise( | |
hi_less_than_zero = mean(hi<0) | |
, hi_greater_than_zero = mean(hi>0) | |
, lo_less_than_zero = mean(lo<0) | |
, lo_greater_than_zero = mean(lo>0) | |
) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(broom) | |
library(lme4) | |
#set default contrasts to halfsum (this matters!) | |
halfsum_contrasts = function (...) contr.sum(...) * 0.5 | |
options(contrasts = c('halfsum_contrasts','contr.poly')) | |
run_sim = function(N,K,seed,do_checks=F){ | |
set.seed(seed) | |
coefs = c(0,1) | |
sds = c(1,3) | |
noise = 4 | |
subj_coefs = matrix( | |
rnorm(N*length(coefs),coefs,sds) | |
, nrow = N | |
, byrow = T | |
) | |
if(do_checks){ | |
#check that matrix came out right | |
print(apply(subj_coefs,2,mean)) #should be close to coef | |
print(apply(subj_coefs,2,sd)) #should be close to sds | |
print(cor(subj_coefs)) #should be close to identity | |
} | |
#create the condition tibble and contrasts | |
conditions = tidyr::expand_grid( | |
a = factor(c('a_pre','a_post')) | |
) | |
contrasts = model.matrix( | |
data = conditions | |
, object = ~ a | |
) | |
#compute each subject's value-per-cell | |
subj_vals = list() | |
for(i in 1:N){ | |
subj_vals[[i]] = conditions | |
subj_vals[[i]]$subj = i | |
subj_vals[[i]]$value = as.numeric(as.matrix(contrasts) %*% subj_coefs[i,]) | |
} | |
subj_vals = dplyr::bind_rows(subj_vals) | |
subj_vals$subj = factor(subj_vals$subj) | |
if(do_checks){ | |
#compute difference between conditions | |
# (this is why we use halfsum contrasts) | |
subj_vals %>% | |
dplyr::group_by( | |
subj | |
) %>% | |
dplyr::summarise( | |
intercept = mean(value) | |
, a = mean(value[a=='a_post']) - mean(value[a=='a_pre']) | |
) %>% | |
tidyr::gather( | |
key = key | |
, value = value | |
, -subj | |
) %>% | |
dplyr::group_by( | |
key | |
) %>% | |
dplyr::summarise( | |
mean = mean(value) | |
, sd = sd(value) | |
) #should roughly match coefs and sds | |
} | |
if(do_checks){ | |
#compute an lm per participant then check the mean/sd of resulting distributions | |
subj_vals %>% | |
dplyr::group_by( | |
subj | |
) %>% | |
dplyr::summarise( | |
broom::tidy(lm(value~a)) | |
) %>% | |
dplyr::group_by( | |
term | |
) %>% | |
dplyr::summarise( | |
mean = mean(estimate) | |
, sd = sd(estimate) | |
) #should roughly match coefs and sds | |
} | |
#sample from each subject-by-condition mean with measurement noise | |
subj_vals %>% | |
dplyr::group_by( | |
subj | |
, a | |
) %>% | |
dplyr::summarise( | |
value = rnorm(K,value,noise) | |
) -> | |
obs | |
if(do_checks){ | |
#compute difference between conditions | |
# (this is why we use halfsum contrasts) | |
obs %>% | |
dplyr::group_by( | |
subj | |
) %>% | |
dplyr::summarise( | |
intercept = mean(value) | |
, a = mean(value[a=='a_post']) - mean(value[a=='a_pre']) | |
) %>% | |
tidyr::gather( | |
key = key | |
, value = value | |
, -subj | |
) %>% | |
dplyr::group_by( | |
key | |
) %>% | |
dplyr::summarise( | |
mean = mean(value) | |
, sd = sd(value) | |
) #should roughly match coefs and sds | |
} | |
if(do_checks){ | |
#compute an lm per participant then check the mean/sd of resulting distributions | |
obs %>% | |
dplyr::group_by( | |
subj | |
) %>% | |
dplyr::summarise( | |
broom::tidy(lm(value~a)) | |
) %>% | |
dplyr::group_by( | |
term | |
) %>% | |
dplyr::summarise( | |
mean = mean(estimate) | |
, sd = sd(estimate) | |
) #should roughly match coefs and sds | |
} | |
#fit and obtain intervals | |
fit = lme4::lmer( | |
formula = value ~ a + (a|subj) | |
, data = obs | |
) | |
ci = confint(fit,method='Wald') | |
if(do_checks){ | |
print( | |
tibble( | |
N = N | |
, K = K | |
, seed = seed | |
, lo = ci[nrow(ci),1] | |
, hi = ci[nrow(ci),2] | |
) | |
) | |
} | |
return(tibble( | |
bound = c('lo','hi') | |
, value = ci[nrow(ci),] | |
)) | |
} | |
#running once | |
run_sim(N=100,K=5,seed=3) | |
#running across a grid | |
tidyr::expand_grid( | |
N = c(20,40,80,160) | |
, K = c(5,10,20) | |
, iteration = 1:1e3 | |
) %>% | |
dplyr::mutate( | |
seed=1:n() | |
) %>% | |
dplyr::group_by( | |
N | |
, K | |
, iteration | |
) %>% | |
dplyr::summarize( | |
run_sim( | |
N = N | |
, K = K | |
, seed = seed | |
) | |
) -> | |
out | |
out %>% | |
tidyr::spread( | |
key = bound | |
, value = value | |
) %>% | |
dplyr::group_by( | |
N | |
, K | |
) %>% | |
dplyr::summarise( | |
hi_less_than_zero = mean(hi<0) | |
, hi_greater_than_zero = mean(hi>0) | |
, lo_less_than_zero = mean(lo<0) | |
, lo_greater_than_zero = mean(lo>0) | |
) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment