Created
June 14, 2020 07:43
-
-
Save mike-lawrence/ed5c447344f358e638ceb46f55106be6 to your computer and use it in GitHub Desktop.
reliability simulation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load packages used ---- | |
library(tidyverse) | |
library(broom) | |
options(dplyr.show_progress=TRUE) | |
# define a function to simulate an experiment ---- | |
simulate_experiment = function( | |
# simulation parameters ---- | |
within_subject_noise = 1 | |
, between_subject_variability = 1 | |
, num_obs_per_subject = 10 | |
, num_subjects_per_group = 10 | |
){ | |
# start with a tibble of subject true means ---- | |
tibble( | |
subj_id = 1:(num_subjects_per_group*2) | |
, subj_true_val = rnorm( | |
num_subjects_per_group*2 | |
, 0 | |
, between_subject_variability | |
) | |
) %>% | |
dplyr::group_by(subj_id) %>% | |
#for each subject, generate observed measurements | |
dplyr::summarise( | |
trial_id = 1:num_obs_per_subject | |
, value = rnorm( | |
num_obs_per_subject | |
, subj_true_val | |
, within_subject_noise | |
) | |
, .groups = 'keep' | |
) -> | |
obs_data | |
# prep for computing split-half reliability ---- | |
obs_data %>% | |
#add split label | |
dplyr::mutate( | |
split = case_when( | |
trial_id %% 2 == 1 ~ 'A' | |
, trial_id %% 2 == 0 ~ 'B' | |
) | |
) %>% | |
dplyr::group_by( | |
subj_id | |
, split | |
) %>% | |
#collapse to a mean | |
dplyr::summarise( | |
subj_split_obs_mean = mean(value) | |
, .groups = 'drop' | |
) %>% | |
#spread | |
tidyr::spread( | |
key = split | |
, value = subj_split_obs_mean | |
) %>% | |
#compute correlation | |
dplyr::summarise( | |
broom::tidy(cor.test(A,B)) | |
) -> | |
cor_test_result | |
#get the t-test result | |
obs_data %>% | |
#collapse to a mean | |
dplyr::summarise( | |
subj_obs_mean = mean(value) | |
, .groups = 'drop' | |
) %>% | |
#add a group identifier | |
dplyr::mutate( | |
group = case_when( | |
subj_id %% 2 == 1 ~ 'A' | |
, subj_id %% 2 == 0 ~ 'B' | |
) | |
, group = factor(group) | |
) %>% | |
#now run a t-test | |
dplyr::summarise( | |
broom::tidy(t.test(subj_obs_mean~group)) | |
) -> | |
t_test_result | |
tibble( | |
cor_test_result = list(cor_test_result) | |
, t_test_result = list(t_test_result) | |
) %>% | |
return() | |
} | |
#run lots of simulated experiments with a variety of values for the experiment parameters | |
expand_grid( | |
iteration = 1:1e4 #10e4 gives stable estimates but takes a couple hours to run | |
, within_subject_noise = c(.1,1,10) | |
, between_subject_variability = 1 | |
, num_obs_per_subject = c(2,10,100) | |
, num_subjects_per_group = c(2,10,100) | |
) %>% | |
dplyr::group_by_all() %>% | |
dplyr::summarise( | |
simulate_experiment( | |
within_subject_noise | |
, between_subject_variability | |
, num_obs_per_subject | |
, num_subjects_per_group | |
) | |
, .groups = 'keep' | |
) -> | |
all_experiment_results | |
all_experiment_results %>% | |
# dplyr::ungroup() %>% | |
# dplyr::slice(1) -> | |
# . | |
dplyr::group_by( | |
within_subject_noise | |
, between_subject_variability | |
, num_obs_per_subject | |
, num_subjects_per_group | |
, iteration | |
) %>% | |
dplyr::summarise( | |
value = cor_test_result[[1]]$estimate | |
, .groups = 'drop_last' | |
) %>% | |
dplyr::summarise( | |
lo = quantile(value,.025) | |
, hi = quantile(value,.975) | |
, med = median(value) | |
, .groups = 'drop' | |
) %>% | |
dplyr::mutate( | |
num_subjects_per_group = factor(num_subjects_per_group) | |
) %>% | |
ggplot( | |
mapping = aes( | |
y = med | |
, ymin = lo | |
, ymax = hi | |
, x = within_subject_noise | |
, colour = num_subjects_per_group | |
, shape = num_subjects_per_group | |
) | |
)+ | |
facet_wrap( | |
~ num_obs_per_subject | |
, labeller = label_both | |
)+ | |
geom_point(alpha=.5)+ | |
geom_line(alpha=.5)+ | |
geom_errorbar(alpha=.5) | |
all_experiment_results %>% | |
# dplyr::ungroup() %>% | |
# dplyr::slice(1) -> | |
# . | |
dplyr::group_by( | |
within_subject_noise | |
, between_subject_variability | |
, num_obs_per_subject | |
, num_subjects_per_group | |
, iteration | |
) %>% | |
dplyr::summarise( | |
value = t_test_result[[1]]$p.value<.05 | |
, .groups = 'drop_last' | |
) %>% | |
dplyr::summarise( | |
lo = broom::tidy(prop.test(sum(value),n()))$conf.low | |
, hi = broom::tidy(prop.test(sum(value),n()))$conf.high | |
, value = mean(value) | |
, .groups = 'drop' | |
) %>% | |
dplyr::mutate( | |
num_subjects_per_group = factor(num_subjects_per_group) | |
) %>% | |
ggplot( | |
mapping = aes( | |
y = value | |
, ymin = lo | |
, ymax = hi | |
, x = within_subject_noise | |
, colour = num_subjects_per_group | |
, shape = num_subjects_per_group | |
) | |
)+ | |
facet_wrap( | |
~ num_obs_per_subject | |
, labeller = label_both | |
)+ | |
geom_point(alpha=.5)+ | |
geom_line(alpha=.5)+ | |
geom_errorbar(alpha=.5) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment