mike-lawrence · June 14, 2020 07:43
diff --git a/main.r b/main.r
 # load packages used ----
 library(tidyverse)
 library(broom)

 options(dplyr.show_progress=TRUE)

 # define a function to simulate an experiment ----
 simulate_experiment = function(
 	# simulation parameters ----
 	  within_subject_noise = 1
 	, between_subject_variability = 1
 	, num_obs_per_subject = 10
 	, num_subjects_per_group = 10
 ){
 	# start with a tibble of subject true means ----
 	tibble(
 		subj_id = 1:(num_subjects_per_group*2)
 		, subj_true_val = rnorm(
 			num_subjects_per_group*2
 			, 0
 			, between_subject_variability
 		)
 	) %>%
 		dplyr::group_by(subj_id) %>%
 		#for each subject, generate observed measurements
 		dplyr::summarise(
 			trial_id = 1:num_obs_per_subject
 			, value = rnorm(
 				num_obs_per_subject
 				, subj_true_val
 				, within_subject_noise
 			)
 			, .groups = 'keep'
 		) ->
 		obs_data

 	# prep for computing split-half reliability ----
 	obs_data %>%
 		#add split label
 		dplyr::mutate(
 			split = case_when(
 				trial_id %% 2 == 1 ~ 'A'
 				, trial_id %% 2 == 0 ~ 'B'
 			)
 		) %>%
 		dplyr::group_by(
 			subj_id
 			, split
 		) %>%
 		#collapse to a mean
 		dplyr::summarise(
 			subj_split_obs_mean = mean(value)
 			, .groups = 'drop'
 		) %>%
 		#spread
 		tidyr::spread(
 			key = split
 			, value = subj_split_obs_mean
 		) %>%
 		#compute correlation
 		dplyr::summarise(
 			broom::tidy(cor.test(A,B))
 		) ->
 		cor_test_result

 	#get the t-test result
 	obs_data %>%
 		#collapse to a mean
 		dplyr::summarise(
 			subj_obs_mean = mean(value)
 			, .groups = 'drop'
 		) %>%
 		#add a group identifier
 		dplyr::mutate(
 			group = case_when(
 				subj_id %% 2 == 1 ~ 'A'
 				, subj_id %% 2 == 0 ~ 'B'
 			)
 			, group = factor(group)
 		) %>%
 		#now run a t-test
 		dplyr::summarise(
 			broom::tidy(t.test(subj_obs_mean~group))
 		) ->
 		t_test_result

 	tibble(
 		cor_test_result = list(cor_test_result)
 		, t_test_result = list(t_test_result)
 	) %>%
 		return()
 }

 #run lots of simulated experiments with a variety of values for the experiment parameters
 expand_grid(
 	  iteration = 1:1e4 #10e4 gives stable estimates but takes a couple hours to run
 	, within_subject_noise = c(.1,1,10)
 	, between_subject_variability = 1
 	, num_obs_per_subject = c(2,10,100)
 	, num_subjects_per_group = c(2,10,100)
 ) %>%
 	dplyr::group_by_all() %>%
 	dplyr::summarise(
 		simulate_experiment(
 			  within_subject_noise
 			, between_subject_variability
 			, num_obs_per_subject
 			, num_subjects_per_group
 		)
 		, .groups = 'keep'
 	) ->
 	all_experiment_results


 all_experiment_results %>%
 	# dplyr::ungroup() %>%
 	# dplyr::slice(1) ->
 	# .
 	dplyr::group_by(
 		 within_subject_noise
 		, between_subject_variability
 		, num_obs_per_subject
 		, num_subjects_per_group
 		, iteration
 	) %>%
 	dplyr::summarise(
 		value = cor_test_result[[1]]$estimate
 		, .groups = 'drop_last'
 	) %>%
 	dplyr::summarise(
 		lo = quantile(value,.025)
 		, hi = quantile(value,.975)
 		, med = median(value)
 		, .groups = 'drop'
 	) %>%
 	dplyr::mutate(
 		num_subjects_per_group = factor(num_subjects_per_group)
 	) %>%
 	ggplot(
 		mapping = aes(
 			y = med
 			, ymin = lo
 			, ymax = hi
 			, x = within_subject_noise
 			, colour = num_subjects_per_group
 			, shape = num_subjects_per_group
 		)
 	)+
 	facet_wrap(
 		~ num_obs_per_subject
 		, labeller = label_both
 	)+
 	geom_point(alpha=.5)+
 	geom_line(alpha=.5)+
 	geom_errorbar(alpha=.5)


 all_experiment_results %>%
 	# dplyr::ungroup() %>%
 	# dplyr::slice(1) ->
 	# .
 	dplyr::group_by(
 		within_subject_noise
 		, between_subject_variability
 		, num_obs_per_subject
 		, num_subjects_per_group
 		, iteration
 	) %>%
 	dplyr::summarise(
 		value = t_test_result[[1]]$p.value<.05
 		, .groups = 'drop_last'
 	) %>%
 	dplyr::summarise(
 		lo = broom::tidy(prop.test(sum(value),n()))$conf.low
 		, hi = broom::tidy(prop.test(sum(value),n()))$conf.high
 		, value = mean(value)
 		, .groups = 'drop'
 	) %>%
 	dplyr::mutate(
 		num_subjects_per_group = factor(num_subjects_per_group)
 	) %>%
 	ggplot(
 		mapping = aes(
 			y = value
 			, ymin = lo
 			, ymax = hi
 			, x = within_subject_noise
 			, colour = num_subjects_per_group
 			, shape = num_subjects_per_group
 		)
 	)+
 	facet_wrap(
 		~ num_obs_per_subject
 		, labeller = label_both
 	)+
 	geom_point(alpha=.5)+
 	geom_line(alpha=.5)+
 	geom_errorbar(alpha=.5)
	# load packages used ----
	library(tidyverse)
	library(broom)

	options(dplyr.show_progress=TRUE)

	# define a function to simulate an experiment ----
	simulate_experiment = function(
	# simulation parameters ----
	within_subject_noise = 1
	, between_subject_variability = 1
	, num_obs_per_subject = 10
	, num_subjects_per_group = 10
	){
	# start with a tibble of subject true means ----
	tibble(
	subj_id = 1:(num_subjects_per_group*2)
	, subj_true_val = rnorm(
	num_subjects_per_group*2
	, 0
	, between_subject_variability
	)
	) %>%
	dplyr::group_by(subj_id) %>%
	#for each subject, generate observed measurements
	dplyr::summarise(
	trial_id = 1:num_obs_per_subject
	, value = rnorm(
	num_obs_per_subject
	, subj_true_val
	, within_subject_noise
	)
	, .groups = 'keep'
	) ->
	obs_data

	# prep for computing split-half reliability ----
	obs_data %>%
	#add split label
	dplyr::mutate(
	split = case_when(
	trial_id %% 2 == 1 ~ 'A'
	, trial_id %% 2 == 0 ~ 'B'
	)
	) %>%
	dplyr::group_by(
	subj_id
	, split
	) %>%
	#collapse to a mean
	dplyr::summarise(
	subj_split_obs_mean = mean(value)
	, .groups = 'drop'
	) %>%
	#spread
	tidyr::spread(
	key = split
	, value = subj_split_obs_mean
	) %>%
	#compute correlation
	dplyr::summarise(
	broom::tidy(cor.test(A,B))
	) ->
	cor_test_result

	#get the t-test result
	obs_data %>%
	#collapse to a mean
	dplyr::summarise(
	subj_obs_mean = mean(value)
	, .groups = 'drop'
	) %>%
	#add a group identifier
	dplyr::mutate(
	group = case_when(
	subj_id %% 2 == 1 ~ 'A'
	, subj_id %% 2 == 0 ~ 'B'
	)
	, group = factor(group)
	) %>%
	#now run a t-test
	dplyr::summarise(
	broom::tidy(t.test(subj_obs_mean~group))
	) ->
	t_test_result

	tibble(
	cor_test_result = list(cor_test_result)
	, t_test_result = list(t_test_result)
	) %>%
	return()
	}

	#run lots of simulated experiments with a variety of values for the experiment parameters
	expand_grid(
	iteration = 1:1e4 #10e4 gives stable estimates but takes a couple hours to run
	, within_subject_noise = c(.1,1,10)
	, between_subject_variability = 1
	, num_obs_per_subject = c(2,10,100)
	, num_subjects_per_group = c(2,10,100)
	) %>%
	dplyr::group_by_all() %>%
	dplyr::summarise(
	simulate_experiment(
	within_subject_noise
	, between_subject_variability
	, num_obs_per_subject
	, num_subjects_per_group
	)
	, .groups = 'keep'
	) ->
	all_experiment_results


	all_experiment_results %>%
	# dplyr::ungroup() %>%
	# dplyr::slice(1) ->
	# .
	dplyr::group_by(
	within_subject_noise
	, between_subject_variability
	, num_obs_per_subject
	, num_subjects_per_group
	, iteration
	) %>%
	dplyr::summarise(
	value = cor_test_result[[1]]$estimate
	, .groups = 'drop_last'
	) %>%
	dplyr::summarise(
	lo = quantile(value,.025)
	, hi = quantile(value,.975)
	, med = median(value)
	, .groups = 'drop'
	) %>%
	dplyr::mutate(
	num_subjects_per_group = factor(num_subjects_per_group)
	) %>%
	ggplot(
	mapping = aes(
	y = med
	, ymin = lo
	, ymax = hi
	, x = within_subject_noise
	, colour = num_subjects_per_group
	, shape = num_subjects_per_group
	)
	)+
	facet_wrap(
	~ num_obs_per_subject
	, labeller = label_both
	)+
	geom_point(alpha=.5)+
	geom_line(alpha=.5)+
	geom_errorbar(alpha=.5)


	all_experiment_results %>%
	# dplyr::ungroup() %>%
	# dplyr::slice(1) ->
	# .
	dplyr::group_by(
	within_subject_noise
	, between_subject_variability
	, num_obs_per_subject
	, num_subjects_per_group
	, iteration
	) %>%
	dplyr::summarise(
	value = t_test_result[[1]]$p.value<.05
	, .groups = 'drop_last'
	) %>%
	dplyr::summarise(
	lo = broom::tidy(prop.test(sum(value),n()))$conf.low
	, hi = broom::tidy(prop.test(sum(value),n()))$conf.high
	, value = mean(value)
	, .groups = 'drop'
	) %>%
	dplyr::mutate(
	num_subjects_per_group = factor(num_subjects_per_group)
	) %>%
	ggplot(
	mapping = aes(
	y = value
	, ymin = lo
	, ymax = hi
	, x = within_subject_noise
	, colour = num_subjects_per_group
	, shape = num_subjects_per_group
	)
	)+
	facet_wrap(
	~ num_obs_per_subject
	, labeller = label_both
	)+
	geom_point(alpha=.5)+
	geom_line(alpha=.5)+
	geom_errorbar(alpha=.5)