bgall · February 5, 2020 18:39
diff --git a/social_desirability_bias.R b/social_desirability_bias.R
 # Show that social desirability not only produces biased
 # "descriptive statistics" (e.g. group means), but can
 # produce treatment effects of the opposite sign of
 # the true effect. You cannot simply ignore social
 # desirability under the assumption that it does
 # not affect estimates of causal parameters since
 # measures pre-treatment and post-treatment are
 # both subject to social desirability.
 #
 # Note that this arises due to social desirability
 # pushing people toward the maximum and minimum of
 # the theoretical range of the outcome variable, such
 # as when people state the probability they vote is very
 # high. Because the range of the outcome truncates treatment
 # effects for which the treatment would push an observation's
 # outcome value outside of the range, social desirability
 # that pushes outcome values toward the ceiling or floor of
 # the measure increases the risk that a treatment effect is
 # truncated. In the extreme case, it could cause all positive
 # treatment effects to be truncated at a positive value close
 # to zero and none of the negative treatment effects to be
 # truncated. The consequence is that nearly all of our
 # treatment effects are negative and large treatment effects
 # are only found in the negative effects, so the ATE is negative.
 # This does not happen when the measure is unbounded.

 # There are at least two ways of conceptualizing social 
 # desirability bias and the below looks at both:
 # - Additive SBD: true response + constant 
 # - Multiplicative SDB: t9rue response)*(constant)

 ########################################
 # Set-up
 ########################################

 # Load packages
 library(dplyr)
 library(magrittr)

 # Randomization seed
 set.seed(123)

 ########################################
 # Parameters
 ########################################

 # Number of participants completing study
 N <- 5000

 # Average treatment effect ~ Normal(mu,sigma^2)
 mu <- 0.10 # positive!
 sigma <- 0.10
 sdb_add <- 0.9
 sdb_multi <- .8

 ########################################
 # Generate data
 ########################################

 # Initialize "empty" data frame to store data
 # with dplyr. No good way to do this with
 # dplyr.
 df <- data.frame(y_t0_true = rep(NA_real_, N))

 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # Generate control data
 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 df %<>% dplyr::mutate(
  # Draw "true" vote probability for each
  # participant from Unif(0,1)
  y_t0_true = runif(n = N, min = 0, max = 1),
  
  # Assume there is some social desirability
  # where everyone reports a higher probability
  # of voting than they would otherwise. We can
  # think of 2 different types of SDB: additive
  # effects and multiplicative effects.
  
  # Additive effect: true probability + sdb,
  # where sdb in [0,1]
  y_t0_observed_add = ifelse(y_t0_true + sdb_add > 1,
                             1,
                             y_t0_true + sdb_add),
  
  # Multiplicative effect: true probability*(1+sdb),
  # where sdb > 0
  y_t0_observed_multi = ifelse((y_t0_true * (1 + sdb_multi)) > 1,
                               1,
                               y_t0_true * (1 + sdb_multi))
 )

 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 # Generate treated data
 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 # Get treatment effects
 df %<>% rnorm(n = N, mean = mu, sd = sigma)

 # Treated outcome, no social desirability,
 # round to [0,1] interval
 df %<>% dplyr::mutate(
  y_t1_true = y_t0_true + fx,
  y_t1_true = dplyr::case_when(y_t1_true < 0 ~ 0,
                               y_t1_true > 1 ~ 1,
                               TRUE ~ y_t1_true)
 )

 # Treated outcome, social desirability
 # round to [0,1] interval, additive
 df %<>% dplyr::mutate(
  y_t1_observed_add = y_t0_observed_add + fx,
  y_t1_observed_add = dplyr::case_when(
    y_t1_observed_add < 0 ~ 0,
    y_t1_observed_add > 1 ~ 1,
    TRUE ~ y_t1_observed_add
  )
 )

 # Treated outcome, social desirability
 # round to [0,1] interval, multiplicative

 df %<>% dplyr::mutate(
  y_t1_observed_multi = y_t0_observed_multi + fx*(1 + sdb_multi),
  y_t1_observed_multi = dplyr::case_when(
    y_t1_observed_multi < 0 ~ 0,
    y_t1_observed_multi > 1 ~ 1,
    TRUE ~ y_t1_observed_multi
  )
 )

 ########################################
 # Calculate ATE for different measures
 ########################################

 mean(df$y_t1_true - df$y_t0_true) # true
 mean(df$y_t1_observed_add - df$y_t0_observed_add) # additive
 mean(df$y_t1_observed_multi - df$y_t0_observed_multi) # multiplicative
	# Show that social desirability not only produces biased
	# "descriptive statistics" (e.g. group means), but can
	# produce treatment effects of the opposite sign of
	# the true effect. You cannot simply ignore social
	# desirability under the assumption that it does
	# not affect estimates of causal parameters since
	# measures pre-treatment and post-treatment are
	# both subject to social desirability.
	#
	# Note that this arises due to social desirability
	# pushing people toward the maximum and minimum of
	# the theoretical range of the outcome variable, such
	# as when people state the probability they vote is very
	# high. Because the range of the outcome truncates treatment
	# effects for which the treatment would push an observation's
	# outcome value outside of the range, social desirability
	# that pushes outcome values toward the ceiling or floor of
	# the measure increases the risk that a treatment effect is
	# truncated. In the extreme case, it could cause all positive
	# treatment effects to be truncated at a positive value close
	# to zero and none of the negative treatment effects to be
	# truncated. The consequence is that nearly all of our
	# treatment effects are negative and large treatment effects
	# are only found in the negative effects, so the ATE is negative.
	# This does not happen when the measure is unbounded.

	# There are at least two ways of conceptualizing social
	# desirability bias and the below looks at both:
	# - Additive SBD: true response + constant
	# - Multiplicative SDB: t9rue response)*(constant)

	########################################
	# Set-up
	########################################

	# Load packages
	library(dplyr)
	library(magrittr)

	# Randomization seed
	set.seed(123)

	########################################
	# Parameters
	########################################

	# Number of participants completing study
	N <- 5000

	# Average treatment effect ~ Normal(mu,sigma^2)
	mu <- 0.10 # positive!
	sigma <- 0.10
	sdb_add <- 0.9
	sdb_multi <- .8

	########################################
	# Generate data
	########################################

	# Initialize "empty" data frame to store data
	# with dplyr. No good way to do this with
	# dplyr.
	df <- data.frame(y_t0_true = rep(NA_real_, N))

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# Generate control data
	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

	df %<>% dplyr::mutate(
	# Draw "true" vote probability for each
	# participant from Unif(0,1)
	y_t0_true = runif(n = N, min = 0, max = 1),

	# Assume there is some social desirability
	# where everyone reports a higher probability
	# of voting than they would otherwise. We can
	# think of 2 different types of SDB: additive
	# effects and multiplicative effects.

	# Additive effect: true probability + sdb,
	# where sdb in [0,1]
	y_t0_observed_add = ifelse(y_t0_true + sdb_add > 1,
	1,
	y_t0_true + sdb_add),

	# Multiplicative effect: true probability*(1+sdb),
	# where sdb > 0
	y_t0_observed_multi = ifelse((y_t0_true * (1 + sdb_multi)) > 1,
	1,
	y_t0_true * (1 + sdb_multi))
	)

	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
	# Generate treated data
	#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

	# Get treatment effects
	df %<>% rnorm(n = N, mean = mu, sd = sigma)

	# Treated outcome, no social desirability,
	# round to [0,1] interval
	df %<>% dplyr::mutate(
	y_t1_true = y_t0_true + fx,
	y_t1_true = dplyr::case_when(y_t1_true < 0 ~ 0,
	y_t1_true > 1 ~ 1,
	TRUE ~ y_t1_true)
	)

	# Treated outcome, social desirability
	# round to [0,1] interval, additive
	df %<>% dplyr::mutate(
	y_t1_observed_add = y_t0_observed_add + fx,
	y_t1_observed_add = dplyr::case_when(
	y_t1_observed_add < 0 ~ 0,
	y_t1_observed_add > 1 ~ 1,
	TRUE ~ y_t1_observed_add
	)
	)

	# Treated outcome, social desirability
	# round to [0,1] interval, multiplicative

	df %<>% dplyr::mutate(
	y_t1_observed_multi = y_t0_observed_multi + fx*(1 + sdb_multi),
	y_t1_observed_multi = dplyr::case_when(
	y_t1_observed_multi < 0 ~ 0,
	y_t1_observed_multi > 1 ~ 1,
	TRUE ~ y_t1_observed_multi
	)
	)

	########################################
	# Calculate ATE for different measures
	########################################

	mean(df$y_t1_true - df$y_t0_true) # true
	mean(df$y_t1_observed_add - df$y_t0_observed_add) # additive
	mean(df$y_t1_observed_multi - df$y_t0_observed_multi) # multiplicative