Skip to content

Instantly share code, notes, and snippets.

@helske
Created April 29, 2025 17:52
Show Gist options
  • Save helske/744d6d3b115c449c24919658aea35993 to your computer and use it in GitHub Desktop.
Save helske/744d6d3b115c449c24919658aea35993 to your computer and use it in GitHub Desktop.
Small multicollinearity experiment
# Simulation experiment on the effect of multicollinearity in linear regression
library(dplyr)
library(ggplot2)
set.seed(1)
n <- 1000 # sample size
rho1 <- c(0, 0.3, 0.6) # cor(x, z) = cor(x, w)
rho2 <- seq(0.75, 0.95, by = 0.05) # cor(z, w)
rho <- expand.grid(rho1, rho2) # all combinations
# function which simulates the data, fits the model and returns SE
f <- function(n, rho1, rho2) {
Sigma <- matrix(rho1, 3, 3)
Sigma[1, 2] <- Sigma[2, 1] <- rho2
diag(Sigma) <- 1
X <- MASS::mvrnorm(n, mu = numeric(3), Sigma = Sigma)
y <- rnorm(n, X %*% rep(1, 3))
SE <- summary(lm(y ~ X))$coefficients["X3", "Std. Error"]
data.frame(rho1, rho2, SE = SE)
}
# 1000 replications for the quantiles
out <- do.call(
rbind,
replicate(
1000,
do.call(rbind, lapply(seq_len(nrow(rho)), \(i) f(n, rho[i, 1], rho[i, 2]))),
simplify = FALSE
)
)
# summarise results
results <- out |>
group_by(rho1, rho2) |>
summarise(mean = mean(SE), q5 = quantile(SE, 0.05), q95 = quantile(SE, 0.95))
# plot
results |>
mutate(rho1 = factor(rho1)) |>
ggplot(aes(rho2, mean)) +
geom_ribbon(aes(ymin = q5, ymax = q95, fill = rho1), alpha = 0.5) +
geom_line(aes(colour = rho1)) +
labs(fill = "Cor(X, Z)=Cor(X,W)", colour = "Cor(X, Z)=Cor(X,W)") +
theme_minimal() +
xlab("Cor(Z, W)") +
ylab("Standard error of coefficients of X")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment