Created
April 29, 2025 17:52
-
-
Save helske/744d6d3b115c449c24919658aea35993 to your computer and use it in GitHub Desktop.
Small multicollinearity experiment
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Simulation experiment on the effect of multicollinearity in linear regression | |
library(dplyr) | |
library(ggplot2) | |
set.seed(1) | |
n <- 1000 # sample size | |
rho1 <- c(0, 0.3, 0.6) # cor(x, z) = cor(x, w) | |
rho2 <- seq(0.75, 0.95, by = 0.05) # cor(z, w) | |
rho <- expand.grid(rho1, rho2) # all combinations | |
# function which simulates the data, fits the model and returns SE | |
f <- function(n, rho1, rho2) { | |
Sigma <- matrix(rho1, 3, 3) | |
Sigma[1, 2] <- Sigma[2, 1] <- rho2 | |
diag(Sigma) <- 1 | |
X <- MASS::mvrnorm(n, mu = numeric(3), Sigma = Sigma) | |
y <- rnorm(n, X %*% rep(1, 3)) | |
SE <- summary(lm(y ~ X))$coefficients["X3", "Std. Error"] | |
data.frame(rho1, rho2, SE = SE) | |
} | |
# 1000 replications for the quantiles | |
out <- do.call( | |
rbind, | |
replicate( | |
1000, | |
do.call(rbind, lapply(seq_len(nrow(rho)), \(i) f(n, rho[i, 1], rho[i, 2]))), | |
simplify = FALSE | |
) | |
) | |
# summarise results | |
results <- out |> | |
group_by(rho1, rho2) |> | |
summarise(mean = mean(SE), q5 = quantile(SE, 0.05), q95 = quantile(SE, 0.95)) | |
# plot | |
results |> | |
mutate(rho1 = factor(rho1)) |> | |
ggplot(aes(rho2, mean)) + | |
geom_ribbon(aes(ymin = q5, ymax = q95, fill = rho1), alpha = 0.5) + | |
geom_line(aes(colour = rho1)) + | |
labs(fill = "Cor(X, Z)=Cor(X,W)", colour = "Cor(X, Z)=Cor(X,W)") + | |
theme_minimal() + | |
xlab("Cor(Z, W)") + | |
ylab("Standard error of coefficients of X") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment