Last active
May 6, 2022 18:50
-
-
Save grantmcdermott/7d8f9ea20d2bbf54d3366f5a72482ad9 to your computer and use it in GitHub Desktop.
Bayesian bootstrap
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Context: https://twitter.com/grant_mcdermott/status/1487528757418102787 | |
library(data.table) | |
library(fixest) | |
bboot = | |
function(object, reps = 100L, cluster = NULL, ...) { | |
fixest_obj = inherits(object, c('fixest', 'fixest_multi')) | |
if (inherits(object, c('lm'))) { | |
Ymat = object$model[, 1] | |
} else if (fixest_obj) { | |
Ymat = model.matrix(object, type = 'lhs', as.matrix = TRUE) | |
} else { | |
stop('\nModel or object class not currently supported.\n') | |
} | |
Xmat = model.matrix(object) | |
n_weights = nrow(Xmat) | |
fmat = NULL | |
if (fixest_obj && !is.null(object$fixef_vars)) { | |
fmat = model.matrix(object, type = 'fixef') | |
} | |
## Have to do a bit of leg work to pull out the clusters and match to | |
## model matrix | |
if (!is.null(cluster)) { | |
if (inherits(cluster, "formula")) { | |
cl_string = strsplit(paste0(cluster)[2], split = ' \\+ ')[[1]] | |
} else { | |
cl_string = paste(cluster) | |
} | |
if (!is.null(fmat) && all(cl_string %in% colnames(fmat))) { | |
cl_mat = fmat[, cl_string] | |
} else if (all(cl_string %in% colnames(Xmat))) { | |
cl_mat = Xmat[, cl_string] | |
} else { | |
DATA = eval(object$call$data) | |
if (all(cl_string %in% names(DATA))) { | |
all_vars = sapply(list(Ymat, Xmat, fmat), colnames) | |
if (inherits(all_vars, 'list')) all_vars = do.call('c', all_vars) | |
all_vars = union(all_vars, cl_string) | |
DATA = data.frame(DATA)[, intersect(colnames(DATA), all_vars)] | |
DATA = DATA[complete.cases(DATA), ] | |
cl_mat = model.matrix(~0+., DATA[, cl_string, drop=FALSE]) | |
} else { | |
stop(paste0('Could not find ', cluster, '. Please provide a valid input.\n')) | |
} | |
} | |
if (!inherits(cl_mat, "matrix")) cl_mat = matrix(cl_mat) | |
n_weights = nrow(unique(cl_mat)) | |
## Keep track of cluster id for consistent weighting within each | |
## cluster later on | |
cl_mat = data.table::as.data.table(cl_mat) | |
cl_mat$cl_id = data.table::frank(cl_mat, ties.method = "dense") | |
} | |
## Pre-allocate space for efficiency | |
wfits = matrix(0, reps, length(object$coefficients)) | |
for (i in 1:reps) { | |
if (is.null(cluster)) { | |
weights = rexp(n_weights, rate = 1) | |
} else { | |
weights = cl_mat[, wt := rexp(1, rate = 1), by = cl_id][, wt] | |
} | |
## Normalise weights | |
## (Unnecessary? https://twitter.com/deaneckles/status/1487506960698200067) | |
# weights = weights / sum(weights) | |
## Demean X and Y matrices if fixed effects are present | |
if (!is.null(fmat)) { | |
Xmat = fixest::demean(X = Xmat, f = fmat, weights = weights) | |
Ymat = fixest::demean(X = Ymat, f = fmat, weights = weights) | |
} | |
## Fit weighted reg | |
wfits[i, ] = lm.wfit(x = Xmat, y = Ymat, w = weights)$coefficients | |
} | |
colnames(wfits) = colnames(Xmat) | |
class(wfits) = "bboot" | |
## Meta attributes | |
attr(wfits, "coefs") = try(coefficients(object), silent = TRUE) | |
attr(wfits, "df") = try(df.residual(object), silent = TRUE) | |
attr(wfits, "se") = try(sqrt(diag(cov(wfits))), silent = TRUE) | |
attr(wfits, "reps") = reps | |
return(wfits) | |
} | |
# | |
## Methods | |
# | |
summary.bboot = function(object, level = 0.95, ...) { | |
alpha = 1 - level | |
lwr = alpha/2 | |
upr = 1-lwr | |
est = attr(object, "coefs") | |
se = attr(object, "se") | |
df = attr(object, "df") | |
cnames = c("Estimate", "Std. Error") | |
tval = as.vector(est)/se | |
if (is.null(df)) df = 0 | |
if (is.finite(df) && df > 0) { | |
pval = 2 * pt(abs(tval), df = df, lower.tail = FALSE) | |
fac = qt(alpha, df = df) | |
cnames = c(cnames, "t value", "Pr(>|t|)") | |
} else { | |
pval = 2 * pnorm(abs(tval), lower.tail = FALSE) | |
fac = qnorm(alpha) | |
cnames = c(cnames, "z value", "Pr(>|z|)") | |
} | |
out = cbind(est, se, tval, pval) | |
colnames(out) = cnames | |
ci = cbind(est + fac * se, est - fac * se) | |
colnames(ci) = c("Lower", "Upper") | |
out = cbind(out, ci) | |
# qtiles = t(apply(object, 2, \(x) c(mean(x), quantile(x, c(lwr, upr))))) | |
# colnames(qtiles) = c("mean", "conf.low", "conf.upper") | |
# attr(out, "quantiles") = qtiles | |
out | |
} | |
print.bboot = function(object, ...) { | |
out = summary(object, ...) | |
cat("Bayesian bootstrap: Standard errors based on", attr(object, "reps"), "reps.", "\n\n") | |
print(out, digits = 4, quote = FALSE, print.gap = 2L) | |
} | |
# | |
## Examples | |
# | |
set.seed(123) | |
mod = lm(mpg ~ wt + hp, mtcars) | |
bb_mod = bboot(mod, reps = 1e3) | |
bb_mod | |
hist(bb_mod[, 'wt'], | |
breaks = 100, | |
border = 'white', | |
main = 'Bayesian bootstrap: wt') | |
# Add cluster variables with a formula | |
set.seed(42) | |
bboot(mod, reps = 1e3, cluster = ~cyl) | |
# Same result for fixest::feols (no FEs) | |
set.seed(42) | |
feols(mpg ~ wt + hp, mtcars) |> | |
bboot(reps = 1e3, cluster = ~cyl) | |
# Incl. FEs is fine too (although adds a bit of overhead) | |
feols(mpg ~ wt + hp | cyl, mtcars, vcov = 'iid') |> | |
bboot(reps = 1e3) | |
# Can combine FEs and clustering as well | |
feols(mpg ~ wt + hp | cyl, mtcars, vcov = ~cyl) |> | |
bboot(reps = 1e3, cluster = ~cyl) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
You don't need to apologize.
Thank you very much for the detailed answer!