Skip to content

Instantly share code, notes, and snippets.

@svmiller
Created December 13, 2024 11:00
Show Gist options
  • Save svmiller/cafc3cbf9b88fbe9bd363a05348b0f47 to your computer and use it in GitHub Desktop.
Save svmiller/cafc3cbf9b88fbe9bd363a05348b0f47 to your computer and use it in GitHub Desktop.
a <- rnorm(50) # this is stationary
b <- cumsum(rnorm(50)) # this is not
spp_test <- function(x, lag_short = TRUE, n_sims = 1000, interpret = TRUE,
pp_stat = "tau") {
if (!pp_stat %in% c("tau", "rho") | length(pp_stat) > 1) {
stop("The only 'pp_stat' arguments that make sense in this context is 'tau' or 'rho'. Pick one of the two.")
}
m <- embed(x, 2)
dat <- data.frame(y = m[, 1], ly = m[, 2])
dat$t <- 1:length(dat$y)
n <- length(dat$y)
M1 <- lm(y ~ ly - 1, dat) # no drift, no trend
M2 <- lm(y ~ ly, dat) # drift, no trend
M3 <- lm(y ~ ly + t, dat) # drift and trend
if(lag_short == TRUE) {
q <- floor(4*(n/100)^0.25)
} else {
q <- floor(12*(n/100)^0.25)
}
get_z_stats <- function(mod, m) {
index <- ifelse(m > 1, 2, 1)
resids <- resid(mod)
est_rho <- summary(mod)$coefficients[index,1]
est_sig <- summary(mod)$coefficients[index,2]
s2 <- sum(resids^2)/(n - m)
gamma <- numeric(q + 1)
for (i in 1:(q + 1)) {
u <- embed(resids, i)
gamma[i] = sum(u[,1]*u[,i])/n
}
lambda2 <- gamma[1] + 2*sum((1 - 1:q/(q + 1))*gamma[-1])
z_rho <- n*(est_rho - 1) - n^2*est_sig^2/s2*(lambda2 - gamma[1])/2
z_tau <- sqrt(gamma[1]/lambda2)*(est_rho - 1)/est_sig -
(lambda2 - gamma[1])*n*est_sig/(2*sqrt(lambda2*s2))
return(c(z_rho, z_tau))
}
Stats <- rbind(get_z_stats(M1,1),
get_z_stats(M2,2),
get_z_stats(M3,3))
Sims <- data.frame()
for (i in 1:n_sims) {
fake_x <- rnorm(length(x))
fm <- embed(fake_x, 2)
fdat <- data.frame(y = fm[, 1], ly = fm[, 2])
fdat$t <- 1:length(fdat$y)
fn <- length(fdat$y)
fM1 <- lm(y ~ ly - 1, fdat) # no drift, no trend
fM2 <- lm(y ~ ly, fdat) # drift, no trend
fM3 <- lm(y ~ ly + t, fdat) # drift and trend
fakeStats <- rbind(get_z_stats(fM1, 1),
get_z_stats(fM2, 2),
get_z_stats(fM3, 3))
fakeStats <- data.frame(fakeStats)
names(fakeStats) <- c("z_rho", "z_tau")
fakeStats$sim <- i
fakeStats$cat <- c("No Drift, No Trend", "Drift, No Trend", "Drift and Trend")
Sims <- rbind(Sims, fakeStats)
}
output <- list("Statistics" = Stats,
"Simulations" = Sims)
if(interpret == TRUE) {
if(pp_stat == "tau") {
tau_stats <- output$Statistics[1:3, 2]
tau_results <- do.call(rbind, with(output$Simulations, {
lapply(split(z_tau, cat), function(x) {
data.frame(
mean = mean(x),
lwr = quantile(x, probs = 0.05),
upr = quantile(x, probs = 0.95)
)
})
}))
tau_results$cat <- rownames(tau_results)
rownames(tau_results) <- NULL
stat_sum <- paste0("Your tau statistics were ", round(tau_stats[1], 3), " (no drift, no trend), ",
round(tau_stats[2], 3), " (drift, no trend), and ", round(tau_stats[3], 3),
" (drift and trend). The mean across ", n_sims,
" simulations of a white noise time series of the length of your time series returns simulated means (and 95% intervals) of ",
round(subset(tau_results, cat == 'No Drift, No Trend')[1,1], 3), " (",
round(subset(tau_results, cat == 'No Drift, No Trend')[1,2], 3),", ",
round(subset(tau_results, cat == 'No Drift, No Trend')[1,3], 3),") [no drift, no trend], ",
round(subset(tau_results, cat == 'Drift, No Trend')[1,1], 3), " (",
round(subset(tau_results, cat == 'Drift, No Trend')[1,2], 3),",",
round(subset(tau_results, cat == 'Drift, No Trend')[1,3], 3),") [drift, no trend], and ",
round(subset(tau_results, cat == 'Drift and Trend')[1,1], 3), " (",
round(subset(tau_results, cat == 'Drift and Trend')[1,2], 3),", ",
round(subset(tau_results, cat == 'Drift and Trend')[1,3], 3),") [drift and trend]."
)
} else if (pp_stat == "rho") {
rho_stats <- output$Statistics[1:3, 1]
rho_results <- do.call(rbind, with(output$Simulations, {
lapply(split(z_rho, cat), function(x) {
data.frame(
mean = mean(x),
lwr = quantile(x, probs = 0.05),
upr = quantile(x, probs = 0.95)
)
})
}))
rho_results$cat <- rownames(rho_results)
rownames(rho_results) <- NULL
stat_sum <- paste0("Your rho statistics were ", round(rho_stats[1], 3), " (no drift, no trend), ",
round(rho_stats[2], 3), " (drift, no trend), and ", round(rho_stats[3], 3),
" (drift and trend). The mean across ", n_sims,
" simulations of a white noise time series of the length of your time series returns simulated means (and 95% intervals) of ",
round(subset(rho_results, cat == 'No Drift, No Trend')[1,1], 3), " (",
round(subset(rho_results, cat == 'No Drift, No Trend')[1,2], 3),", ",
round(subset(rho_results, cat == 'No Drift, No Trend')[1,3], 3),") [no drift, no trend], ",
round(subset(rho_results, cat == 'Drift, No Trend')[1,1], 3), " (",
round(subset(rho_results, cat == 'Drift, No Trend')[1,2], 3),",",
round(subset(rho_results, cat == 'Drift, No Trend')[1,3], 3),") [drift, no trend], and ",
round(subset(rho_results, cat == 'Drift and Trend')[1,1], 3), " (",
round(subset(rho_results, cat == 'Drift and Trend')[1,2], 3),", ",
round(subset(rho_results, cat == 'Drift and Trend')[1,3], 3),") [drift and trend]."
)
}
interp_message <- paste0(stat_sum, "\n\nFor any one of those three statistics, if your *particular* test statistic is not included in those corresponding intervals, it is strongly suggestive of non-stationarity as it would be incompatible with a pure stationary, white-noise time series across these ", n_sims,
" simulations.\n\nThe actual simulations are available to you in the output of this function for further exploration or inference by other intervals.")
message(interp_message)
} else {
}
return(output)
}
spp_test(a, n_sims = 50)
spp_test(b, n_sims = 50)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment