svmiller · December 13, 2024 11:00
diff --git a/spp_test.R b/spp_test.R
 a <- rnorm(50) # this is stationary
 b <- cumsum(rnorm(50)) # this is not

 spp_test <- function(x, lag_short = TRUE, n_sims = 1000, interpret = TRUE,
                     pp_stat = "tau") {

  if (!pp_stat %in% c("tau", "rho") | length(pp_stat) > 1) {
    stop("The only 'pp_stat' arguments that make sense in this context is 'tau' or 'rho'. Pick one of the two.")
  }


  m <- embed(x, 2)
  dat <- data.frame(y = m[, 1], ly = m[, 2])
  dat$t <- 1:length(dat$y)

  n <- length(dat$y)

  M1 <- lm(y ~ ly - 1, dat) # no drift, no trend
  M2 <- lm(y ~ ly, dat) # drift, no trend
  M3 <- lm(y ~ ly + t, dat) # drift and trend


  if(lag_short == TRUE) {
    q <- floor(4*(n/100)^0.25)
  } else {
    q <-  floor(12*(n/100)^0.25)
  }

  get_z_stats <- function(mod, m) {
    index <- ifelse(m > 1, 2, 1)
    resids <- resid(mod)
    est_rho <- summary(mod)$coefficients[index,1]
    est_sig <- summary(mod)$coefficients[index,2]
    s2 <- sum(resids^2)/(n - m)
    gamma <- numeric(q + 1)
    for (i in 1:(q + 1)) {
      u <- embed(resids, i)
      gamma[i] = sum(u[,1]*u[,i])/n
    }
    lambda2 <- gamma[1] + 2*sum((1 - 1:q/(q + 1))*gamma[-1])
    z_rho <- n*(est_rho - 1) - n^2*est_sig^2/s2*(lambda2 - gamma[1])/2
    z_tau <- sqrt(gamma[1]/lambda2)*(est_rho - 1)/est_sig -
      (lambda2 - gamma[1])*n*est_sig/(2*sqrt(lambda2*s2))
    return(c(z_rho, z_tau))
  }


  Stats <- rbind(get_z_stats(M1,1),
                get_z_stats(M2,2),
                get_z_stats(M3,3))


  Sims <- data.frame()
  for (i in 1:n_sims) {
    fake_x <- rnorm(length(x))

    fm <- embed(fake_x, 2)
    fdat <- data.frame(y = fm[, 1], ly = fm[, 2])
    fdat$t <- 1:length(fdat$y)

    fn <- length(fdat$y)

    fM1 <- lm(y ~ ly - 1, fdat) # no drift, no trend
    fM2 <- lm(y ~ ly, fdat) # drift, no trend
    fM3 <- lm(y ~ ly + t, fdat) # drift and trend

    fakeStats <- rbind(get_z_stats(fM1, 1),
                   get_z_stats(fM2, 2),
                   get_z_stats(fM3, 3))

    fakeStats <- data.frame(fakeStats)
    names(fakeStats) <- c("z_rho", "z_tau")
    fakeStats$sim <- i
    fakeStats$cat <- c("No Drift, No Trend", "Drift, No Trend", "Drift and Trend")

    Sims <- rbind(Sims, fakeStats)


  }

  output <- list("Statistics" = Stats,
                 "Simulations" = Sims)


  if(interpret == TRUE) {

    if(pp_stat == "tau") {

      tau_stats <- output$Statistics[1:3, 2]
      tau_results <- do.call(rbind, with(output$Simulations, {
        lapply(split(z_tau, cat), function(x) {
          data.frame(
            mean = mean(x),
            lwr = quantile(x, probs = 0.05),
            upr = quantile(x, probs = 0.95)
          )
        })
      }))

      tau_results$cat <- rownames(tau_results)
      rownames(tau_results) <- NULL

      stat_sum <- paste0("Your tau statistics were ", round(tau_stats[1], 3), " (no drift, no trend), ",
                              round(tau_stats[2], 3), " (drift, no trend), and ", round(tau_stats[3], 3),
                              " (drift and trend). The mean across ", n_sims,
                              " simulations of a white noise time series of the length of your time series returns simulated means (and 95% intervals) of ",
                              round(subset(tau_results, cat == 'No Drift, No Trend')[1,1], 3), " (",
                              round(subset(tau_results, cat == 'No Drift, No Trend')[1,2], 3),", ",
                              round(subset(tau_results, cat == 'No Drift, No Trend')[1,3], 3),") [no drift, no trend], ",
                              round(subset(tau_results, cat == 'Drift, No Trend')[1,1], 3), " (",
                              round(subset(tau_results, cat == 'Drift, No Trend')[1,2], 3),",",
                              round(subset(tau_results, cat == 'Drift, No Trend')[1,3], 3),") [drift, no trend], and ",
                              round(subset(tau_results, cat == 'Drift and Trend')[1,1], 3), " (",
                              round(subset(tau_results, cat == 'Drift and Trend')[1,2], 3),", ",
                              round(subset(tau_results, cat == 'Drift and Trend')[1,3], 3),") [drift and trend]."
      )


    } else if (pp_stat == "rho") {

      rho_stats <- output$Statistics[1:3, 1]


      rho_results <- do.call(rbind, with(output$Simulations, {
        lapply(split(z_rho, cat), function(x) {
          data.frame(
            mean = mean(x),
            lwr = quantile(x, probs = 0.05),
            upr = quantile(x, probs = 0.95)
          )
        })
      }))

      rho_results$cat <- rownames(rho_results)
      rownames(rho_results) <- NULL



      stat_sum <- paste0("Your rho statistics were ", round(rho_stats[1], 3), " (no drift, no trend), ",
                              round(rho_stats[2], 3), " (drift, no trend), and ", round(rho_stats[3], 3),
                              " (drift and trend). The mean across ", n_sims,
                              " simulations of a white noise time series of the length of your time series returns simulated means (and 95% intervals) of ",
                              round(subset(rho_results, cat == 'No Drift, No Trend')[1,1], 3), " (",
                              round(subset(rho_results, cat == 'No Drift, No Trend')[1,2], 3),", ",
                              round(subset(rho_results, cat == 'No Drift, No Trend')[1,3], 3),") [no drift, no trend], ",
                              round(subset(rho_results, cat == 'Drift, No Trend')[1,1], 3), " (",
                              round(subset(rho_results, cat == 'Drift, No Trend')[1,2], 3),",",
                              round(subset(rho_results, cat == 'Drift, No Trend')[1,3], 3),") [drift, no trend], and ",
                              round(subset(rho_results, cat == 'Drift and Trend')[1,1], 3), " (",
                              round(subset(rho_results, cat == 'Drift and Trend')[1,2], 3),", ",
                              round(subset(rho_results, cat == 'Drift and Trend')[1,3], 3),") [drift and trend]."
      )


    }

    interp_message <- paste0(stat_sum, "\n\nFor any one of those three statistics, if your *particular* test statistic is not included in those corresponding intervals, it is strongly suggestive of non-stationarity as it would be incompatible with a pure stationary, white-noise time series across these ", n_sims,
                             " simulations.\n\nThe actual simulations are available to you in the output of this function for further exploration or inference by other intervals.")

    message(interp_message)

  } else {

  }

  return(output)


 }


 spp_test(a, n_sims = 50)
 spp_test(b, n_sims = 50)
	a <- rnorm(50) # this is stationary
	b <- cumsum(rnorm(50)) # this is not

	spp_test <- function(x, lag_short = TRUE, n_sims = 1000, interpret = TRUE,
	pp_stat = "tau") {

	if (!pp_stat %in% c("tau", "rho") \| length(pp_stat) > 1) {
	stop("The only 'pp_stat' arguments that make sense in this context is 'tau' or 'rho'. Pick one of the two.")
	}


	m <- embed(x, 2)
	dat <- data.frame(y = m[, 1], ly = m[, 2])
	dat$t <- 1:length(dat$y)

	n <- length(dat$y)

	M1 <- lm(y ~ ly - 1, dat) # no drift, no trend
	M2 <- lm(y ~ ly, dat) # drift, no trend
	M3 <- lm(y ~ ly + t, dat) # drift and trend


	if(lag_short == TRUE) {
	q <- floor(4*(n/100)^0.25)
	} else {
	q <- floor(12*(n/100)^0.25)
	}

	get_z_stats <- function(mod, m) {
	index <- ifelse(m > 1, 2, 1)
	resids <- resid(mod)
	est_rho <- summary(mod)$coefficients[index,1]
	est_sig <- summary(mod)$coefficients[index,2]
	s2 <- sum(resids^2)/(n - m)
	gamma <- numeric(q + 1)
	for (i in 1:(q + 1)) {
	u <- embed(resids, i)
	gamma[i] = sum(u[,1]*u[,i])/n
	}
	lambda2 <- gamma[1] + 2sum((1 - 1:q/(q + 1))gamma[-1])
	z_rho <- n(est_rho - 1) - n^2est_sig^2/s2*(lambda2 - gamma[1])/2
	z_tau <- sqrt(gamma[1]/lambda2)*(est_rho - 1)/est_sig -
	(lambda2 - gamma[1])nest_sig/(2sqrt(lambda2s2))
	return(c(z_rho, z_tau))
	}


	Stats <- rbind(get_z_stats(M1,1),
	get_z_stats(M2,2),
	get_z_stats(M3,3))


	Sims <- data.frame()
	for (i in 1:n_sims) {
	fake_x <- rnorm(length(x))

	fm <- embed(fake_x, 2)
	fdat <- data.frame(y = fm[, 1], ly = fm[, 2])
	fdat$t <- 1:length(fdat$y)

	fn <- length(fdat$y)

	fM1 <- lm(y ~ ly - 1, fdat) # no drift, no trend
	fM2 <- lm(y ~ ly, fdat) # drift, no trend
	fM3 <- lm(y ~ ly + t, fdat) # drift and trend

	fakeStats <- rbind(get_z_stats(fM1, 1),
	get_z_stats(fM2, 2),
	get_z_stats(fM3, 3))

	fakeStats <- data.frame(fakeStats)
	names(fakeStats) <- c("z_rho", "z_tau")
	fakeStats$sim <- i
	fakeStats$cat <- c("No Drift, No Trend", "Drift, No Trend", "Drift and Trend")

	Sims <- rbind(Sims, fakeStats)


	}

	output <- list("Statistics" = Stats,
	"Simulations" = Sims)


	if(interpret == TRUE) {

	if(pp_stat == "tau") {

	tau_stats <- output$Statistics[1:3, 2]
	tau_results <- do.call(rbind, with(output$Simulations, {
	lapply(split(z_tau, cat), function(x) {
	data.frame(
	mean = mean(x),
	lwr = quantile(x, probs = 0.05),
	upr = quantile(x, probs = 0.95)
	)
	})
	}))

	tau_results$cat <- rownames(tau_results)
	rownames(tau_results) <- NULL

	stat_sum <- paste0("Your tau statistics were ", round(tau_stats[1], 3), " (no drift, no trend), ",
	round(tau_stats[2], 3), " (drift, no trend), and ", round(tau_stats[3], 3),
	" (drift and trend). The mean across ", n_sims,
	" simulations of a white noise time series of the length of your time series returns simulated means (and 95% intervals) of ",
	round(subset(tau_results, cat == 'No Drift, No Trend')[1,1], 3), " (",
	round(subset(tau_results, cat == 'No Drift, No Trend')[1,2], 3),", ",
	round(subset(tau_results, cat == 'No Drift, No Trend')[1,3], 3),") [no drift, no trend], ",
	round(subset(tau_results, cat == 'Drift, No Trend')[1,1], 3), " (",
	round(subset(tau_results, cat == 'Drift, No Trend')[1,2], 3),",",
	round(subset(tau_results, cat == 'Drift, No Trend')[1,3], 3),") [drift, no trend], and ",
	round(subset(tau_results, cat == 'Drift and Trend')[1,1], 3), " (",
	round(subset(tau_results, cat == 'Drift and Trend')[1,2], 3),", ",
	round(subset(tau_results, cat == 'Drift and Trend')[1,3], 3),") [drift and trend]."
	)


	} else if (pp_stat == "rho") {

	rho_stats <- output$Statistics[1:3, 1]


	rho_results <- do.call(rbind, with(output$Simulations, {
	lapply(split(z_rho, cat), function(x) {
	data.frame(
	mean = mean(x),
	lwr = quantile(x, probs = 0.05),
	upr = quantile(x, probs = 0.95)
	)
	})
	}))

	rho_results$cat <- rownames(rho_results)
	rownames(rho_results) <- NULL



	stat_sum <- paste0("Your rho statistics were ", round(rho_stats[1], 3), " (no drift, no trend), ",
	round(rho_stats[2], 3), " (drift, no trend), and ", round(rho_stats[3], 3),
	" (drift and trend). The mean across ", n_sims,
	" simulations of a white noise time series of the length of your time series returns simulated means (and 95% intervals) of ",
	round(subset(rho_results, cat == 'No Drift, No Trend')[1,1], 3), " (",
	round(subset(rho_results, cat == 'No Drift, No Trend')[1,2], 3),", ",
	round(subset(rho_results, cat == 'No Drift, No Trend')[1,3], 3),") [no drift, no trend], ",
	round(subset(rho_results, cat == 'Drift, No Trend')[1,1], 3), " (",
	round(subset(rho_results, cat == 'Drift, No Trend')[1,2], 3),",",
	round(subset(rho_results, cat == 'Drift, No Trend')[1,3], 3),") [drift, no trend], and ",
	round(subset(rho_results, cat == 'Drift and Trend')[1,1], 3), " (",
	round(subset(rho_results, cat == 'Drift and Trend')[1,2], 3),", ",
	round(subset(rho_results, cat == 'Drift and Trend')[1,3], 3),") [drift and trend]."
	)


	}

	interp_message <- paste0(stat_sum, "\n\nFor any one of those three statistics, if your particular test statistic is not included in those corresponding intervals, it is strongly suggestive of non-stationarity as it would be incompatible with a pure stationary, white-noise time series across these ", n_sims,
	" simulations.\n\nThe actual simulations are available to you in the output of this function for further exploration or inference by other intervals.")

	message(interp_message)

	} else {

	}

	return(output)


	}


	spp_test(a, n_sims = 50)
	spp_test(b, n_sims = 50)