vectorised base R simulation
library(tidyverse)

do_sim <- function() {
  n_locations <- 1000
  n_stations <- 10
  n_sources <- n_stations + n_locations
  n_incidents <- 20000
  n_replicates <- 100

  probabilities <- seq(0.7, 1, by = 0.05)
  names(probabilities) <- formatC(probabilities, format = "f", digits = 2)

  # some pretend table
  stations <- tibble(index = seq_len(n_locations + n_stations), name = "foo")

  # a big matrix of travel_times
  travel_times <- matrix(
    runif(n_incidents * (n_locations + n_stations), 0, 40 * 60) |> as.integer(),
    nrow = n_incidents,
    ncol = n_stations + n_locations
  ) |> t()
  # rows = sources (stations, then notional locations)
  # cols = incidents

  # we need a matrix of travel times for each scenario,
  # assuming the rows run stations first, then notional locations
  station_travel_times <- travel_times[seq(n_stations), ]
  notional_station_travel_times <- travel_times[n_stations + seq(n_locations), ]
  fastest_n <- function(col, n = 2) {
    # fast internal quicksort; with TRUE it also returns the original
    # positions of the sorted values ($x = sorted times, $ix = indexes)
    sorted <- .Internal(qsort(col, TRUE))
    matrix(
      c(
        sorted$x[seq(n)],
        sorted$ix[seq(n)]
      ),
      byrow = FALSE,
      nrow = n,
      ncol = 2
    )
  }
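  # e.g. with made-up times, fastest_n(c(30, 10, 20, 40)) gives
  #      [,1] [,2]
  # [1,]   10    2
  # [2,]   20    3
  # column 1 = the n fastest times, column 2 = which source row they came from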
  scenario_travel_times <-
    vapply(
      asplit(notional_station_travel_times, 1),
      function(a_row) {
        rbind(
          station_travel_times,
          a_row
        )
      },
      array(rep(0, (n_stations + 1) * n_incidents), dim = c(n_stations + 1, n_incidents))
    )
  dim(scenario_travel_times) <- c(n_stations + 1, n_incidents * n_locations)
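  # the 3-d vapply result (n_stations + 1, n_incidents, n_locations) is stored
  # column-major, so dropping to 2-d just lays the scenario slices side by side:
  # columns 1:n_incidents are scenario 1, the next n_incidents columns scenario 2, etc.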
  fastest_scenario_station_to_incidents <-
    vapply(asplit(scenario_travel_times, 2), fastest_n, matrix(rep(0, 2 * 2), nrow = 2, ncol = 2))
  # dim 1 = result (1st or 2nd fastest)
  # dim 2 = col 1 times, col 2 indexes
  # dim 3 = incident x scenario
  fastest_station_travel_times_to_incidents <- fastest_scenario_station_to_incidents[, 1, ]
  dim(fastest_station_travel_times_to_incidents) <- c(2, n_incidents, n_locations)

  fastest_station_travel_indexes_to_incidents_unfixed <- fastest_scenario_station_to_incidents[, 2, ]
  dim(fastest_station_travel_indexes_to_incidents_unfixed) <- c(2, n_incidents, n_locations)

  # need to correct indexes so they map to original sources
  fastest_station_travel_indexes_to_incidents <-
    vapply(
      seq(n_locations),
      function(x) {
        layer <- fastest_station_travel_indexes_to_incidents_unfixed[, , x]
        layer[layer == n_stations + 1] <- x + n_stations
        layer
      },
      array(rep(0, 2 * n_incidents), dim = c(2, n_incidents))
    )
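  # within a scenario the appended notional row sorts as index n_stations + 1,
  # so e.g. in scenario layer x = 3 an index of 11 is rewritten to 10 + 3 = 13,
  # i.e. the row of notional location 3 in the original travel_times matrix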
  results <- apply(
    fastest_station_travel_times_to_incidents,
    3,
    function(travel_time_mat) {
      replicate_incidents <- sample(seq(n_incidents), n_incidents * n_replicates * length(probabilities), replace = TRUE)
      dim(replicate_incidents) <- c(n_incidents, n_replicates, length(probabilities))

      replicate_probabilities <- vapply(
        probabilities,
        # we want 0 to mean the fastest station responds
        function(x) rbinom(n_incidents * n_replicates, 1, 1 - x),
        matrix(0L, nrow = n_incidents, ncol = n_replicates)
      )
      # dim 1 = incident
      # dim 2 = replicate
      # dim 3 = probability

      time_indexes <- replicate_probabilities + 1
      # we had 0s and 1s, now we have 1s and 2s which index into our fastest times

      replicate_response_times <-
        travel_time_mat[cbind(
          as.vector(time_indexes),
          as.vector(replicate_incidents)
        )]
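      # indexing a matrix with a two-column matrix picks elements by (row, col) pair,
      # e.g. m <- matrix(1:6, nrow = 2); m[cbind(c(1, 2), c(3, 1))] is c(5, 2),
      # so each draw selects the 1st or 2nd fastest time for its sampled incident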
      dim(replicate_response_times) <- c(n_incidents, n_replicates, length(probabilities))

      replicate_groups <- asplit(replicate_response_times, 2)

      summarise_replicate_group_matrix <- function(replicate_group_matrix) {
        t(apply(replicate_group_matrix, 2, quantile, probs = c(0.5, 0.9)))
      }

      scenario_percentiles <- lapply(replicate_groups, summarise_replicate_group_matrix)

      rectangle <- as.data.frame(do.call(rbind, scenario_percentiles))
      rectangle$probability <- rep(probabilities, n_replicates)
      rectangle$replicate <- rep(seq(n_replicates), each = length(probabilities))
      rectangle
    },
    simplify = FALSE
  )
  results
}
bench::mark(
  do_sim(),
  max_iterations = 1
)