vectorised base R simulation
library(tidyverse)

do_sim <- function() {
  n_locations <- 1000
  n_stations <- 10
  n_sources <- n_stations + n_locations
  n_incidents <- 20000
  n_replicates <- 100

  probabilities <- seq(0.7, 1, by = 0.05)
  names(probabilities) <- formatC(probabilities, format = "f", digits = 2)

  # some pretend table
  stations <- tibble(index = seq_len(n_locations + n_stations), name = "foo")

  # a big matrix of travel_times
  travel_times <- matrix(
    runif(n_incidents * (n_locations + n_stations), 0, 40 * 60) |> as.integer(),
    nrow = n_incidents,
    ncol = n_stations + n_locations
  ) |> t()
  # rows = sources (stations, then notional locations)
  # cols = incidents

  # we need a matrix of travel times for each scenario,
  # assuming the rows run stations first, then notional locations
  station_travel_times <- travel_times[seq(n_stations), ]
  notional_station_travel_times <- travel_times[n_stations + seq(n_locations), ]
  fastest_n <- function(col, n = 2) {
    # fast internal quicksort; with TRUE it also returns the original
    # positions of the sorted values ($x = sorted times, $ix = indexes)
    sorted <- .Internal(qsort(col, TRUE))
    matrix(
      c(
        sorted$x[seq(n)],
        sorted$ix[seq(n)]
      ),
      byrow = FALSE,
      nrow = n,
      ncol = 2
    )
  }
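  # e.g. with made-up times, fastest_n(c(30, 10, 20, 40)) gives
  #      [,1] [,2]
  # [1,]   10    2
  # [2,]   20    3
  # column 1 = the n fastest times, column 2 = which source row they came from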
  scenario_travel_times <-
    vapply(
      asplit(notional_station_travel_times, 1),
      function(a_row) {
        rbind(
          station_travel_times,
          a_row
        )
      },
      array(rep(0, (n_stations + 1) * n_incidents), dim = c(n_stations + 1, n_incidents))
    )
  dim(scenario_travel_times) <- c(n_stations + 1, n_incidents * n_locations)
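  # the 3-d vapply result (n_stations + 1, n_incidents, n_locations) is stored
  # column-major, so dropping to 2-d just lays the scenario slices side by side:
  # columns 1:n_incidents are scenario 1, the next n_incidents columns scenario 2, etc.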
  fastest_scenario_station_to_incidents <-
    vapply(asplit(scenario_travel_times, 2), fastest_n, matrix(rep(0, 2 * 2), nrow = 2, ncol = 2))
  # dim 1 = result (1st or 2nd fastest)
  # dim 2 = col 1 times, col 2 indexes
  # dim 3 = incident x scenario
  fastest_station_travel_times_to_incidents <- fastest_scenario_station_to_incidents[, 1, ]
  dim(fastest_station_travel_times_to_incidents) <- c(2, n_incidents, n_locations)

  fastest_station_travel_indexes_to_incidents_unfixed <- fastest_scenario_station_to_incidents[, 2, ]
  dim(fastest_station_travel_indexes_to_incidents_unfixed) <- c(2, n_incidents, n_locations)

  # need to correct indexes so they map to original sources
  fastest_station_travel_indexes_to_incidents <-
    vapply(
      seq(n_locations),
      function(x) {
        layer <- fastest_station_travel_indexes_to_incidents_unfixed[, , x]
        layer[layer == n_stations + 1] <- x + n_stations
        layer
      },
      array(rep(0, 2 * n_incidents), dim = c(2, n_incidents))
    )
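  # within a scenario the appended notional row sorts as index n_stations + 1,
  # so e.g. in scenario layer x = 3 an index of 11 is rewritten to 10 + 3 = 13,
  # i.e. the row of notional location 3 in the original travel_times matrix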
  results <- apply(
    fastest_station_travel_times_to_incidents,
    3,
    function(travel_time_mat) {
      replicate_incidents <- sample(seq(n_incidents), n_incidents * n_replicates * length(probabilities), replace = TRUE)
      dim(replicate_incidents) <- c(n_incidents, n_replicates, length(probabilities))

      replicate_probabilities <- vapply(
        probabilities,
        # we want 0 to mean the fastest station responds
        function(x) rbinom(n_incidents * n_replicates, 1, 1 - x),
        matrix(0L, nrow = n_incidents, ncol = n_replicates)
      )
      # dim 1 = incident
      # dim 2 = replicate
      # dim 3 = probability

      time_indexes <- replicate_probabilities + 1
      # we had 0s and 1s, now we have 1s and 2s which index into our fastest times

      replicate_response_times <-
        travel_time_mat[cbind(
          as.vector(time_indexes),
          as.vector(replicate_incidents)
        )]
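      # indexing a matrix with a two-column matrix picks elements by (row, col) pair,
      # e.g. m <- matrix(1:6, nrow = 2); m[cbind(c(1, 2), c(3, 1))] is c(5, 2),
      # so each draw selects the 1st or 2nd fastest time for its sampled incident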
      dim(replicate_response_times) <- c(n_incidents, n_replicates, length(probabilities))

      replicate_groups <- asplit(replicate_response_times, 2)

      summarise_replicate_group_matrix <- function(replicate_group_matrix) {
        t(apply(replicate_group_matrix, 2, quantile, probs = c(0.5, 0.9)))
      }

      scenario_percentiles <- lapply(replicate_groups, summarise_replicate_group_matrix)

      rectangle <- as.data.frame(do.call(rbind, scenario_percentiles))
      rectangle$probability <- rep(probabilities, n_replicates)
      rectangle$replicate <- rep(seq(n_replicates), each = length(probabilities))
      rectangle
    },
    simplify = FALSE
  )
  results
}
bench::mark(
  do_sim(),
  max_iterations = 1
)