Last active
September 6, 2024 19:25
-
-
Save vankesteren/ceac6f46a94d02c2cc7ad3fb2ca0da16 to your computer and use it in GitHub Desktop.
Stop storing huge full-factorial grids, start using virtual grids. It behaves like a data frame but uses only a fraction of the memory!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
virtual_grid <- function(...) { | |
pars <- list(...) | |
lens <- vapply(pars, length, 1L) | |
return(structure(list(pars = pars, lens = lens), class = c("vgrid", "tbl_lazy"))) | |
} | |
dim.vgrid <- function(x) { | |
as.integer(c(prod(x$lens), length(x$lens))) | |
} | |
collect.vgrid <- function(x, ...) { | |
do.call(expand_grid, args = x$pars) | |
} | |
print.vgrid <- function(x, ...) { | |
cat("A virtual grid: ", nrow(x), "x", ncol(x), "\n\n") | |
if (nrow(x) < 7) | |
print(collect(x)) | |
else { | |
cat("First three rows:\n") | |
print(head(x, 3)) | |
cat("\nLast three rows:\n") | |
print(tail(x, 3), sum = "") | |
} | |
} | |
collect_vgrid_element <- function(x, i, j) { | |
if (i > nrow(x)) rlang::abort("Can't subset rows past the end!") | |
if (j > ncol(x)) rlang::abort("Can't subset columns past the end!") | |
# how many repetitions for each value? | |
reps <- if (j == ncol(x)) 1 else prod(x$lens[(j + 1):ncol(x)]) | |
idx <- ceiling((i / reps) %% x$lens[j]) | |
if (idx == 0) idx <- x$lens[j] | |
el <- x$pars[[j]][idx] | |
names(el) <- names(x$lens)[j] | |
el | |
} | |
collect_vgrid_row <- function(x, i) { | |
# if (i > nrow(x)) return(as_tibble(lapply(x$pars))) | |
row_list <- lapply(seq_along(x$pars), function(j) { | |
collect_vgrid_element(x, i, j) | |
}) | |
names(row_list) <- names(x$lens) | |
return(structure(as_tibble(row_list), class = c("vgrid_slice", "tbl_df", "tbl", "data.frame"))) | |
} | |
collect_vgrid_col <- function(x, j) { | |
col <- vapply(1:nrow(x), function(i) collect_vgrid_element(x, i, j), x$pars[[j]][1]) | |
col <- as_tibble(col) | |
names(col) <- names(x$lens)[j] | |
return(col) | |
} | |
tbl_sum.vgrid_slice <- function(x, ...) { | |
paste("A virtual grid slice:", nrow(x), "x", ncol(x)) | |
} | |
`[.vgrid` <- function(x, i, j, drop = FALSE, ...) { | |
i_arg <- substitute(i) | |
j_arg <- substitute(j) | |
if (missing(i)) { | |
i <- NULL | |
i_arg <- NULL | |
} else if (is.null(i)) { | |
i <- integer() | |
} | |
if (missing(j)) { | |
j <- NULL | |
j_arg <- NULL | |
} else if (is.null(j)) { | |
j <- integer() | |
} | |
# Ignore drop as an argument for counting | |
n_real_args <- nargs() - !missing(drop) | |
# Column or matrix subsetting if nargs() == 2L | |
if (n_real_args <= 2L) { | |
if (!missing(drop)) { | |
rlang::warn("`drop` argument ignored for subsetting a virtual grid with `x[j]`, it has an effect only for `x[i, j]`.") | |
drop <- FALSE | |
} | |
j <- i | |
i <- NULL | |
j_arg <- i_arg | |
i_arg <- NULL | |
} | |
if (is.null(j)) { | |
if (is.null(i) | length(i) < 1) { | |
rlang::warn("No indices found") | |
return() | |
} | |
if (length(i) == 1) { | |
return(collect_vgrid_row(x, i)) | |
} | |
return(bind_rows(lapply(i, collect_vgrid_row, x = x))) | |
} | |
if (is.null(i)) { | |
# column collection | |
if (length(j == 1)) { | |
return(collect_vgrid_col(x, j)) | |
} | |
return(bind_cols(lapply(j, collect_vgrid_col, x = x))) | |
} | |
# full slicing | |
if (length(i) == 1) { | |
if (length(j) == 1) { | |
val <- collect_vgrid_element(x, i, j) | |
val <- as_tibble(val) | |
names(val) <- names(x$lens[j]) | |
return(val) | |
} | |
return(collect_vgrid_row(x, i)[,j]) | |
} | |
if (length(i) > 1) { | |
return(bind_rows(lapply(i, collect_vgrid_row, x = x))[,j]) | |
} | |
} |
Author
vankesteren
commented
Sep 6, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment