-
-
Save henfiber/e2af7e79f44781cde5ad to your computer and use it in GitHub Desktop.
Updated combo benchmark (25.R,RevoR,loess,LAPACK benchmarks)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##################### | |
## Updated and combo benchmark | |
## Including : | |
## R-benchmark-25.R from Simon Urbanek | |
## Revolution Analytics benchmarks : http://www.revolutionanalytics.com/revolution-revor-enterprise-benchmark-details | |
## Extra benchmarks (loess and eigen) | |
## LAPACK benchmarks (QR,eigen,cholesky,svd,prcomp,balance,kappa,norm,solve) | |
## TODO: | |
## add extra functions like dist() and cluster() - also provided by gputools | |
## separate the gpu from cpu benchmarks, and the analysis from the benchmark code | |
## precompute the matrices used throughout the experiments | |
library(compiler) | |
library(methods) | |
set.seed(1) | |
########################## | |
# Helper functions | |
########################## | |
# return a string with found BLAS/LAPACK library filenames | |
get_numlib <- function(blas_fallback="libblas", lapack_fallback="liblapack") { | |
if(exists("getMKLthreads", mode="function")) { | |
numlib <- "MKL_MKL" | |
} else { | |
blas <- blas_fallback; lapack <- lapack_fallback | |
cmd <- paste0("lsof -p ", Sys.getpid(), " 2>/dev/null | | |
grep 'lib.*blas\\|lib.*lapack\\|lib.*atlas' | | |
awk -v FS='/' '{$0=substr($NF,1,index($NF,\".\")-1)} 1'") | |
p <- pipe(cmd) | |
libs <- readLines(p); close(p) | |
if(any(grepl("atlas", libs))) { | |
str <- libs[grep("atlas", libs)] | |
blas <- unlist(strsplit(str, split=".", fixed = T))[1] | |
lapack <- blas | |
} | |
if(any(grepl("openblas", libs))) { | |
str <- libs[grep("openblas", libs)] | |
blas <- unlist(strsplit(str, split="-"))[1] | |
} | |
if(length(blas) == 0 || blas=="") blas <- blas_fallback | |
if(length(lapack) == 0 || lapack=="") lapack <- lapack_fallback | |
numlib <- paste(blas, lapack, sep = "_") | |
# Compiling the C function to set OpenBLAS number of threads for OpenMP version | |
# needed to run the implicit parallelism test, otherwise it fails | |
if(grepl("openblaso", blas)) { | |
if("OpenBlasThreads" %in% rownames(installed.packages())) { | |
library(OpenBlasThreads) | |
} else { | |
message("Compiling C inline function openblas.set.num.threads()") | |
compile_openblas_num_threads() | |
} | |
} | |
} | |
numlib | |
} | |
# Get a unique hash for this host using Sys.info() | |
# The system host name can be found using this process: | |
# First, this function checks the system environment variables HOST, HOSTNAME, and COMPUTERNAME. | |
# Second, it checks Sys.info()["nodename"] for host name details. | |
# Finally, it tries to query the system command uname -n. | |
build_sysinfo_hash <- function(){ | |
return(substr(digest::digest(Sys.info(), "md5"), 1, 5)) | |
} | |
# build a benchmark name using r version, jit status and loaded math libraries | |
build_benchmark_name <- function(jit_level=0) { | |
numlib <- tryCatch(get_numlib(), | |
error = function(e) warning(paste0("While trying to detect the math libs : ", e$message))) | |
name <- paste( # R.version$os, | |
as.character(getRversion()), | |
paste0("j", jit_level), | |
numlib, | |
sep = "_") | |
if(interactive()) { | |
response <- readline(paste0('The automatic benchmark name is "', name, '". ', | |
"Press enter to accept. OR type a new custom name :\n")) | |
if(response != "") name <- response | |
} | |
name | |
} | |
# Compile inline C function to set OpenBLAS number of threads | |
# Use as: openblas.set.num.threads(4) | |
compile_openblas_num_threads <- function() { | |
require(inline) | |
openblas.set.num.threads <<- cfunction(signature(ipt="integer"), | |
body = 'openblas_set_num_threads(*ipt);', language = "C", convention = ".C", | |
otherdefs = c ('extern void openblas_set_num_threads(int);'), | |
libargs = c ('-L/usr/lib64/ -lblas')) | |
} | |
########################## | |
# Measurement functions | |
########################## | |
init_measurements <- function() { | |
if(!exists("measurements") || is.null(measurements)) { | |
measurements <<- data.frame(timestamp=format(Sys.time(), "%Y%m%dT%H%M%S")) | |
} | |
} | |
add_measurement <- function(name,value) { | |
measurements[1, name] <<- value | |
} | |
# Save measuremt in an .rds file | |
save_measurements <- function() { | |
if(is.null(measurements)) { | |
return(2) | |
} | |
if(!dir.exists("benchmarks_results")) { | |
stop("Cannot find the benchmarks results folder. chdir to the appropriate location") | |
} | |
if(is.null(benchmark_name)) { | |
if(interactive()) | |
benchmark_name <<- readline("Benchmark name? (unique - will override identical benchmark files) : ") | |
else | |
benchmark_name <<- build_benchmark_name(0) | |
} | |
if(!exists("host_hash")) { | |
host_hash <- build_sysinfo_hash() | |
if(interactive()) { | |
response <- readline(paste0("Type a custom hostname if you want here. Otherwise the automatically generated hash ", | |
host_hash, | |
" will be used : ")) | |
if(nchar(response)) host_hash <- response | |
} | |
} | |
saveRDS(measurements, file=paste0("benchmarks_results/bench_", benchmark_name, "_", | |
format(Sys.time(), "%Y%m%dT%H%M%S"), "_", | |
host_hash, | |
".rds")) | |
} | |
# Run and measure a benchmarking function/expression | |
run_and_measure <- function(name, expr, runs=1){ | |
#expr <- substitute(expr) # Prevent early evaluation | |
init_measurements() | |
cat("###", name, "\n\n") | |
timing <- unclass(system.time(expr))[1:3] | |
add_measurement(paste0("cput_", name), timing[1] + timing[2]) | |
add_measurement(paste0("clock_", name), timing[3]) | |
# Print the timing | |
print(timing) | |
cat("\n") | |
} | |
# MAIN function - Run a whole experiment | |
run_experiment <- function(expr, jit_level=0) { | |
library(compiler) | |
enableJIT(jit_level) | |
# Use the same seed across runs | |
set.seed(1) | |
gc() | |
# Initialize measurements data frame | |
measurements <<- data.frame(timestamp=format(Sys.time(), "%Y%m%dT%H%M%S")) | |
# set the benchmark name | |
benchmark_name <<- build_benchmark_name(jit_level) | |
rownames(measurements)[1] <<- benchmark_name | |
# Run the experiment | |
cat("# EXPERIMENT:", benchmark_name, "\n\n") | |
expr | |
cat("\n") | |
} | |
##################################### | |
# Loading and ranking experiments | |
##################################### | |
# Load measurements saved in .rds files | |
load_measurements <- function(pattern='bench_.*', this_engine=TRUE){ | |
if(!dir.exists("benchmarks_results")) { | |
stop("Cannot find the benchmarks results folder. chdir to the appropriate location") | |
} | |
if(this_engine) | |
pattern <- paste0(pattern, build_sysinfo_hash(), "[.]+rds") | |
else | |
pattern <- paste0(pattern, "[.]+rds") | |
found <- list.files(path = "benchmarks_results", pattern = pattern) | |
if(length(found) > 0) { | |
past_measurements <<- data.frame(timestamp=format(Sys.time(), "%Y%m%dT%H%M%S"), stringsAsFactors = F) | |
for (f in found) { | |
cur <- readRDS(paste0("benchmarks_results/", f)) | |
name <- rownames(cur) | |
if(!this_engine) name <- paste0(name, "_", sub("(.*_)([A-Za-z0-9]+)[.]+rds", "\\2", f)) | |
cur <- cbind(data.frame(name=name, stringsAsFactors = F), cur) | |
past_measurements <<- merge(past_measurements, cur, all=T) | |
} | |
past_measurements <<- past_measurements[complete.cases(past_measurements), ] | |
} else { | |
message("No past experiments have been found.") | |
} | |
} | |
load_memory_measurements <- function(pattern='*.csv') { | |
if(!dir.exists("benchmarks_results")) { | |
stop("Cannot find the benchmarks results folder. chdir to the appropriate location") | |
} | |
past_measurements <<- do.call("rbind", lapply(list.files(path = "benchmarks_results", | |
pattern = pattern), function(f) { | |
d <- read.csv(paste0("benchmarks_results/", f), stringsAsFactors = F) | |
values <- t(d$mem_kb) | |
colnames(values) <- t(d$func) | |
rownames(values) <- f | |
data.frame(values) | |
})) | |
} | |
# Annotate experiments extracting info from the experiment name | |
annotate_experiments <- function() { | |
if(!exists("past_measurements")) | |
load_measurements() | |
m <- past_measurements | |
single_threaded <- c("libblas", "liblapack", "libsatlas", "libopenblas") | |
openblas_versions <- c("libopenblas", "libopenblasp", "libopenblaso") | |
atlas_versions <- c("libtatlas", "libsatlas") | |
mkl_versions <- c("MKL") | |
# Parse information from experiment name | |
s <- strsplit(m$name, '_') | |
d <- do.call("rbind", lapply(s, function(l){ | |
data.frame( r_version = l[1], | |
jit = substr(l[2], 2, 2), | |
blas = l[3], | |
lapack = if(length(l)>3) l[4] else l[3] | |
) | |
})) | |
d$blas_opt = as.factor(ifelse(d$blas == "libblas", "default", "optimized")) | |
d$lapack_opt = as.factor(ifelse(d$lapack == "liblapack", "default", "optimized")) | |
d$blas_threaded = as.factor(ifelse(d$blas %in% single_threaded, FALSE, TRUE)) | |
d$lapack_threaded = as.factor(ifelse(d$lapack %in% single_threaded, FALSE, TRUE)) | |
d$blas_family = as.factor(ifelse(d$blas %in% openblas_versions, "openblas", | |
ifelse(d$blas %in% atlas_versions, "atlas", | |
ifelse(d$blas %in% mkl_versions, "mkl", "default")))) | |
(m <- cbind(d, m)) | |
} | |
# Rank experiments - according to different scores | |
# Overall/clock-time means, most wins, highest ratios etc. | |
rank_experiments <- function() { | |
if(!exists("past_measurements")) | |
load_measurements() | |
# Index some categories of columns | |
cols_numeric <- 3:ncol(past_measurements) | |
cols_clock <- which(!grepl("^cput_", colnames(past_measurements))) | |
cols_clock <- cols_clock[-c(1,2)] | |
cols_cput <- which(grepl("^cput_", colnames(past_measurements))) | |
cols_parallel <- which(grepl("_PARALLEL$", colnames(past_measurements))) | |
cols_aggregated <- which(grepl("_ALL$", colnames(past_measurements))) | |
# We define geometric mean and trimmed geommetric mean | |
gmean <- function(x) {exp(mean(log(x)))} | |
trgmean <- function(x) {sort(x); exp(mean(log(x[2:length(x)-1])))} | |
# Rank by arithmetic means | |
means <- rowMeans(past_measurements[, cols_numeric]) | |
print("Rank by overall arithmetic means") | |
print(paste(past_measurements$name[order(means)], sort(means))) | |
# Rank by clock-time means | |
clock_means <- rowMeans(past_measurements[, cols_clock]) | |
print("Rank by clock-time means") | |
print(paste(past_measurements$name[order(clock_means)], sort(clock_means))) | |
# Rank by Geometric means | |
geom_means <- apply(past_measurements[, cols_numeric], 1, gmean) | |
print("Rank by overall geometric means") | |
print(paste(past_measurements$name[order(geom_means)], sort(geom_means))) | |
# Rank by Geometric clock-time means | |
clock_geom_means <- apply(past_measurements[, cols_clock], 1, gmean) | |
print("Rank by clock-time geometric means") | |
print(paste(past_measurements$name[order(clock_geom_means)], sort(clock_geom_means))) | |
# Rank by most wins | |
wins <- apply(past_measurements[, cols_numeric], 2, function(x) which.min(x)) | |
wins <- data.frame(idx=names(table(wins)), wins=as.numeric(table(wins)), stringsAsFactors = F) | |
# Add missing values, i.e. the experiments with no wins | |
wins <- merge(data.frame(idx=1:nrow(past_measurements)), wins, all = T) | |
wins$wins[is.na(wins$wins)] <- 0 | |
print("Rank by most wins") | |
print(paste(past_measurements[order(wins$wins, decreasing=T), ]$name, sort(wins$wins, decr=T))) | |
# Rank by most clock-time wins | |
wins <- apply(past_measurements[, cols_clock], 2, function(x) which.min(x)) | |
wins <- data.frame(idx=names(table(wins)), wins=as.numeric(table(wins)), stringsAsFactors = F) | |
# Add missing values, i.e. the experiments with no wins | |
wins <- merge(data.frame(idx=1:nrow(past_measurements)), wins, all = T) | |
wins$wins[is.na(wins$wins)] <- 0 | |
print("Rank by most clock-time wins") | |
print(paste(past_measurements[order(wins$wins, decreasing=T), ]$name, sort(wins$wins, decr=T))) | |
# Rank by most impressive win! | |
best <- apply(past_measurements[, cols_clock], 2, function(x) which.min(x)) | |
best <- data.frame(idx=names(best), name=past_measurements$name[best], stringsAsFactors = F) | |
best$ratio <- apply(past_measurements[, cols_clock], 2, function(x) max(x)/min(x)) | |
best <- best[order(best$ratio, decreasing = T), ] | |
print("Rank by the highest speedup/ratio in a single score") | |
print(best) | |
# Rank by the most marginal winner! (the highest difference from the second) | |
best_diff <- do.call("rbind", apply(past_measurements[, cols_clock], 2, function(x) { | |
best <- which.min(x) | |
x <- sort(x) | |
data.frame(id=best, ratio=x[2]/x[1], stringsAsFactors = F) | |
})) | |
best_diff$name <- past_measurements$name[best_diff$id] | |
best_diff <- best_diff[order(best_diff$ratio, decreasing = T), ] | |
print("Rank by the highest marginal difference from the second in a single score") | |
print(best_diff) | |
# Rank by highest parallelization ratio - means_cput / means_clock | |
cols <- colnames(past_measurements) | |
cols_cput_paired <- setdiff(setdiff(cols_cput, cols_parallel), cols_aggregated) | |
cols_clock_paired <- which(cols %in% sub("cput_", "clock_", cols[cols_cput_paired])) | |
cput_clock_ratios <- rowMeans(past_measurements[, cols_cput_paired]) / | |
rowMeans(past_measurements[, cols_clock_paired]) | |
print("Rank by highest clock-time/cpu-time ratio") | |
print(paste(past_measurements$name[order(cput_clock_ratios, decreasing = T)], | |
sort(cput_clock_ratios, decreasing = T))) | |
# Rank by overall scaled arithmetic means of CPU times | |
clock_scaled_means <- rowMeans(scale(past_measurements[, cols_cput], center = F)) | |
print("Rank by overall scaled CPU times arithmetic means") | |
print(paste(past_measurements$name[order(clock_scaled_means)], sort(clock_scaled_means))) | |
# Rank by overall scaled arithmetic means of clock times | |
clock_scaled_means <- rowMeans(scale(past_measurements[, cols_clock], center = F)) | |
print("Rank by overall scaled clock-times arithmetic means") | |
print(paste(past_measurements$name[order(clock_scaled_means)], sort(clock_scaled_means))) | |
# Rank by overall scaled geometric means (trimmed) of clock times | |
clock_scaled_gmeans <- apply(scale(past_measurements[, cols_clock], center = F), | |
1, trgmean) | |
print("Rank by overall scaled clock-times (trimmed) geometric means") | |
print(paste(past_measurements$name[order(clock_scaled_gmeans)], sort(clock_scaled_gmeans))) | |
} | |
top_experiments <- function(n_best=8) { | |
if(!exists("past_measurements")) | |
load_measurements() | |
cols_numeric <- 3:ncol(past_measurements) | |
cols_clock <- which(!grepl("^cput_", colnames(past_measurements))) | |
cols_clock <- cols_clock[-c(1,2)] | |
# Rank by most clock-time wins | |
wins <- apply(past_measurements[, cols_clock], 2, function(x) which.min(x)) | |
wins <- data.frame(idx=names(table(wins)), wins=as.numeric(table(wins)), stringsAsFactors = F) | |
# Add missing values, i.e. the experiments with no wins | |
wins <- merge(data.frame(idx=1:nrow(past_measurements)), wins, all = T) | |
wins$wins[is.na(wins$wins)] <- 0 | |
head(past_measurements[order(wins$wins, decreasing=T), ]$name, n_best) | |
} | |
plot_experiments <- function(){ | |
library(ggplot2) | |
library(reshape2) | |
if(!exists("past_measurements")) | |
load_measurements() | |
# the first column is a timestamp, the second an id of the experiment | |
measure.vars <- colnames(past_measurements)[-c(1,2)] | |
# define the attribute columns | |
ids <- c("name", "blas", "lapack", "jit", | |
"r_version", "blas_opt", "lapack_opt", | |
"blas_threaded", "lapack_threaded") | |
# Annotate experiments with metadata about the math libraries, r version and jit level used | |
m <- annotate_experiments() | |
# columns with cpu-time measurements | |
cols_cput <- colnames(m)[which(grepl("^cput_", colnames(m)))] | |
# columns with geometric means (cloc-time) measurements | |
cols_gmean <- colnames(m)[which(grepl("^trimgmean_", colnames(m)))] | |
# columns with aggregated clock-time measurements | |
cols_total <- colnames(m)[which(grepl("^clock.*_ALL$|^clock_PARALLEL$", colnames(m)))] | |
# columns with clock-time measurements | |
cols_clock <- colnames(m)[which(grepl("^clock_|^trimgmean_", colnames(m)))] | |
cols_clock <- setdiff(cols_clock, cols_total) | |
# Convert measurements to long format | |
m_melt <- melt(m, id.vars = ids, | |
measure.vars = measure.vars) | |
m_melt$measurement <- as.character(m_melt$variable) | |
m_melt$value <- as.numeric(m_melt$value) | |
# Plot single columns | |
qplot(name, clock_PARALLEL, data=m) + coord_flip() | |
# Plot multiple measurements | |
measurements_total <- subset(m_melt, measurement %in% cols_total) | |
measurements_clock <- subset(m_melt, measurement %in% cols_clock) | |
measurements_clock_threaded <- subset(measurements_clock, blas_threaded == TRUE) | |
measurements_openblasp <- subset(measurements_clock, blas == "libopenblasp") | |
measurements_clock_top <- subset(measurements_clock, name %in% top_experiments()) | |
# Line charts | |
g <- ggplot(measurements_total, aes(x=measurement, y=value, color=name, group=name)) + | |
scale_fill_brewer(type="qual", palette = 1) + | |
theme(axis.text.x = element_text(angle=90)) | |
g + geom_line(data=measurements_clock_threaded, position = "jitter") | |
# Bar charts | |
g <- ggplot(measurements_total, aes(x=measurement, y=value, fill=name)) + | |
scale_fill_brewer(type="qual") | |
# Bar chart of cols_total | |
g + geom_bar(data=measurements_total, stat="identity", position="dodge") | |
# Bar chart of cols_clock | |
g + geom_bar(data=measurements_clock, stat="identity", position="dodge") + coord_flip() | |
# Bar chart of cols_clock for threaded libraries | |
g + geom_bar(data=measurements_clock_threaded, stat="identity", position="dodge") + coord_flip() | |
# Bar chart of cols_clock for topN experiments across other settings | |
g + geom_bar(data=measurements_clock_top, stat="identity", position="dodge") + coord_flip() | |
} | |
########################################## | |
## Benchmark functions | |
########################################## | |
## R Benchmark 2.5 ## | |
## adapted as a function | |
# http://r.research.att.com/benchmarks/R-benchmark-25.R | |
# R Benchmark 2.5 (06/2008) [Simon Urbanek] | |
# version 2.5: scaled to get roughly 1s per test, R 2.7.0 @ 2.6GHz Mac Pro | |
# R Benchmark 2.4 (06/2008) [Simon Urbanek] | |
# version 2.4 adapted to more recent Matrix package | |
# R Benchmark 2.3 (21 April 2004) | |
# Warning: changes are not carefully checked yet! | |
# version 2.3 adapted to R 1.9.0 | |
# Many thanks to Douglas Bates ([email protected]) for improvements! | |
# version 2.2 adapted to R 1.8.0 | |
# version 2.1 adapted to R 1.7.0 | |
# version 2, scaled to get 1 +/- 0.1 sec with R 1.6.2 | |
# using the standard ATLAS library (Rblas.dll) | |
# on a Pentium IV 1.6 Ghz with 1 Gb Ram on Win XP pro | |
# revised and optimized for R v. 1.5.x, 8 June 2002 | |
# Requires additionnal libraries: Matrix, SuppDists | |
# Author : Philippe Grosjean | |
# eMail : [email protected] | |
# Web : http://www.sciviews.org | |
# License: GPL 2 or above at your convenience (see: http://www.gnu.org) | |
# | |
# Several tests are adapted from the Splus Benchmark Test V. 2 | |
# by Stephan Steinhaus ([email protected]) | |
# Reference for Escoufier's equivalents vectors (test III.5): | |
# Escoufier Y., 1970. Echantillonnage dans une population de variables | |
# aleatoires réelles. Publ. Inst. Statis. Univ. Paris 19 Fasc 4, 1-47. | |
#### | |
rbenchmark <- function(runs=3) { | |
set.seed (1) | |
runs <- runs # Number of times the tests are executed | |
times <- rep(0, 15); dim(times) <- c(5,3) | |
require(Matrix) # Optimized matrix operations | |
# Runif <- rMWC1019 # The fast uniform number generator | |
Runif <- runif | |
# If you don't have SuppDists, you can use: Runif <- runif | |
# a <- rMWC1019(10, new.start=TRUE, seed=492166) # Init. the generator | |
# Rnorm <- rziggurat # The fast normal number generator | |
# If you don't have SuppDists, you can use: Rnorm <- rnorm | |
# b <- rziggurat(10, new.start=TRUE) # Init. the generator | |
Rnorm <- rnorm | |
# remove("a", "b") | |
options(object.size=100000000) | |
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console() | |
cat("## Synthetic benchmarks (2.5)\n\n") | |
cat(c("Number of times each test is run__________________________: ", runs)) | |
cat("\n\n") | |
cat("### I. Matrix calculation\n\n") | |
# (1) | |
name = "MAT_NEW_T_DEF" | |
cumulate <- numeric(5); a <- 0; b <- 0 | |
for (i in 1:runs) { | |
invisible(gc()) | |
timing <- system.time({ | |
a <- matrix(Rnorm(2500*2500)/10, ncol=2500, nrow=2500); | |
b <- t(a); | |
dim(b) <- c(1250, 5000); | |
a <- t(b) | |
}) | |
cumulate <- cumulate + timing | |
} | |
remove("a", "b") | |
timing <- cumulate/runs | |
times[1, 1] <- timing[3] | |
# Add the measurement | |
add_measurement(paste0("cput_", name), timing[1] + timing[2]) | |
add_measurement(paste0("clock_", name), timing[3]) | |
cat(c("Creation, transp., deformation of a 2500x2500 matrix (sec): ", timing[3], "\n")) | |
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console() | |
# (2) | |
name = "MAT_POW_2500" | |
cumulate <- numeric(5); b <- 0 | |
for (i in 1:runs) { | |
a <- abs(matrix(Rnorm(2500*2500)/2, ncol=2500, nrow=2500)); | |
invisible(gc()) | |
timing <- system.time({ | |
b <- a^1000 | |
}) | |
cumulate <- cumulate + timing | |
} | |
remove("a", "b") | |
timing <- cumulate/runs | |
times[2, 1] <- timing[3] | |
# Add the measurement | |
add_measurement(paste0("cput_", name), timing[1] + timing[2]) | |
add_measurement(paste0("clock_", name), timing[3]) | |
cat(c("2500x2500 normal distributed random matrix ^1000____ (sec): ", timing[3], "\n")) | |
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console() | |
# (3) | |
name = "SORT_7M" | |
cumulate <- numeric(5); b <- 0 | |
for (i in 1:runs) { | |
a <- Rnorm(7000000) | |
invisible(gc()) | |
timing <- system.time({ | |
b <- sort(a, method="quick") # Sort is modified in v. 1.5.x | |
}) | |
cumulate <- cumulate + timing | |
} | |
remove("a", "b") | |
timing <- cumulate/runs | |
times[3, 1] <- timing[3] | |
# Add the measurement | |
add_measurement(paste0("cput_", name), timing[1] + timing[2]) | |
add_measurement(paste0("clock_", name), timing[3]) | |
cat(c("Sorting of 7,000,000 random values__________________ (sec): ", timing[3], "\n")) | |
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console() | |
# (4) | |
name = "CROSS_PROD_2800" | |
cumulate <- numeric(5); b <- 0 | |
for (i in 1:runs) { | |
a <- Rnorm(2800*2800); dim(a) <- c(2800, 2800) | |
invisible(gc()) | |
timing <- system.time({ | |
b <- crossprod(a) # equivalent to: b <- t(a) %*% a | |
}) | |
cumulate <- cumulate + timing | |
} | |
remove("a", "b") | |
timing <- cumulate/runs | |
times[4, 1] <- timing[3] | |
# Add the measurement | |
add_measurement(paste0("cput_", name), timing[1] + timing[2]) | |
add_measurement(paste0("clock_", name), timing[3]) | |
cat(c("2800x2800 cross-product matrix (b = a' * a)_________ (sec): ", timing[3], "\n")) | |
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console() | |
# (5) | |
name = "LM_CUSTOM" | |
cumulate <- numeric(5); c <- 0; qra <-0 | |
for (i in 1:runs) { | |
a <- new("dgeMatrix", x = Rnorm(2000*2000), Dim = as.integer(c(2000,2000))) | |
b <- as.double(1:2000) | |
invisible(gc()) | |
timing <- system.time({ | |
c <- solve(crossprod(a), crossprod(a,b)) | |
}) | |
cumulate <- cumulate + timing | |
# This is the old method | |
# a <- Rnorm(600*600); dim(a) <- c(600,600) | |
# b <- 1:600 | |
# invisible(gc()) | |
# timing <- system.time({ | |
# qra <- qr(a, tol = 1e-7); | |
# c <- qr.coef(qra, b) | |
# #Rem: a little faster than c <- lsfit(a, b, inter=F)$coefficients | |
# })[3] | |
# cumulate <- cumulate + timing | |
} | |
remove("a", "b", "c", "qra") | |
timing <- cumulate/runs | |
times[5, 1] <- timing[3] | |
# Add the measurement | |
add_measurement(paste0("cput_", name), timing[1] + timing[2]) | |
add_measurement(paste0("clock_", name), timing[3]) | |
cat(c("Linear regr. over a 3000x3000 matrix (c = a \\ b')___ (sec): ", timing[3], "\n")) | |
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console() | |
# Summary of Matrix calculation tests | |
times[ , 1] <- sort(times[ , 1]) | |
cat(" --------------------------------------------\n") | |
cat(c(" Trimmed geom. mean (2 extremes eliminated): ", exp(mean(log(times[2:4, 1]))), "\n\n")) | |
# Add the overall measurement | |
add_measurement(paste0("trimgmean_", "MATRIX_CALC"), exp(mean(log(times[2:4, 1])))) | |
cat("### II. Matrix functions\n\n") | |
if (R.Version()$os == "Win32") flush.console() | |
# (1) | |
name = "FFT" | |
cumulate <- numeric(5); b <- 0 | |
for (i in 1:runs) { | |
a <- Rnorm(2400000) | |
invisible(gc()) | |
timing <- system.time({ | |
b <- fft(a) | |
}) | |
cumulate <- cumulate + timing | |
} | |
remove("a", "b") | |
timing <- cumulate/runs | |
times[1, 2] <- timing[3] | |
# Add the measurement | |
add_measurement(paste0("cput_", name), timing[1] + timing[2]) | |
add_measurement(paste0("clock_", name), timing[3]) | |
cat(c("FFT over 2,400,000 random values____________________ (sec): ", timing[3], "\n")) | |
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console() | |
# (2) | |
name = "EIGEN_600" | |
cumulate <- numeric(5); b <- 0 | |
for (i in 1:runs) { | |
a <- array(Rnorm(600*600), dim = c(600, 600)) | |
# Only needed if using eigen.Matrix(): Matrix.class(a) | |
invisible(gc()) | |
timing <- system.time({ | |
b <- eigen(a, symmetric=FALSE, only.values=TRUE)$Value | |
# Rem: on my machine, it is faster than: | |
# b <- La.eigen(a, symmetric=F, only.values=T, method="dsyevr")$Value | |
# b <- La.eigen(a, symmetric=F, only.values=T, method="dsyev")$Value | |
# b <- eigen.Matrix(a, vectors = F)$Value | |
}) | |
cumulate <- cumulate + timing | |
} | |
remove("a", "b") | |
timing <- cumulate/runs | |
times[2, 2] <- timing[3] | |
# Add the measurement | |
add_measurement(paste0("cput_", name), timing[1] + timing[2]) | |
add_measurement(paste0("clock_", name), timing[3]) | |
cat(c("Eigenvalues of a 640x640 random matrix______________ (sec): ", timing[3], "\n")) | |
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console() | |
# (3) | |
name = "DET_2500" | |
cumulate <- numeric(5); b <- 0 | |
for (i in 1:runs) { | |
a <- Rnorm(2500*2500); dim(a) <- c(2500, 2500) | |
#Matrix.class(a) | |
invisible(gc()) | |
timing <- system.time({ | |
#b <- determinant(a, logarithm=F) | |
# Rem: the following is slower on my computer! | |
# b <- det.default(a) | |
b <- det(a) | |
}) | |
cumulate <- cumulate + timing | |
} | |
remove("a", "b") | |
timing <- cumulate/runs | |
times[3, 2] <- timing[3] | |
# Add the measurement | |
add_measurement(paste0("cput_", name), timing[1] + timing[2]) | |
add_measurement(paste0("clock_", name), timing[3]) | |
cat(c("Determinant of a 2500x2500 random matrix____________ (sec): ", timing[3], "\n")) | |
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console() | |
# (4) | |
name = "CHOL_3K" | |
cumulate <- numeric(5); b <- 0 | |
for (i in 1:runs) { | |
a <- crossprod(new("dgeMatrix", x = Rnorm(3000*3000), | |
Dim = as.integer(c(3000, 3000)))) | |
invisible(gc()) | |
#a <- Rnorm(900*900); dim(a) <- c(900, 900) | |
#a <- crossprod(a, a) | |
timing <- system.time({ | |
b <- chol(a) | |
}) | |
cumulate <- cumulate + timing | |
} | |
remove("a", "b") | |
timing <- cumulate/runs | |
times[4, 2] <- timing[3] | |
# Add the measurement | |
add_measurement(paste0("cput_", name), timing[1] + timing[2]) | |
add_measurement(paste0("clock_", name), timing[3]) | |
cat(c("Cholesky decomposition of a 3000x3000 matrix________ (sec): ", timing[3], "\n")) | |
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console() | |
# (5) | |
name = "INVERSE_1600" | |
cumulate <- numeric(5); b <- 0 | |
for (i in 1:runs) { | |
a <- new("dgeMatrix", x = Rnorm(1600*1600), Dim = as.integer(c(1600, 1600))) | |
invisible(gc()) | |
# a <- Rnorm(400*400); dim(a) <- c(400, 400) | |
timing <- system.time({ | |
# b <- qr.solve(a) | |
# Rem: a little faster than | |
b <- solve(a) | |
}) | |
cumulate <- cumulate + timing | |
} | |
remove("a", "b") | |
timing <- cumulate/runs | |
times[5, 2] <- timing[3] | |
# Add the measurement | |
add_measurement(paste0("cput_", name), timing[1] + timing[2]) | |
add_measurement(paste0("clock_", name), timing[3]) | |
cat(c("Inverse of a 1600x1600 random matrix________________ (sec): ", timing[3], "\n")) | |
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console() | |
# Total time for Matrix functions | |
times[ , 2] <- sort(times[ , 2]) | |
cat(" --------------------------------------------\n") | |
cat(c(" Trimmed geom. mean (2 extremes eliminated): ", exp(mean(log(times[2:4, 2]))), "\n\n")) | |
# Add the overall measurement | |
add_measurement(paste0("trimgmean_", "MATRIX_FUNC"), exp(mean(log(times[2:4, 2])))) | |
cat("### III. Programmation\n\n") | |
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console() | |
# (1) | |
name = "FIBO_3.5M" | |
cumulate <- numeric(5); a <- 0; b <- 0; phi <- 1.6180339887498949 | |
for (i in 1:runs) { | |
a <- floor(Runif(3500000)*1000) | |
invisible(gc()) | |
timing <- system.time({ | |
b <- (phi^a - (-phi)^(-a))/sqrt(5) | |
}) | |
cumulate <- cumulate + timing | |
} | |
remove("a", "b", "phi") | |
timing <- cumulate/runs | |
times[1, 3] <- timing[3] | |
# Add the measurement | |
add_measurement(paste0("cput_", name), timing[1] + timing[2]) | |
add_measurement(paste0("clock_", name), timing[3]) | |
cat(c("3,500,000 Fibonacci numbers calculation (vector calc)(sec): ", timing[3], "\n")) | |
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console() | |
# (2) | |
name = "HILBERT_3K" | |
cumulate <- numeric(5); a <- 3000; b <- 0 | |
for (i in 1:runs) { | |
invisible(gc()) | |
timing <- system.time({ | |
b <- rep(1:a, a); dim(b) <- c(a, a); | |
b <- 1 / (t(b) + 0:(a-1)) | |
# Rem: this is twice as fast as the following code proposed by R programmers | |
# a <- 1:a; b <- 1 / outer(a - 1, a, "+") | |
}) | |
cumulate <- cumulate + timing | |
} | |
remove("a", "b") | |
timing <- cumulate/runs | |
times[2, 3] <- timing[3] | |
# Add the measurement | |
add_measurement(paste0("cput_", name), timing[1] + timing[2]) | |
add_measurement(paste0("clock_", name), timing[3]) | |
cat(c("Creation of a 3000x3000 Hilbert matrix (matrix calc) (sec): ", timing[3], "\n")) | |
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console() | |
# (3) | |
name = "GCD_400K" | |
cumulate <- numeric(5); c <- 0 | |
# Define the gcd function | |
gcd2 <- function(x, y) {if (sum(y > 1.0E-4) == 0) x else {y[y == 0] <- x[y == 0]; Recall(y, x %% y)}} | |
for (i in 1:runs) { | |
a <- ceiling(Runif(400000)*1000) | |
b <- ceiling(Runif(400000)*1000) | |
invisible(gc()) | |
timing <- system.time({ | |
c <- gcd2(a, b) # gcd2 is a recursive function | |
}) | |
cumulate <- cumulate + timing | |
} | |
remove("a", "b", "c", "gcd2") | |
timing <- cumulate/runs | |
times[3, 3] <- timing[3] | |
# Add the measurement | |
add_measurement(paste0("cput_", name), timing[1] + timing[2]) | |
add_measurement(paste0("clock_", name), timing[3]) | |
cat(c("Grand common divisors of 400,000 pairs (recursion)__ (sec): ", timing[3], "\n")) | |
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console() | |
# (4) | |
name = "TOEPLITZ_500" | |
cumulate <- numeric(5); b <- 0 | |
for (i in 1:runs) { | |
b <- rep(0, 500*500); dim(b) <- c(500, 500) | |
invisible(gc()) | |
timing <- system.time({ | |
# Rem: there are faster ways to do this | |
# but here we want to time loops (220*220 'for' loops)! | |
for (j in 1:500) { | |
for (k in 1:500) { | |
b[k,j] <- abs(j - k) + 1 | |
} | |
} | |
}) | |
cumulate <- cumulate + timing | |
} | |
remove("b", "j", "k") | |
timing <- cumulate/runs | |
times[4, 3] <- timing[3] | |
# Add the measurement | |
add_measurement(paste0("cput_", name), timing[1] + timing[2]) | |
add_measurement(paste0("clock_", name), timing[3]) | |
cat(c("Creation of a 500x500 Toeplitz matrix (loops)_______ (sec): ", timing[3], "\n")) | |
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console() | |
# (5) | |
name = "ESCOUFIER_45" | |
cumulate <- numeric(5); p <- 0; vt <- 0; vr <- 0; vrt <- 0; rvt <- 0; RV <- 0; j <- 0; k <- 0; | |
x2 <- 0; R <- 0; Rxx <- 0; Ryy <- 0; Rxy <- 0; Ryx <- 0; Rvmax <- 0 | |
# Calculate the trace of a matrix (sum of its diagonal elements) | |
Trace <- function(y) {sum(c(y)[1 + 0:(min(dim(y)) - 1) * (dim(y)[1] + 1)], na.rm=FALSE)} | |
for (i in 1:runs) { | |
x <- abs(Rnorm(45*45)); dim(x) <- c(45, 45) | |
invisible(gc()) | |
timing <- system.time({ | |
# Calculation of Escoufier's equivalent vectors | |
p <- ncol(x) | |
vt <- 1:p # Variables to test | |
vr <- NULL # Result: ordered variables | |
RV <- 1:p # Result: correlations | |
vrt <- NULL | |
for (j in 1:p) { # loop on the variable number | |
Rvmax <- 0 | |
for (k in 1:(p-j+1)) { # loop on the variables | |
x2 <- cbind(x, x[,vr], x[,vt[k]]) | |
R <- cor(x2) # Correlations table | |
Ryy <- R[1:p, 1:p] | |
Rxx <- R[(p+1):(p+j), (p+1):(p+j)] | |
Rxy <- R[(p+1):(p+j), 1:p] | |
Ryx <- t(Rxy) | |
rvt <- Trace(Ryx %*% Rxy) / sqrt(Trace(Ryy %*% Ryy) * Trace(Rxx %*% Rxx)) # RV calculation | |
if (rvt > Rvmax) { | |
Rvmax <- rvt # test of RV | |
vrt <- vt[k] # temporary held variable | |
} | |
} | |
vr[j] <- vrt # Result: variable | |
RV[j] <- Rvmax # Result: correlation | |
vt <- vt[vt!=vr[j]] # reidentify variables to test | |
} | |
}) | |
cumulate <- cumulate + timing | |
} | |
remove("x", "p", "vt", "vr", "vrt", "rvt", "RV", "j", "k") | |
remove("x2", "R", "Rxx", "Ryy", "Rxy", "Ryx", "Rvmax", "Trace") | |
timing <- cumulate/runs | |
times[5, 3] <- timing[3] | |
# Add the measurement | |
add_measurement(paste0("cput_", name), timing[1] + timing[2]) | |
add_measurement(paste0("clock_", name), timing[3]) | |
cat(c("Escoufier's method on a 45x45 matrix (mixed)________ (sec): ", timing[3], "\n")) | |
if (R.Version()$os == "Win32" || R.Version()$os == "mingw32") flush.console() | |
## Total time for Programmation tests | |
times[ , 3] <- sort(times[ , 3]) | |
cat(" --------------------------------------------\n") | |
cat(c(" Trimmed geom. mean (2 extremes eliminated): ", exp(mean(log(times[2:4, 3]))), "\n\n\n")) | |
cat("\n\n") | |
# Add the overall measurement | |
add_measurement(paste0("trimgmean_", "PROGRAMMATION"), exp(mean(log(times[2:4, 3])))) | |
#### TOTAL TIME | |
cat("### Total time for synthetic tests\n\n") | |
cat(c("Total time for all 15 tests_________________________ (sec): ", sum(times), "\n")) | |
cat(c("Overall mean (sum of I, II and III trimmed means/3)_ (sec): ", exp(mean(log(times[2:4, ]))), "\n")) | |
cat("\n\n") | |
# Add the net time taken for all synthetic tests as a measurement | |
add_measurement(paste0("net_sum_", "SYNTHETIC"), sum(times)) | |
} | |
# run r benchmark 2.5 | |
run_synthetic <- function() { | |
run_and_measure("SYNTHETIC_ALL", rbenchmark(runs=8)) | |
} | |
############################################### | |
## bench.R | |
## http://r.research.att.com/benchmarks/bench.R | |
############################################### | |
run_extras <- function() { | |
set.seed (1) | |
cat("\n") | |
cat("## Extra benchmarks\n\n") | |
# hilbert | |
hilbert <- function(n) 1/(outer(seq(n),seq(n),"+")-1) | |
X <- hilbert(1000) | |
run_and_measure("HILBERT_1000", eigen(X)) | |
# loess | |
loess.me <- function(n) { | |
x<-rnorm(10^n); y<-rnorm(10^n); z<-rnorm(10^n) | |
invisible(loess(z~x+y)) | |
} | |
run_and_measure("LOESS_4", loess.me(4)) | |
} | |
##################################### | |
## Revolution Analytics Benchmarks | |
## | |
##################################### | |
# Matrix multiplication | |
run_matmult <- function(m=5000, n=5000){ | |
cat("\n") | |
A <- matrix (runif (m*n),m,n) | |
run_and_measure("MATMUL_custom", sum(A%*%t(A))) | |
run_and_measure("MATMUL_tcross", tcrossprod(A)) | |
} | |
# Linear Discriminant Analysis | |
run_lda <- function(m=10000, n=1000, g=5) { | |
cat("\n") | |
require ('MASS') | |
set.seed (1) | |
A <- matrix (runif (m*n),m,n) | |
A <- data.frame (A, fac=sample(LETTERS[1:g], m, replace=TRUE)) | |
k <- round (m/2) | |
train <- sample(1:m, k) | |
run_and_measure("LDA", L <- lda(fac ~., data=A, prior=rep(1,g)/g, subset=train)) | |
} | |
# Cholesky Factorization | |
# Compute the Choleski factorization of a real symmetric positive-definite square matrix. | |
# we calculate the average of chol and chol2inv in a large matrix | |
# uses LAPACK routines DPOTRF and DPSTRF | |
run_cholesky_large <- function(n=7000) { | |
cat("\n") | |
A <- matrix (runif (n*n), n, n) | |
B <- crossprod(A) | |
run_and_measure("CHOL_7K", C <- chol(B, pivot = T)) | |
run_and_measure("CHOL2INV_7K", chol2inv(C)) | |
} | |
# Singular Value Decomposition (SVD) | |
# uses LAPACK routines DGESDD and ZGESDD | |
run_svd <- function(m=10000, n=2000) { | |
cat("\n") | |
A <- matrix (runif (m*n),m,n) | |
run_and_measure("SVD", S <- svd(A,nu=0,nv=0)) | |
} | |
# Principal Components Analysis (prcomp) | |
# prcomp uses svd, so we expect that LAPACK is used | |
# princomp uses eigen, so we also expect that LAPACK is used | |
run_pca <- function(m=4000, n=2000) { | |
A <- matrix (runif (m*n),m,n) | |
run_and_measure("PRCOMP", P <- prcomp(A)) | |
run_and_measure("PRINCOMP", P <- princomp(A)) | |
} | |
# QR decomposition | |
# using LAPACK routines DGEQP3 and ZGEQP3 | |
run_qr <- function(m=7000, n=2000) { | |
X <- matrix(runif(m*n), m, n) | |
run_and_measure("QR_la", Y <- qr(X, LAPACK = T)) | |
} | |
# Eigen | |
# Uses LAPACK routines DSYEVR, DGEEV, ZHEEV and ZGEEV | |
run_eigen <- function(m=3000) { | |
hilbert <- function(n) 1/(outer(seq(n),seq(n),"+")-1) | |
X <- hilbert(m) | |
run_and_measure("EIGEN", eigen(X)) | |
} | |
# Kappa | |
# Uses LAPACK routines DTRCON and ZTRCON | |
run_kappa <- function(m=2000) { | |
X <- matrix(runif(m*m), m, m) | |
run_and_measure("KAPPA", kappa(X, norm = "O", LINPACK=F)) | |
} | |
# SOLVE | |
# Uses LAPACK routines DGESV and ZGESV | |
run_solve <- function(m=3000) { | |
hilbert <- function(n) { i <- 1:n; 1 / outer(i - 1, i, "+") } | |
X <- hilbert(m) + matrix(rnorm(m*m, 1), m) | |
run_and_measure("SOLVE", solve(X)) | |
} | |
# NORM | |
# Uses LAPACK routine DLANGE | |
run_norm <- function(m=7000) { | |
hilbert <- function(n) { i <- 1:n; 1 / outer(i - 1, i, "+") } | |
X <- hilbert(m) | |
#nTyp <- eval(formals(base::norm)$type) | |
nTyp <- c('M','1','O','I','F', 'E') | |
run_and_measure("NORM", sapply(nTyp, norm, x = X)) | |
} | |
# BALANCE matrix | |
# Uses LAPACK DGEBAL | |
run_balance <- function(m=10000) { | |
X <- matrix(runif(m*m), m, m) | |
run_and_measure("BALANCE", expm::balance(X)) | |
} | |
########################## | |
## Implicit parallelism ## | |
########################## | |
# Load the parallel library and fix conflicts with MKL/OpenMP multithreaded libs | |
load_parallel <- function() { | |
library(parallel) | |
# Fix conflicts for the MKL and OpenBlas OpenMP multithreaded libraries | |
if(exists("setMKLthreads", mode="function")) | |
setMKLthreads(1) | |
if(exists("openblas.set.num.threads", mode="function")) | |
openblas.set.num.threads(1) | |
} | |
# Produce a matrix with "times" permutations of the "tests" vector | |
permute_tests <- function(tests, times=4) { | |
if(times > 1) { | |
set.seed(NULL) | |
tests <- rbind(tests, rev(tests)) # adding reverse second row | |
for (i in seq(length.out = times-2)) | |
tests <- rbind(tests, sample(tests[1, ], ncol(tests), replace = F)) | |
} | |
tests | |
} | |
# Run the default benchmark in parallel | |
run_parallel <- function(cores=4) { | |
load_parallel() | |
set.seed (1) | |
runs <- rep(2, cores) | |
run_and_measure("PARALLEL", mclapply(runs, rbenchmark, mc.cores=cores)) | |
} | |
# Run "cores" parallel threads for the same code (in the same order) | |
run_parallel_ident <- function(cores=4) { | |
load_parallel() | |
set.seed (1) | |
runs <- rep(1, cores) | |
bench <- function(runs=1) { | |
funcs <- c("run_matmult", "run_qr", "run_norm", "run_extras") # "run_solve" | |
sapply(funcs, function(x) eval(get(x)())) | |
} | |
run_and_measure("PARALLEL_IDENTICAL", mclapply(runs, bench, mc.cores=cores)) | |
} | |
# Run "cores" parallel threads in a permutted order | |
run_parallel_permuted <- function(cores=4) { | |
load_parallel() | |
set.seed (1) | |
index <- 1:cores | |
# reset output file | |
system(": > /tmp/parbench") | |
funcs <- c("run_matmult", "run_qr", "run_norm", "run_extras") # "run_solve" | |
funcs <- permute_tests(funcs, cores) | |
bench <- function(index=1) { | |
set.seed(NULL) | |
cmd <- paste0("echo ", paste(funcs[index, ], collapse = " "), " >>/tmp/parbench") | |
system(cmd) | |
sapply(funcs[index,], function(x) eval(get(x)())) | |
} | |
run_and_measure("PARALLEL_PERMUTTED", mclapply(index, bench, mc.cores=cores)) | |
} | |
########################## | |
## GPU tests ## | |
########################## | |
# Matrix multiplication on GPU | |
# gpuMatMult is merely a wrapper for the CUBLAS cublasDgemm function. | |
run_matmult_gpu <- function(m=5000, n=5000){ | |
library("gputools") | |
cat("\n") | |
A <- matrix(runif (m*n),m,n) | |
run_and_measure("MATMUL_GPU", gpuMatMult(A, t(A))) | |
run_and_measure("MATMUL_tcross_GPU", gpuTcrossprod(A)) | |
# Another one to compare with CROSS_PROD_2800 (CPU) | |
A <- matrix (runif(2800*2800),2800,2800) | |
run_and_measure("CROSS_PROD_2800_GPU", gpuCrossprod(A)) | |
} | |
# QR decomposition on GPU | |
# gpuQR estimates the QR decomposition for a matrix using column pivoting and householder matrices. | |
run_qr_gpu <- function(m=7000, n=2000) { | |
library("gputools") | |
X <- matrix(runif(m*n), m, n) | |
run_and_measure("QR_GPU", gpuQr(X)) | |
} | |
# Inverse of a Matrix using GPU | |
run_inverse_gpu <- function(m=1600, n=1600) { | |
library("gputools") | |
A <- matrix (runif (m*n),m,n) | |
run_and_measure("INVERSE_1600_GPU", b <- gpuSolve(A)) | |
} | |
# Custom linear model impl. using gpu functions | |
run_custom_lm_gpu <- function(m=2000, n=2000) { | |
library("gputools") | |
A <- matrix (runif (m*n),m,n) | |
b <- as.double(1:m) | |
run_and_measure("LM_CUSTOM_GPU", c <- gpuSolve(gpuCrossprod(A), gpuCrossprod(A,b))) | |
} | |
# Run a sort task in GPU using the Rth:rthsort() function | |
run_sort_gpu <- function(m=7000000) { | |
library("Rth") | |
a <- runif(m) | |
run_and_measure("SORT_7M_GPU", b <- rthsort(a)) | |
} | |
# Parallel running of matrix multiplication in the GPU | |
run_parallel_gpu <- function(reps=2) { | |
library(parallel) | |
set.seed (1) | |
cl <- makeForkCluster(2) | |
run_and_measure("GPU_PARALLEL", clusterCall(cl, function() run_matmult_gpu(2000,2000))) | |
stopCluster(cl) | |
} | |
# Run shipped demos | |
run_demos <- function() { | |
# demos <- unclass(demo(package="stats"))$results[,3] | |
demos <- c("glm.vr", "lm.glm", "nlm", "smooth") | |
run_and_measure("DEMOS", sapply(demos, function(x) demo(x, package="stats", | |
character.only = T, ask = FALSE))) | |
} | |
###################################### | |
## Batch running tests | |
# LAPACK-using methods tests | |
# functions: | |
# eigen, qr, svd, chol, chol2inv, solve, det (uses QR) | |
# kappa, rcond (similar to kappa), norm, expm::balance | |
run_lapack <- function(m=10000, n=5000) { | |
set.seed(1) | |
run_and_measure("LAPACK_ALL", { | |
run_qr() | |
run_eigen() | |
run_norm() | |
run_cholesky_large() | |
run_svd() | |
run_balance() | |
run_pca() | |
run_kappa() | |
run_solve() | |
}) | |
} | |
# We exclude the implicit parallelism test because some threaded BLAS configuration produce segfaults | |
run_safe <- function(jit_level=0) { | |
run_and_measure("TESTS_SAFE_ALL", { | |
run_synthetic() | |
run_extras() | |
run_matmult() | |
run_lda() | |
run_lapack() | |
}) | |
} | |
run_gpu <- function() { | |
run_matmult_gpu() | |
run_qr_gpu | |
run_inverse_gpu() | |
run_custom_lm_gpu() | |
} | |
run_all <- function() { | |
run_and_measure("TESTS_ALL", { | |
run_safe() | |
run_parallel() | |
}) | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment