Created
March 10, 2026 11:43
-
-
Save dutchLuck/f2dc516e2cd53b4c2eb9ea997b72d85a to your computer and use it in GitHub Desktop.
Variant of the Julia script "bfbs.jl" that uses MultiFloat instead of BigFloat to speed up calculations, but still retains more precision than Float64
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #! /usr/bin/env julia | |
| # | |
| # M U L T I F L O A T . J L | |
| # | |
| # Multi Float Basic Statistics | |
| # | |
| # multifloat.jl last updated on Tue Mar 10 20:52:50 2026 by O.H. as 0v01 | |
| # | |
| # Descendant of bfbs.jl 0v11, but adapted to use the MultiFloat package | |
| # and its Float64x4 type for high precision calculations. It is a testbed | |
| # to check the claim it is 30 times faster than the BigFloat type for high | |
| # precision calculations. (see https://juliahub.com/ui/Packages/General/MultiFloats) | |
| # | |
| # Read in one or more files containing one or more rows and columns | |
| # of numbers. Then use julia's MultiFloat calculation capability to | |
| # calculate basic statistics like median, arithmetic mean, sum, | |
| # variance and standard deviation for those rows and columns. | |
| # Finally output the basic statistics results to the stdout. | |
| ## Recipe for Linux (Ubuntu 22.04 LTS) julia install, multifloat.jl clone and run ## | |
| # sudo apt install curl | |
| # curl -fsSL http://install.julialang.org | sh | |
| # >Proceed with installation | |
| # "restart the terminal" | |
| # mkdir src | |
| # cd src | |
| # mkdir julia | |
| # cd julia | |
| # git clone https://github.com/dutchLuck/bfbs | |
| # cd bfbs | |
| # chmod u+x bfbs.jl | |
| # julia | |
| # ] | |
| # add MultiFloats | |
| # add CSV | |
| # add ArgParse | |
| # add DelimitedFiles | |
| # add Statistics | |
| # ^D | |
| # ./multifloat.jl -V | |
| # multifloat version 0v01 (2026-03-10) | |
| # | |
| # | |
| # 0v01 Switch from BigFloat to MultiFloat for higher speed & high precision calculations. | |
| # | |
| using MultiFloats # for Float64x4 type and high precision calculations | |
| using DelimitedFiles | |
| using Statistics | |
| using Printf | |
| using ArgParse | |
| using InteractiveUtils # for versioninfo() | |
| function parse_arguments() | |
| s = ArgParseSettings() | |
| @add_arg_table s begin | |
| "--comment_char", "-c" | |
| arg_type = String | |
| help = "Define the Comment Delimiter character as \"COMMENT_CHAR\". If not provided, hash (\"#\") is used." | |
| "--no_column_stats", "-C" | |
| action = :store_true | |
| help = "Disable column statistics calculation and output." | |
| "--delimiter_char", "-d" | |
| arg_type = String | |
| help = "Define the Column Delimiter character as \"DELIMITER_CHAR\". If not provided, comma (\",\") is used." | |
| "--debug", "-D" | |
| action = :store_true | |
| help = "Provide copious amounts of information about the current run and the data." | |
| "--scientific", "-e" | |
| action = :store_true | |
| help = "Output statistics results in scientific number format." | |
| "--header", "-H" | |
| action = :store_true | |
| help = "The first row is treated as a header." | |
| "--n_divisor", "-n" | |
| action = :store_true | |
| help = "Use the actual number of samples n as the Standard Deviation divisor, rather than n-1." | |
| "--print_digits", "-p" | |
| arg_type = String | |
| help = "Write output with \"PRINT-DIGITS\" digits. If not provided, 64 output digits are used." | |
| "--no_row_stats", "-R" | |
| action = :store_true | |
| help = "Disable row statistics calculation and output." | |
| "--skip", "-s" | |
| arg_type = String | |
| help = "Skip first \"SKIP\" lines in data file(s). If not provided, zero lines are skipped." | |
| "--verbose", "-v" | |
| action = :store_true | |
| help = "Provide extra information about the current run and the data." | |
| "--version", "-V" | |
| action = :store_true | |
| help = "Provide version information." | |
| "files" | |
| nargs = '*' | |
| help = "Input files containing 1 or more columns of numbers. Default file format has comma separated columns." | |
| end | |
| return parse_args(s) | |
| end | |
| # Function to read a file and convert to MultiFloat matrix | |
| function read_bignum_matrix(filepath::String, delimiter::Char, header::Bool, verbose::Bool, linestoskip::Integer, comment_start::Char) | |
| if header | |
| raw_data, raw_hdr = readdlm(filepath, delimiter, String; | |
| header=true, skipstart=linestoskip, skipblanks=true, comments=true, comment_char=comment_start) | |
| if verbose | |
| println("Header: $raw_hdr") | |
| end | |
| else | |
| raw_data = readdlm(filepath, delimiter, String; | |
| header=false, skipstart=linestoskip, skipblanks=true, comments=true, comment_char=comment_start) | |
| end | |
| num_rows, num_cols = size(raw_data) | |
| mat = Matrix{Float64x4}(undef, num_rows, num_cols) | |
| for i in 1:num_rows, j in 1:num_cols | |
| mat[i, j] = parse(Float64x4, raw_data[i, j]) | |
| end | |
| return mat | |
| end | |
| # Function to compute average and stddev for each row | |
| function row_stats(mat::Matrix{Float64x4}, use_n::Bool ) | |
| row_cnts = [length(row) for row in eachrow(mat)] | |
| row_mins = [minimum(row) for row in eachrow(mat)] | |
| row_medians = [median(row) for row in eachrow(mat)] | |
| row_maxs = [maximum(row) for row in eachrow(mat)] | |
| row_ranges = row_maxs - row_mins | |
| row_means = [mean(row) for row in eachrow(mat)] | |
| row_sums = [sum(row) for row in eachrow(mat)] | |
| row_vars = [var(row) for row in eachrow(mat)] | |
| row_stds = [std(row; corrected = !use_n) for row in eachrow(mat)] | |
| row_medians = [median(row) for row in eachrow(mat)] | |
| return row_cnts, row_mins, row_medians, row_maxs, row_ranges, row_means, row_sums, row_vars, row_stds | |
| end | |
| # Function to compute average and stddev for each column | |
| function col_stats(mat::Matrix{Float64x4}, use_n::Bool) | |
| col_cnts = [length(col) for col in eachcol(mat)] | |
| col_mins = [minimum(col) for col in eachcol(mat)] | |
| col_medians = [median(col) for col in eachcol(mat)] | |
| col_maxs = [maximum(col) for col in eachcol(mat)] | |
| col_ranges = col_maxs - col_mins | |
| col_means = [mean(col) for col in eachcol(mat)] | |
| col_sums = [sum(col) for col in eachcol(mat)] | |
| col_vars = [var(col) for col in eachcol(mat)] | |
| col_stds = [std(col; corrected = !use_n) for col in eachcol(mat)] | |
| return col_cnts, col_mins, col_medians, col_maxs, col_ranges, col_means, col_sums, col_vars, col_stds | |
| end | |
| # Print a matrix of MultiFloats with high precision | |
| function print_multifloat_matrix(mat::Matrix{Float64x4}, digits::Int64) | |
| println("\nMultiFloat Matrix:") | |
| for row in eachrow(mat) | |
| for val in row | |
| @printf("%.*e ", digits, val) # print in e format with digits number of digits | |
| end | |
| println() | |
| end | |
| end | |
| function print_basic_statistics(str::String, precision::Int64, cnts, mins, medians, maxs, ranges, means, sums, vars, stds) | |
| # Display rows results | |
| println("$str Counts:") | |
| foreach(x -> @printf("%d\n", x), cnts) | |
| # Display row results with precision | |
| println("$str Minimums:") | |
| foreach(x -> @printf("%.*e\n", precision, x), mins) | |
| println("$str Medians:") | |
| foreach(x -> @printf("%.*e\n", precision, x), medians) | |
| println("$str Maximums:") | |
| foreach(x -> @printf("%.*e\n", precision, x), maxs) | |
| println("$str Ranges:") | |
| foreach(x -> @printf("%.*e\n", precision, x), ranges) | |
| println("$str Means:") | |
| foreach(x -> @printf("%.*e\n", precision, x), means) | |
| println("$str Sums:") | |
| foreach(x -> @printf("%.*e\n", precision, x), sums) | |
| println("$str Variances:") | |
| foreach(x -> @printf("%.*e\n", precision, x), vars) | |
| println("$str Standard Deviations:") | |
| foreach(x -> @printf("%.*e\n", precision, x), stds) | |
| end | |
| function print_basic_stats_e_format(str::String, precision::Int64, cnts, mins, medians, maxs, ranges, means, sums, vars, stds) | |
| # Display rows or column results | |
| i = 1 | |
| for x in cnts | |
| @printf("%s: %d\n", str, i) | |
| @printf(" Count : %d\n", x) | |
| @printf(" Minimum : %.*e\n", precision, mins[i]) | |
| @printf(" Median : %.*e\n", precision, medians[i]) | |
| @printf(" Maximum : %.*e\n", precision, maxs[i]) | |
| @printf(" Range : %.*e\n", precision, ranges[i]) | |
| @printf(" Mean : %.*e\n", precision, means[i]) | |
| @printf(" Sum : %.*e\n", precision, sums[i]) | |
| @printf(" Variance : %.*e\n", precision, vars[i]) | |
| @printf(" Std. Dev. : %.*e\n", precision, stds[i]) | |
| i += 1 | |
| end | |
| end | |
| function print_basic_stats_g_format(str::String, precision::Int64, cnts, mins, medians, maxs, ranges, means, sums, vars, stds) | |
| # Display rows or column results | |
| i = 1 | |
| for x in cnts | |
| @printf("%s: %d\n", str, i) | |
| @printf(" Count : %d\n", x) | |
| @printf(" Minimum : %.*g\n", precision, mins[i]) | |
| @printf(" Median : %.*g\n", precision, medians[i]) | |
| @printf(" Maximum : %.*g\n", precision, maxs[i]) | |
| @printf(" Range : %.*g\n", precision, ranges[i]) | |
| @printf(" Mean : %.*g\n", precision, means[i]) | |
| @printf(" Sum : %.*g\n", precision, sums[i]) | |
| @printf(" Variance : %.*g\n", precision, vars[i]) | |
| @printf(" Std. Dev. : %.*g\n", precision, stds[i]) | |
| i += 1 | |
| end | |
| end | |
| function main() | |
| # Announce multifloat version | |
| println("multifloat version 0v01 (2026-03-10)") | |
| # Parse command line arguments | |
| args = parse_arguments() | |
| comment_delimiter_string = get(args, "comment_char", nothing) # --comment_char command line argument | |
| delimiter_string = get(args, "delimiter_char", nothing) # --delimiter_char command line argument | |
| has_header = args["header"] # --header command line argument | |
| n_divisor = args["n_divisor"] # --n_divisor command line argument | |
| print_digits_string = get(args, "print_digits", nothing) # --print_digits command line argument | |
| skip_lines_string = get(args, "skip", nothing) # --skip_lines_string command line argument | |
| verbose = args["verbose"] # --verbose command line argument | |
| files = args["files"] # names of data files | |
| # Set default values for options if not provided on command line | |
| # and check for valid values if provided | |
| if isnothing(delimiter_string) | |
| delimiter = ',' # set default column delimiter value to comma | |
| else | |
| if delimiter_string[begin] == '\\' && delimiter_string[begin+1] == 't' | |
| delimiter = '\t' # set tab character as delimiter | |
| else | |
| delimiter = delimiter_string[begin] # set supplied char as column delimiter | |
| end | |
| end | |
| if isnothing(comment_delimiter_string) | |
| comment_start = '#' # set default comment delimiter value to hash | |
| else | |
| comment_start = comment_delimiter_string[begin] # set supplied char as comment delimiter | |
| end | |
| if isnothing(skip_lines_string) | |
| skip_lines = 0 # set default lines to skip to 0 value | |
| else | |
| skip_lines = parse(Int64, skip_lines_string) | |
| if skip_lines < 0 # Don't allow negetive skip value | |
| println("Warning: Unable to skip $skip_lines lines - defaulting to zero") | |
| skip_lines = 0 | |
| end | |
| end | |
| if isnothing(print_digits_string) | |
| precision = 56 # set default value of output format to effectively "%.64g" or "%.64e" | |
| else | |
| precision = parse(Int64, print_digits_string) | |
| if precision < 0 # Don't allow negetive numbers in the "%.*e" output format | |
| println("Warning: Unable to set print_digits to \"$precision\" digits - limiting to 0 digits") | |
| precision = 0 | |
| elseif precision > 256 # Limit the print_digits to 256 digits | |
| println("Warning: Unable to set print_digits to \"$precision\" digits - limiting to 256 digits") | |
| precision = 256 | |
| end | |
| end | |
| # Announce Julia version | |
| if verbose || args["debug"] | |
| versioninfo() # Show Julia version information | |
| else | |
| println("Julia version $(VERSION)") | |
| end | |
| # Report current precision and settings if verbose or debug mode is active | |
| print("Results output using $precision digits in ") | |
| if args["scientific"] | |
| println("scientific number format") | |
| else | |
| println("general number format") | |
| end | |
| if verbose || args["debug"] | |
| println("Column delimiter is: '$delimiter'") | |
| println("Start of Comment delimiter is: '$comment_start'") | |
| println("Skip lines before starting to read data: $skip_lines") | |
| println("Use number of samples n as devisor in Standard Deviation: $n_divisor") | |
| end | |
| if args["version"] | |
| return # terminate the execution (a bit like the help option) | |
| end | |
| # Loop through any file names on the command line | |
| for filepath in files | |
| if !isfile(filepath) | |
| println("\nWarning: file \"$filepath\" not found?!") | |
| continue # skip this one, but if there are more files on the command line then try to process them | |
| end | |
| println("\nBasic Statistics for Data from file: \"$filepath\"") | |
| # Read data from the file into a matrix | |
| bignum_matrix = read_bignum_matrix(filepath, delimiter, has_header, verbose, skip_lines, comment_start) | |
| num_rows, num_cols = size(bignum_matrix) | |
| # Show matrix with full MultiFloat precision if Debug mode is active | |
| if args["debug"] | |
| println("Data dimensions are $num_cols columns x $num_rows rows") | |
| print_multifloat_matrix(bignum_matrix, precision) | |
| end | |
| # Now compute stats with high precision | |
| if !args["no_row_stats"] && num_cols > 1 # Don't calc row stats unless more than 1 column | |
| # Calculate row results with high precision | |
| row_cnts, row_mins, row_medians, row_maxs, row_ranges, row_means, row_sums, row_vars, row_stds = row_stats(bignum_matrix, n_divisor) | |
| # Display rows results | |
| if args["scientific"] | |
| print_basic_stats_e_format("Row", precision, row_cnts, row_mins, row_medians, row_maxs, row_ranges, row_means, row_sums, row_vars, row_stds) | |
| else | |
| print_basic_stats_g_format("Row", precision, row_cnts, row_mins, row_medians, row_maxs, row_ranges, row_means, row_sums, row_vars, row_stds) | |
| end | |
| end | |
| if !args["no_column_stats"] && num_rows > 1 # Don't calc column stats unless more than 1 row | |
| # Calculate column results with high precision | |
| col_cnts, col_mins, col_medians, col_maxs, col_ranges, col_means, col_sums, col_vars, col_stds = col_stats(bignum_matrix, n_divisor) | |
| # Display column results | |
| if args["scientific"] | |
| print_basic_stats_e_format("Column", precision, col_cnts, col_mins, col_medians, col_maxs, col_ranges, col_means, col_sums, col_vars, col_stds) | |
| else | |
| print_basic_stats_g_format("Column", precision, col_cnts, col_mins, col_medians, col_maxs, col_ranges, col_means, col_sums, col_vars, col_stds) | |
| end | |
| end | |
| end | |
| end | |
| # Start the timer at the beginning of the script | |
| start_time = time() | |
| main() | |
| # End the timer at the end of the script | |
| end_time = time() | |
| # Calculate the elapsed time and print it | |
| elapsed_time = end_time - start_time | |
| @printf("multifloat.jl script execution time: %.4g [sec]\n", elapsed_time ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment