Skip to content

Instantly share code, notes, and snippets.

@dutchLuck
Last active March 15, 2026 11:58
Show Gist options
  • Select an option

  • Save dutchLuck/0f51052b27887706d84d05fed01ddf65 to your computer and use it in GitHub Desktop.

Select an option

Save dutchLuck/0f51052b27887706d84d05fed01ddf65 to your computer and use it in GitHub Desktop.
A Swift program using the Fixed precision Decimal package to read one or more CSV files and output Basic Statistics for each column of data with better than double precision accuracy.

FDBS

This "fdbs.swift" (fixed decimal basic statistics) gist is an example of a command line utility in Swift to calculate basic stats for columns of data.

Fixed width Decimal Basic Statistics for one or more files of one or more CSV columns. This version uses Swift's built-in Decimal type for calculations.

//
// F D B S . S W I F T
//
// fdbs.swift last edited on Sun Mar 15 15:36:50 2026
//
// Fixed width Decimal Basic Statistics for one or more
// files of one or more CSV columns. This version uses
// Swift's built-in Decimal type for calculations.
//
//
// Original version produced by free AI from bfbs.py but was renamed
// to fdbs.swift as it failed to meet the bfbs (BigFloat Basic Statistics)
// specification. It is not a drop-in replacement for bfbs.py as it does
// not have arbitrary precision calculations. Unfortunately the Swift
// Decimal package is fixed precision.
//
//
// Recipe for compiling and running fdbs.swift on macOS with Swift 5.9+:
// 1. Create and check a new Swift project area:
// mkdir fdbs; cd fdbs
// swift package init --type executable
// swift build
// swift run // Should print "Hello, world!"
//
// 2. Overwrite your Package.swift file with:
// // swift-tools-version: 5.9
// import PackageDescription
//
// let package = Package(
// name: "fdbs",
// platforms: [.macOS(.v12)],
// dependencies: [
// .package(url: "https://github.com/apple/swift-argument-parser", from: "1.2.0")
// ],
// targets: [
// .executableTarget(
// name: "fdbs",
// dependencies: [
// .product(name: "ArgumentParser", package: "swift-argument-parser")
// ]
// )
// ]
// )
//
// 3. Replace the contents of Sources/fdbs/main.swift with the code from fdbs.swift
//
// 4. Build the project:
// swift build -c release
//
// 5. Run the executable:
// ./.build/release/fdbs --precision 20 --header file1.csv file2.csv
//
// 6. For help:
// ./.build/release/fdbs --help
//
//
// 0.1.2 - Updated calculations and print of Variance and Std. Dev. to
// include both sample (s^2, s) and population (σ^2, σ) versions.
// see: https://en.wikipedia.org/wiki/Variance#Population_and_sample_variance
// or https://towardsdatascience.com/a-tale-of-two-variances-why-numpy-and-pandas-give-different-answers/
// 0.1.1 - Updated to use Swift's ArgumentParser and CSVReader for more
// efficient streaming of large files. Still uses Decimal for
// calculations, so precision is fixed at 38 mantissa digits.
// Standard Deviation calculations use Newton's method for
// square root, to reduce loss of precision.
// 0.1.0 - Initial version produced by free AI from bfbs.py
//
// Shortcomings:
// 1. Precision is fixed (Swift's Decimal type is limited to 38 mantissa digits)
//
import Foundation
import ArgumentParser
// MARK: - Decimal square root using Newton's method
/// Returns the square root of `value`, refined in `Decimal` arithmetic with Newton's method.
///
/// - Parameters:
///   - value: The number to take the root of. Zero and negative inputs yield `0`.
///   - precision: Number of digits after the decimal point the result is rounded to
///     (default 37). Values outside the `Int16` range are clamped instead of trapping.
/// - Returns: The rounded square root, or `0` when `value <= 0`.
func dsqrt(_ value: Decimal, precision: Int = 37) -> Decimal {
    // 1. Zero and negative values have no (real, non-zero) root to refine.
    if value <= 0 {
        return 0
    }

    // NSDecimalNumberHandler takes its scale as Int16; clamp so that an
    // out-of-range precision cannot trap in the conversion.
    let scale = Int16(clamping: precision)
    // Upper bound on Newton steps; never negative, so the range below is always valid.
    let maxIterations = max(0, Int(scale) + 5)

    // 2. Seed the iteration with the double-precision square root.
    var x = Decimal(sqrt((value as NSDecimalNumber).doubleValue))
    let two: Decimal = 2

    // 3. Newton's method: x = (x + value / x) / 2.
    //    Once a step reproduces x, every later step would too, so stopping
    //    early yields exactly the value the full loop would have produced.
    for _ in 0..<maxIterations {
        let next = (x + (value / x)) / two
        if next == x {
            break
        }
        x = next
    }

    // 4. Round to the requested number of fractional digits.
    let handler = NSDecimalNumberHandler(
        roundingMode: .plain,
        scale: scale,
        raiseOnExactness: false,
        raiseOnOverflow: false,
        raiseOnUnderflow: false,
        raiseOnDivideByZero: false
    )
    return (x as NSDecimalNumber).rounding(accordingToBehavior: handler) as Decimal
}
// MARK: - Running statistics (Welford)
/// Accumulates summary statistics for one column of values, one value at a time.
///
/// Mean and variance are maintained incrementally with Welford's update
/// (`mean`, `m2`). Every value is also retained so that the median can be
/// computed on demand.
struct RunningStats {
    /// Number of values added so far.
    private(set) var count: Int = 0
    /// Running arithmetic mean of the values added so far.
    private(set) var mean: Decimal = 0
    /// Smallest value seen, or `nil` before the first `add`.
    private(set) var min: Decimal?
    /// Largest value seen, or `nil` before the first `add`.
    private(set) var max: Decimal?

    /// Running sum of squared deviations from the mean (Welford's M2).
    private var m2: Decimal = 0
    /// Running total of the values; kept separately so `sum` does not
    /// inherit the rounding of the repeatedly-divided `mean`.
    private var total: Decimal = 0
    /// Every value added, retained for the median.
    private var values: [Decimal] = []

    /// Adds `x` to the data set and updates all running statistics.
    mutating func add(_ x: Decimal) {
        values.append(x)
        total += x

        if let lowest = min {
            if x < lowest { min = x }
        } else {
            min = x
        }
        if let highest = max {
            if x > highest { max = x }
        } else {
            max = x
        }

        count += 1
        let delta = x - mean
        mean += delta / Decimal(count)
        // Second deviation uses the *updated* mean, as Welford's method requires.
        let delta2 = x - mean
        m2 += delta * delta2
    }

    /// Sample variance (divides by `count - 1`); `0` for fewer than two values.
    var variance: Decimal {
        guard count > 1 else { return 0 }
        return m2 / Decimal(count - 1)
    }

    /// Population variance (divides by `count`); `0` for fewer than two values.
    var pvariance: Decimal {
        guard count > 1 else { return 0 }
        return m2 / Decimal(count)
    }

    /// Sample standard deviation: square root of `variance`.
    var stddev: Decimal {
        return dsqrt(variance, precision: 37)
    }

    /// Population standard deviation: square root of `pvariance`.
    var pstddev: Decimal {
        return dsqrt(pvariance, precision: 37)
    }

    /// Sum of all values added so far.
    var sum: Decimal {
        total
    }

    /// Median of the values, or `nil` when no value has been added.
    /// For an even count this is the mean of the two middle values.
    var median: Decimal? {
        guard !values.isEmpty else { return nil }
        let sorted = values.sorted()
        let mid = sorted.count / 2
        if sorted.count % 2 == 1 {
            return sorted[mid]
        }
        return (sorted[mid - 1] + sorted[mid]) / Decimal(2)
    }

    /// Difference between the largest and smallest value, or `nil` when empty.
    var range: Decimal? {
        guard let min, let max else { return nil }
        return max - min
    }
}
// MARK: - CSV streaming reader
/// Streams a delimited text file row by row without loading it all into memory.
///
/// The file is read in 4096-byte chunks and split on line-feed bytes (0x0a).
/// Blank lines, lines whose first non-whitespace character is `#`, and lines
/// that are not valid UTF-8 are skipped. Each remaining line is split on
/// `delimiter`; empty fields are kept so that columns stay aligned.
struct CSVReader: Sequence, IteratorProtocol {
    let handle: FileHandle
    let delimiter: Character
    var buffer = Data()

    /// Opens `path` for reading.
    ///
    /// - Parameters:
    ///   - path: File system path of the file to read.
    ///   - delimiter: Field separator (default `,`).
    ///   - header: Accepted for call-site compatibility; the reader itself
    ///     does not use it and returns the first row like any other.
    /// - Throws: Whatever `FileHandle(forReadingFrom:)` throws when the file cannot be opened.
    init(path: String, delimiter: Character = ",", header: Bool = false ) throws {
        self.handle = try FileHandle(forReadingFrom: URL(fileURLWithPath: path))
        self.delimiter = delimiter
    }

    /// Returns the fields of the next data row, or `nil` at end of file.
    mutating func next() -> [String]? {
        let lineFeed = Data([0x0a])
        while true {
            // A complete line is buffered: consume it (and its terminator).
            if let hit = buffer.firstRange(of: lineFeed) {
                let lineData = buffer.subdata(in: 0..<hit.lowerBound)
                buffer.removeSubrange(0...hit.lowerBound)
                if let row = fields(from: lineData) {
                    return row
                }
                continue
            }

            // No complete line yet: pull in more bytes. A read error is
            // treated the same as end of file because `next()` cannot throw.
            let chunk = try? handle.read(upToCount: 4096)
            if let chunk, !chunk.isEmpty {
                buffer.append(chunk)
                continue
            }

            // End of file: whatever is left is a final line without a terminator.
            guard !buffer.isEmpty else { return nil }
            let lastLine = buffer
            buffer.removeAll()
            return fields(from: lastLine)
        }
    }

    /// Decodes one raw line and splits it into fields.
    /// Returns `nil` for lines that must be skipped (undecodable, blank, or `#` comments).
    private func fields(from lineData: Data) -> [String]? {
        guard var line = String(data: lineData, encoding: .utf8) else { return nil }
        // Files with CRLF line endings leave a carriage return before the
        // line feed; drop it so it does not stick to the last field.
        if line.last == "\r" {
            line.removeLast()
        }
        let trimmed = line.trimmingCharacters(in: .whitespacesAndNewlines)
        if trimmed.isEmpty || trimmed.hasPrefix("#") {
            return nil
        }
        // Keep empty fields: dropping them would shift later columns left.
        return line.split(separator: delimiter, omittingEmptySubsequences: false).map(String.init)
    }
}
// MARK: - CLI
/// Command-line entry point: prints basic statistics for every column of
/// each CSV file named on the command line.
struct FDBS: ParsableCommand {
    static let configuration = CommandConfiguration(
        abstract: "Basic statistics for CSV columns"
    )

    @Flag(name: [.long], help: "Treat first row as header")
    var header = false

    @Option(name: [.short, .long], help: "Print digits after decimal point")
    var precision: Int = 38

    @Flag(name: [.short, .long], help: "Quiet mode")
    var quiet = false

    @Argument(help: "CSV files")
    var files: [String]

    /// Rejects option values that make no sense before any file is touched.
    /// A negative number of digits after the decimal point has no meaning.
    func validate() throws {
        guard precision >= 0 else {
            throw ValidationError("--precision must be zero or greater.")
        }
    }

    /// Processes each file in turn and prints one statistics section per column.
    ///
    /// A file that is missing, cannot be opened, or contains no rows is
    /// reported and skipped; the remaining files are still processed.
    mutating func run() throws {
        let start = Date()
        if !quiet {
            print("fdbs 0.1.2 (2026-03-15) - Fixed width Decimal Basic Statistics for CSV column data")
        }
        print("Info: Calculations using Decimal package with fixed 38 digits of mantissa.")
        print("Info: Print using \(precision) digits after decimal point.")

        for file in files {
            guard FileManager.default.fileExists(atPath: file) else {
                print("\nError: \"\(file)\" not found.")
                continue
            }
            print("\nInfo: Processing file: \"\(file)\"")

            // An unreadable file must not abort the remaining files.
            var reader: CSVReader
            do {
                reader = try CSVReader(path: file, delimiter: ",", header: header)
            } catch {
                print("Error: \"\(file)\" could not be opened: \(error.localizedDescription)")
                continue
            }
            guard let firstRow = reader.next() else {
                print("Warning: \"\(file)\" contains no data rows.")
                continue
            }

            // The first row fixes the column count. It is either the header
            // names or, without --header, the first row of data.
            let headers: [String]
            var stats: [RunningStats]
            if header {
                headers = firstRow
                stats = Array(repeating: RunningStats(), count: headers.count)
            } else {
                headers = firstRow.indices.map { "Column \($0 + 1)" }
                stats = Array(repeating: RunningStats(), count: headers.count)
                Self.accumulate(firstRow, into: &stats)
            }
            while let row = reader.next() {
                Self.accumulate(row, into: &stats)
            }

            for (name, column) in zip(headers, stats) {
                // min/max/median/range are non-nil exactly when count > 0.
                guard column.count > 0,
                      let lowest = column.min,
                      let highest = column.max,
                      let median = column.median,
                      let range = column.range
                else {
                    print("Warning: \(name): No valid numeric data")
                    continue
                }
                print("\n\(name):")
                print(" Count : \(column.count)")
                print(" Minimum :", render(lowest))
                print(" Mean :", render(column.mean))
                print(" Median :", render(median))
                print(" Maximum :", render(highest))
                print(" Range :", render(range))
                print(" Sum :", render(column.sum))
                print(" Variance s\u{00B2} :", render(column.variance))
                print(" Std. Dev. s :", render(column.stddev))
                print(" Variance \u{03C3}\u{00B2} :", render(column.pvariance))
                print(" Std. Dev. \u{03C3} :", render(column.pstddev))
            }
        }

        if !quiet {
            let elapsed = Date().timeIntervalSince(start) * 1000
            print(String(format:"Info: fdbs execution time: %.3f ms", elapsed))
        }
    }

    /// Adds every parseable field of `row` to the statistics of its column.
    /// Fields beyond the known column count and non-numeric fields are ignored.
    private static func accumulate(_ row: [String], into stats: inout [RunningStats]) {
        // zip stops at the shorter sequence, so extra fields are dropped.
        for (index, field) in zip(stats.indices, row) {
            if let number = Decimal(string: field) {
                stats[index].add(number)
            }
        }
    }

    /// Formats `value` for output: the full description when `precision` is
    /// 38 or more, otherwise a fixed number of fractional digits.
    private func render(_ value: Decimal) -> String {
        if precision >= 38 {
            return value.description
        }
        return value.formatted(.number.precision(.fractionLength(precision)))
    }
}
// Program entry point: hand control to the FDBS command via the static
// main() it gets from its ParsableCommand conformance.
FDBS.main()
// swift-tools-version: 5.9
import PackageDescription
// External dependency: the swift-argument-parser package, version 1.2.0 or later.
let argumentParserDependency: Package.Dependency = .package(
    url: "https://github.com/apple/swift-argument-parser",
    from: "1.2.0"
)

// The single executable target, linked against the ArgumentParser product.
let fdbsTarget: Target = .executableTarget(
    name: "fdbs",
    dependencies: [
        .product(name: "ArgumentParser", package: "swift-argument-parser")
    ]
)

// Package manifest for the fdbs command-line tool.
let package = Package(
    name: "fdbs",
    platforms: [.macOS(.v12)],
    dependencies: [argumentParserDependency],
    targets: [fdbsTarget]
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment