|
// |
|
// F D B S . S W I F T |
|
// |
|
// fdbs.swift last edited on Sun Mar 15 15:36:50 2026 |
|
// |
|
// Fixed width Decimal Basic Statistics for one or more |
|
// files of one or more CSV columns. This version uses |
|
// Swift's built-in Decimal type for calculations. |
|
// |
|
|
|
// |
|
// Original version produced by free AI from bfbs.py but was renamed |
|
// to fdbs.swift as it failed to meet the bfbs (BigFloat Basic Statistics) |
|
// specification. It is not a drop-in replacement for bfbs.py as it does |
|
// not have arbitrary precision calculations. Unfortunately the Swift |
|
// Decimal package is fixed precision. |
|
// |
|
|
|
// |
|
// Recipe for compiling and running fdbs.swift on macOS with Swift 5.9+: |
|
// 1. Create and check a new Swift project area: |
|
// mkdir fdbs; cd fdbs |
|
// swift package init --type executable |
|
// swift build |
|
// swift run // Should print "Hello, world!" |
|
// |
|
// 2. Overwrite your Package.swift file with:
|
// // swift-tools-version: 5.9 |
|
// import PackageDescription |
|
// |
|
// let package = Package( |
|
// name: "fdbs", |
|
// platforms: [.macOS(.v12)], |
|
// dependencies: [ |
|
// .package(url: "https://github.com/apple/swift-argument-parser", from: "1.2.0") |
|
// ], |
|
// targets: [ |
|
// .executableTarget( |
|
// name: "fdbs", |
|
// dependencies: [ |
|
// .product(name: "ArgumentParser", package: "swift-argument-parser") |
|
// ] |
|
// ) |
|
// ] |
|
// ) |
|
// |
|
// 3. Replace the contents of Sources/fdbs/main.swift with the code from fdbs.swift |
|
// |
|
// 4. Build the project: |
|
// swift build -c release |
|
// |
|
// 5. Run the executable: |
|
// ./.build/release/fdbs --precision 20 --header file1.csv file2.csv |
|
// |
|
// 6. For help: |
|
// ./.build/release/fdbs --help |
|
// |
|
|
|
// |
|
// 0.1.2 - Updated calculations and print of Variance and Std. Dev. to |
|
// include both sample (s^2, s) and population (σ^2, σ) versions. |
|
// see: https://en.wikipedia.org/wiki/Variance#Population_and_sample_variance |
|
// or https://towardsdatascience.com/a-tale-of-two-variances-why-numpy-and-pandas-give-different-answers/ |
|
// 0.1.1 - Updated to use Swift's ArgumentParser and CSVReader for more |
|
// efficient streaming of large files. Still uses Decimal for |
|
// calculations, so precision is fixed at 38 mantissa digits. |
|
// Standard Deviation calculations use Newton's method for
|
// square root, to reduce loss of precision. |
|
// 0.1.0 - Initial version produced by free AI from bfbs.py |
|
// |
|
|
|
// Short-comings: |
|
// 1. Decimal package precision is fixed (Swift's Decimal is fixed at 38 mantissa digits) |
|
// |
|
|
|
import Foundation |
|
import ArgumentParser |
|
|
|
// MARK: - Decimal square root using Newton's method |
|
|
|
/// Returns the square root of `value`, computed with Newton's method in `Decimal` arithmetic.
///
/// The iteration `x = (x + value / x) / 2` is seeded with the `Double` square
/// root of `value`. It stops as soon as an iteration no longer changes `x`, or
/// after `precision + 5` steps, whichever comes first. The result is then
/// rounded (`.plain` mode) to `precision` digits after the decimal point.
///
/// - Parameters:
///   - value: The number whose square root is wanted.
///   - precision: Number of digits kept after the decimal point. Values outside
///     the `Int16` range accepted by `NSDecimalNumberHandler` are clamped to
///     that range instead of trapping.
/// - Returns: The rounded square root, or `0` when `value` is zero or negative.
func dsqrt(_ value: Decimal, precision: Int = 37) -> Decimal {

    // Zero and negative inputs have no positive real root; report 0.
    if value <= 0 {
        return 0
    }

    // NSDecimalNumberHandler takes an Int16 scale, so clamp first: a plain
    // Int16(precision) conversion traps on out-of-range input.
    let scale = min(max(precision, Int(Int16.min)), Int(Int16.max))

    // Never negative, so the range below is always well-formed.
    let maxIterations = max(0, scale + 5)

    // Initial guess from the hardware square root.
    let seed = sqrt((value as NSDecimalNumber).doubleValue)
    var x = Decimal(seed)

    let two: Decimal = 2

    // Newton's method. Once an iteration reproduces the current value every
    // later iteration would too, so leaving early does not change the result.
    for _ in 0..<maxIterations {
        let next = (x + (value / x)) / two
        if next == x { break }
        x = next
    }

    // Round to the requested number of fractional digits.
    let handler = NSDecimalNumberHandler(
        roundingMode: .plain,
        scale: Int16(scale),
        raiseOnExactness: false,
        raiseOnOverflow: false,
        raiseOnUnderflow: false,
        raiseOnDivideByZero: false
    )

    return (x as NSDecimalNumber).rounding(accordingToBehavior: handler) as Decimal
}
|
|
|
// MARK: - Running statistics (Welford) |
|
|
|
/// Accumulates summary statistics for one column of `Decimal` values.
///
/// The mean and the sum of squared deviations are updated one value at a time
/// (Welford's method). Every value is also retained so that the median can be
/// computed on request.
struct RunningStats {

    /// Number of values added so far.
    private(set) var count: Int = 0

    /// Running arithmetic mean of the values added so far.
    private(set) var mean: Decimal = 0

    /// Total of the values added so far.
    ///
    /// Accumulated by direct addition rather than derived as `mean * count`,
    /// so it does not inherit the rounding of the running mean (for example
    /// 1, 1, 2 sums to exactly 4 instead of 3.999…9).
    private(set) var sum: Decimal = 0

    /// Running sum of squared deviations from the mean.
    private var m2: Decimal = 0

    /// Smallest value seen, or `nil` before the first value.
    private(set) var min: Decimal?

    /// Largest value seen, or `nil` before the first value.
    private(set) var max: Decimal?

    /// Every value added, kept for the median.
    private var values: [Decimal] = []

    /// Folds one more value into all running statistics.
    ///
    /// - Parameter x: The value to add.
    mutating func add(_ x: Decimal) {

        values.append(x)

        min = min.map { x < $0 ? x : $0 } ?? x
        max = max.map { x > $0 ? x : $0 } ?? x

        count += 1
        sum += x

        // Welford update: `delta` uses the old mean, `delta2` the new one.
        let delta = x - mean
        mean += delta / Decimal(count)
        let delta2 = x - mean
        m2 += delta * delta2
    }

    /// Sample variance (divisor `count - 1`); `0` when fewer than two values were added.
    var variance: Decimal {
        guard count > 1 else { return 0 }
        return m2 / Decimal(count - 1)
    }

    /// Population variance (divisor `count`); `0` when no values were added.
    var pvariance: Decimal {
        guard count > 0 else { return 0 }
        return m2 / Decimal(count)
    }

    /// Sample standard deviation: the square root of `variance`.
    var stddev: Decimal {
        return dsqrt(variance, precision: 37)
    }

    /// Population standard deviation: the square root of `pvariance`.
    var pstddev: Decimal {
        return dsqrt(pvariance, precision: 37)
    }

    /// Median of the values added so far, or `nil` when there are none.
    ///
    /// For an even number of values this is the mean of the two middle ones.
    /// The stored values are sorted on every access.
    var median: Decimal? {

        guard !values.isEmpty else { return nil }

        let sorted = values.sorted()
        let mid = sorted.count / 2

        if sorted.count % 2 == 1 {
            return sorted[mid]
        }
        return (sorted[mid - 1] + sorted[mid]) / Decimal(2)
    }

    /// Difference between the largest and smallest value, or `nil` when there are none.
    var range: Decimal? {
        guard let min, let max else { return nil }
        return max - min
    }
}
|
|
|
// MARK: - CSV streaming reader |
|
|
|
/// Reads a delimited text file one row at a time, pulling 4096-byte chunks
/// from the file as needed instead of loading it whole.
///
/// Blank lines, lines whose first non-blank character is `#`, and lines that
/// are not valid UTF-8 are skipped. Each remaining line is split on
/// `delimiter`. Empty fields are preserved so that columns stay aligned, and
/// every field is trimmed of surrounding whitespace (which also removes the
/// `\r` left by CRLF line endings). Quoted fields are not interpreted.
struct CSVReader: Sequence, IteratorProtocol {

    /// Handle of the file being read.
    let handle: FileHandle

    /// Character that separates fields within a line.
    let delimiter: Character

    /// Bytes read from the file that have not yet been returned as rows.
    var buffer = Data()

    /// Opens `path` for reading.
    ///
    /// - Parameters:
    ///   - path: Path of the file to read.
    ///   - delimiter: Field separator; defaults to a comma.
    ///   - header: Accepted but not used by the reader; the first row is
    ///     returned like any other row.
    /// - Throws: Whatever `FileHandle(forReadingFrom:)` throws when the file
    ///   cannot be opened.
    init(path: String, delimiter: Character = ",", header: Bool = false ) throws {
        self.handle = try FileHandle(forReadingFrom: URL(fileURLWithPath: path))
        self.delimiter = delimiter
    }

    /// Returns the fields of the next data row, or `nil` when the file is exhausted.
    mutating func next() -> [String]? {

        while true {

            // A complete line is already buffered: consume it.
            if let newline = buffer.firstIndex(of: 0x0a) {
                let lineData = buffer.subdata(in: buffer.startIndex..<newline)
                buffer.removeSubrange(buffer.startIndex...newline)

                if let row = fields(in: lineData) {
                    return row
                }
                continue
            }

            // No complete line yet: fetch more bytes. A read error is treated
            // the same as end of file.
            if let chunk = try? handle.read(upToCount: 4096), !chunk.isEmpty {
                buffer.append(chunk)
                continue
            }

            // End of file: what is left is a final line without a newline.
            // It goes through the same blank/comment filter as any other line.
            guard !buffer.isEmpty else { return nil }

            let tail = buffer
            buffer.removeAll()
            return fields(in: tail)
        }
    }

    /// Decodes one line and splits it into trimmed fields.
    ///
    /// Returns `nil` for lines that carry no data: invalid UTF-8, blank lines
    /// and `#` comment lines.
    private func fields(in lineData: Data) -> [String]? {

        guard let line = String(data: lineData, encoding: .utf8) else {
            return nil
        }

        let trimmed = line.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !trimmed.isEmpty, !trimmed.hasPrefix("#") else {
            return nil
        }

        // Keep empty fields: dropping them would shift later columns left.
        return line
            .split(separator: delimiter, omittingEmptySubsequences: false)
            .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
    }
}
|
|
|
// MARK: - CLI |
|
|
|
/// Command-line entry point: prints basic statistics for every column of each
/// CSV file named on the command line.
struct FDBS: ParsableCommand {

    static let configuration = CommandConfiguration(
        abstract: "Basic statistics for CSV columns"
    )

    @Flag(name: [.long], help: "Treat first row as header")
    var header = false

    @Option(name: [.short, .long], help: "Print digits after decimal point")
    var precision: Int = 38

    @Flag(name: [.short, .long], help: "Quiet mode")
    var quiet = false

    @Argument(help: "CSV files")
    var files: [String]

    /// Processes each file in turn and prints its per-column report.
    ///
    /// A file that is missing or cannot be opened produces an error line and
    /// the run continues with the next file. Quiet mode suppresses the banner
    /// and the timing line; the two "Info:" lines about precision are always
    /// printed.
    mutating func run() throws {

        let start = Date()

        if !quiet {
            print("fdbs 0.1.2 (2026-03-15) - Fixed width Decimal Basic Statistics for CSV column data")
        }

        print("Info: Calculations using Decimal package with fixed 38 digits of mantissa.")
        print("Info: Print using \(precision) digits after decimal point.")

        for file in files {

            guard FileManager.default.fileExists(atPath: file) else {
                print("\nError: \"\(file)\" not found.")
                continue
            }

            print("\nInfo: Processing file: \"\(file)\"")

            // An open failure on one file must not abort the remaining files,
            // mirroring the "not found" case above.
            do {
                if let table = try collectStatistics(from: file) {
                    report(headers: table.headers, stats: table.stats)
                }
            } catch {
                print("Error: \"\(file)\" could not be opened: \(error.localizedDescription)")
            }
        }

        if !quiet {
            let elapsed = Date().timeIntervalSince(start) * 1000
            print(String(format:"Info: fdbs execution time: %.3f ms", elapsed))
        }
    }

    /// Reads `path` and accumulates one `RunningStats` per column.
    ///
    /// The first row fixes the number of columns. With `--header` it supplies
    /// the column names; otherwise the columns are named "Column 1",
    /// "Column 2", … and the first row is counted as data. Fields beyond the
    /// first row's width, and fields `Decimal(string:)` rejects, are ignored.
    ///
    /// - Returns: Column names with their statistics, or `nil` when the file
    ///   contains no rows.
    /// - Throws: The error from `CSVReader.init` when the file cannot be opened.
    private func collectStatistics(from path: String) throws -> (headers: [String], stats: [RunningStats])? {

        var reader = try CSVReader(path: path, delimiter: ",", header: header)

        guard let firstRow = reader.next() else {
            return nil
        }

        let headers = header
            ? firstRow
            : firstRow.indices.map { "Column \($0 + 1)" }

        var stats = Array(repeating: RunningStats(), count: headers.count)

        // Adds every numeric field of `row` to the statistics of its column.
        func accumulate(_ row: [String]) {
            for (index, field) in row.enumerated() where index < stats.count {
                if let number = Decimal(string: field) {
                    stats[index].add(number)
                }
            }
        }

        if !header {
            accumulate(firstRow)
        }

        while let row = reader.next() {
            accumulate(row)
        }

        return (headers, stats)
    }

    /// Prints the statistics block for each column, or a warning for a column
    /// that received no numeric values.
    private func report(headers: [String], stats: [RunningStats]) {

        for (name, column) in zip(headers, stats) {

            guard column.count > 0,
                  let minimum = column.min,
                  let median = column.median,
                  let maximum = column.max,
                  let range = column.range
            else {
                print("Warning: \(name): No valid numeric data")
                continue
            }

            print("\n\(name):")
            print(" Count : \(column.count)")
            print(" Minimum :", format(minimum))
            print(" Mean :", format(column.mean))
            print(" Median :", format(median))
            print(" Maximum :", format(maximum))
            print(" Range :", format(range))
            print(" Sum :", format(column.sum))
            print(" Variance s\u{00B2} :", format(column.variance))
            print(" Std. Dev. s :", format(column.stddev))
            print(" Variance \u{03C3}\u{00B2} :", format(column.pvariance))
            print(" Std. Dev. \u{03C3} :", format(column.pstddev))
        }
    }

    /// Renders `value` for output: the full `description` when `precision` is
    /// 38 or more, otherwise exactly `precision` digits after the decimal point.
    private func format(_ value: Decimal) -> String {
        if precision >= 38 {
            return value.description
        }
        return value.formatted(.number.precision(.fractionLength(precision)))
    }
}

// Parse the command line and run the command.
FDBS.main()