Skip to content

Instantly share code, notes, and snippets.

@dutchLuck
Last active July 18, 2025 10:23
Show Gist options
  • Save dutchLuck/7d8ea6c206be7718866b36988fc4cde7 to your computer and use it in GitHub Desktop.
Save dutchLuck/7d8ea6c206be7718866b36988fc4cde7 to your computer and use it in GitHub Desktop.
Display the basic statistics of Comma Separated Value data from one or a number of files using the golang stat package.
//
// A N A L Y S E _ C S V . G O
//
//
//
// Display basic statistics of one or more columns of numbers separated by commas
// in one or more files.
//
//
// Create executable file with; -
// go build analyse_csv.go
//
// Run on MacOS with; -
// ./analyse_csv data.csv
//
package main
import (
"encoding/csv"
"flag"
"fmt"
"log"
"os"
"strconv"
"strings"
"gonum.org/v1/gonum/stat"
)
// parseCSVFile reads filename as comma separated values and returns the
// numeric data organised by column (result[c][r] holds column c of data row
// r) together with the header names.
//
// Rows whose first field starts with '#' are treated as comments and skipped.
// When hasHeader is true the first remaining row supplies the header names;
// otherwise generic "Column N" names are generated from the width of the
// first data row. A '#' inside a data row starts an inline comment: the rest
// of that field and every later field on the row are ignored. Empty and
// non-numeric fields are skipped, and rows left without any number are
// dropped.
//
// The returned error wraps the underlying os or csv error, so callers can
// inspect it with errors.Is / errors.As.
func parseCSVFile(filename string, hasHeader bool) ([][]float64, []string, error) {
	file, err := os.Open(filename)
	if err != nil {
		return nil, nil, fmt.Errorf("could not open file: %w", err)
	}
	defer file.Close()

	reader := csv.NewReader(file)
	reader.TrimLeadingSpace = true
	reader.Comment = '#'
	lines, err := reader.ReadAll()
	if err != nil {
		return nil, nil, fmt.Errorf("could not read CSV: %w", err)
	}

	var data [][]float64
	var headers []string
	for _, row := range lines {
		// The reader's Comment setting only matches '#' at the very start of
		// a line; this check also catches comment lines that are indented.
		if len(row) == 0 || strings.HasPrefix(strings.TrimSpace(row[0]), "#") {
			continue
		}
		if hasHeader && len(headers) == 0 {
			headers = row
			continue
		}

		floatRow := make([]float64, 0, len(row))
		for _, field := range row {
			commentAt := strings.IndexByte(field, '#')
			if commentAt >= 0 {
				field = field[:commentAt] // keep only the text before the inline comment
			}
			field = strings.TrimSpace(field)
			if field != "" {
				// Non-numeric fields are skipped on purpose; note that this
				// shifts later values on the row one column to the left.
				if num, err := strconv.ParseFloat(field, 64); err == nil {
					floatRow = append(floatRow, num)
				}
			}
			if commentAt >= 0 {
				// An inline comment runs to the end of the line, so commas
				// inside it must not produce further data fields.
				break
			}
		}
		if len(floatRow) > 0 {
			data = append(data, floatRow)
		}
	}

	if len(headers) == 0 && len(data) > 0 {
		// No header row was read: generate generic names, one per value in
		// the first data row.
		for i := range data[0] {
			headers = append(headers, fmt.Sprintf("Column %d", i+1))
		}
	}
	return transpose(data), headers, nil
}
// transpose converts row-major data (data[row][col]) into column-major form
// (result[col][row]).
//
// The number of columns is taken from the first row. Rows shorter than that
// leave a zero in the missing positions; values beyond that width in longer
// rows are dropped. An empty input yields nil.
func transpose(data [][]float64) [][]float64 {
	rowCount := len(data)
	if rowCount == 0 {
		return nil
	}

	colCount := len(data[0])
	columns := make([][]float64, colCount)
	for c := range columns {
		columns[c] = make([]float64, rowCount)
	}

	for r, row := range data {
		// Copy only the cells that exist in both this row and the result.
		limit := colCount
		if len(row) < limit {
			limit = len(row)
		}
		for c := 0; c < limit; c++ {
			columns[c][r] = row[c]
		}
	}
	return columns
}
// analyzeColumn prints summary statistics for the values in col to standard
// output, under a heading that contains name.
//
// The report covers count, min, quartiles, median, max, range, mode, the
// arithmetic / circular / geometric / harmonic means, sum, variance, the
// sample and population standard deviations, skew and excess kurtosis.
// Quantiles use gonum's Empirical cumulant kind, so the median is always an
// actual sample value rather than an interpolated one.
//
// col is not modified: quantiles are computed on a sorted private copy. An
// empty col prints only the heading and a row count of zero.
func analyzeColumn(col []float64, name string) {
	if len(col) == 0 {
		// Indexing col[0] and stat.Quantile both panic on empty input.
		fmt.Printf("Statistics for %s:\n", name)
		fmt.Printf(" Row Count : %d\n", len(col))
		fmt.Println()
		return
	}

	mean := stat.Mean(col, nil)
	circMean := stat.CircularMean(col, nil)
	geomMean := stat.GeometricMean(col, nil)
	harmMean := stat.HarmonicMean(col, nil)
	variance := stat.Variance(col, nil)
	stdDev := stat.StdDev(col, nil)
	popStdDev := stat.PopStdDev(col, nil)
	skew := stat.Skew(col, nil)
	kurt := stat.ExKurtosis(col, nil)

	// Single pass for the extremes and the sum, in the original value order.
	lo, hi := col[0], col[0]
	sum := 0.0
	for _, v := range col {
		sum += v
		if v < lo {
			lo = v
		}
		if v > hi {
			hi = v
		}
	}

	mode, modeCnt := stat.Mode(col, nil)

	// stat.Quantile requires sorted input; sort a copy so the caller's slice
	// keeps its original order.
	sorted := append([]float64(nil), col...)
	stat.SortWeighted(sorted, nil)
	median := stat.Quantile(0.5, stat.Empirical, sorted, nil)
	quartile25 := stat.Quantile(0.25, stat.Empirical, sorted, nil)
	quartile75 := stat.Quantile(0.75, stat.Empirical, sorted, nil)

	fmt.Printf("Statistics for %s:\n", name)
	fmt.Printf(" Row Count : %d\n", len(col))
	fmt.Printf(" Min : %.15e\n", lo)
	fmt.Printf(" 25%% Quartile : %.15e\n", quartile25)
	fmt.Printf(" Median : %.15e\n", median)
	fmt.Printf(" 75%% Quartile : %.15e\n", quartile75)
	fmt.Printf(" Max : %.15e\n", hi)
	fmt.Printf(" Range : %.15e\n", hi-lo)
	fmt.Printf(" Mode : %.15e (Count %.0f)\n", mode, modeCnt)
	fmt.Printf(" Mean : %.15e\n", mean)
	fmt.Printf(" Circular Mean : %.15e\n", circMean)
	fmt.Printf(" Geometric Mean : %.15e\n", geomMean)
	fmt.Printf(" Harmonic Mean : %.15e\n", harmMean)
	fmt.Printf(" Sum : %.15e\n", sum)
	fmt.Printf(" Variance : %.15e\n", variance)
	fmt.Printf(" Std Deviation : %.15e\n", stdDev)
	fmt.Printf(" Population Std Dev : %.15e\n", popStdDev)
	fmt.Printf(" Skew : %.15e\n", skew)
	fmt.Printf(" Kurtosis : %.15e\n", kurt)
	fmt.Println()
}
// main analyses every CSV file named on the command line and prints the
// statistics of each numeric column.
//
// Flags:
//
//	-header  treat the first non-comment row of each file as column names
//	-quiet   suppress the per-file report (files are still parsed, and
//	         problems are still logged)
//
// A file that cannot be read, or that contains no numeric data, is logged
// and skipped; the remaining files are still processed. With no file
// arguments the program exits with an error.
func main() {
	header := flag.Bool("header", false, "Treat first row as header")
	quiet := flag.Bool("quiet", false, "Suppress terminal output")
	flag.Parse()

	files := flag.Args()
	if len(files) == 0 {
		log.Fatal("Please provide at least one CSV file")
	}

	for _, file := range files {
		data, headers, err := parseCSVFile(file, *header)
		if err != nil {
			log.Printf("Skipping file %s: %v\n", file, err)
			continue
		}
		if len(data) == 0 {
			// Empty, comment-only or header-only file: there is no column to
			// index, so data[0] below would panic.
			log.Printf("Skipping file %s: no numeric data found\n", file)
			continue
		}
		if *quiet {
			continue
		}

		fmt.Printf("\nAnalyzing file: %s\n", file)
		fmt.Printf("Number of data columns read: %d\n", len(data))
		fmt.Printf("Number of data rows read: %d\n", len(data[0]))
		fmt.Println(strings.Repeat("-", 60))

		for i, col := range data {
			// Fall back to a generic name if fewer headers than columns were
			// supplied.
			name := fmt.Sprintf("Column %d", i+1)
			if i < len(headers) {
				name = headers[i]
			}
			analyzeColumn(col, name)
		}
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment