Last active
July 18, 2025 10:23
-
-
Save dutchLuck/7d8ea6c206be7718866b36988fc4cde7 to your computer and use it in GitHub Desktop.
Display the basic statistics of Comma Separated Value data from one or a number of files using the golang stat package.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
// A N A L Y S E _ C S V . G O
//
//
//
// Display basic statistics of one or more columns of numbers separated by commas
// in one or more files.
//
//
// Create executable file with; -
// go build analyse_csv.go
//
// Run on MacOS with; -
// ./analyse_csv data.csv
//
package main | |
import ( | |
"encoding/csv" | |
"flag" | |
"fmt" | |
"log" | |
"os" | |
"strconv" | |
"strings" | |
"gonum.org/v1/gonum/stat" | |
) | |
func parseCSVFile(filename string, hasHeader bool) ([][]float64, []string, error) { | |
file, err := os.Open(filename) | |
if err != nil { | |
return nil, nil, fmt.Errorf("could not open file: %v", err) | |
} | |
defer file.Close() | |
reader := csv.NewReader(file) | |
reader.TrimLeadingSpace = true | |
reader.Comment = '#' | |
lines, err := reader.ReadAll() | |
if err != nil { | |
return nil, nil, fmt.Errorf("could not read CSV: %v", err) | |
} | |
var data [][]float64 | |
var headers []string | |
for _, row := range lines { | |
if len(row) == 0 || strings.HasPrefix(strings.TrimSpace(row[0]), "#") { | |
continue | |
} | |
if hasHeader && len(headers) == 0 { | |
headers = row | |
continue | |
} | |
var floatRow []float64 | |
for _, field := range row { | |
field = strings.TrimSpace(strings.Split(field, "#")[0]) // remove inline comment | |
if field == "" { | |
continue | |
} | |
num, err := strconv.ParseFloat(field, 64) | |
if err != nil { | |
continue // skip non-numeric | |
} | |
floatRow = append(floatRow, num) | |
} | |
if len(floatRow) > 0 { | |
data = append(data, floatRow) | |
} | |
} | |
if len(headers) == 0 && len(data) > 0 { | |
// generate generic headers | |
for i := range data[0] { | |
headers = append(headers, fmt.Sprintf("Column %d", i+1)) | |
} | |
} | |
return transpose(data), headers, nil | |
} | |
// transpose turns row-major data into column-major data.
//
// The number of columns in the result is the length of the first row, and
// every column has one entry per input row. A row shorter than the first
// leaves 0 in the columns it does not reach; values beyond the first row's
// length in longer rows are not copied. An empty input yields nil.
func transpose(data [][]float64) [][]float64 {
	rowCount := len(data)
	if rowCount == 0 {
		return nil
	}

	colCount := len(data[0])
	columns := make([][]float64, colCount)
	for c := 0; c < colCount; c++ {
		column := make([]float64, rowCount)
		for r, row := range data {
			if c >= len(row) {
				continue // short row: keep the zero value
			}
			column[r] = row[c]
		}
		columns[c] = column
	}
	return columns
}
func analyzeColumn(col []float64, name string) { | |
mean := stat.Mean(col, nil) | |
circMean := stat.CircularMean(col, nil) | |
geomMean := stat.GeometricMean(col, nil) | |
harmMean := stat.HarmonicMean(col, nil) | |
variance := stat.Variance(col, nil) | |
stdDev := stat.StdDev(col, nil) | |
popStdDev := stat.PopStdDev(col, nil) | |
skew := stat.Skew(col, nil) | |
kurt := stat.ExKurtosis(col, nil) | |
min, max := col[0], col[0] | |
sum := 0.0 | |
for _, v := range col { | |
sum += v | |
if v < min { | |
min = v | |
} | |
if v > max { | |
max = v | |
} | |
} | |
mode, modeCnt := stat.Mode(col, nil) | |
stat.SortWeighted(col, nil) | |
median := stat.Quantile( 0.5, 1, col, nil) | |
quartile25 := stat.Quantile( 0.25, 1, col, nil) | |
quartile75 := stat.Quantile( 0.75, 1, col, nil) | |
fmt.Printf("Statistics for %s:\n", name) | |
fmt.Printf(" Row Count : %d\n", len(col)) | |
fmt.Printf(" Min : %.15e\n", min) | |
fmt.Printf(" 25%% Quartile : %.15e\n", quartile25) | |
fmt.Printf(" Median : %.15e\n", median) | |
fmt.Printf(" 75%% Quartile : %.15e\n", quartile75) | |
fmt.Printf(" Max : %.15e\n", max) | |
fmt.Printf(" Range : %.15e\n", max - min) | |
fmt.Printf(" Mode : %.15e (Count %.0f)\n", mode, modeCnt) | |
fmt.Printf(" Mean : %.15e\n", mean) | |
fmt.Printf(" Circular Mean : %.15e\n", circMean) | |
fmt.Printf(" Geometric Mean : %.15e\n", geomMean) | |
fmt.Printf(" Harmonic Mean : %.15e\n", harmMean) | |
fmt.Printf(" Sum : %.15e\n", sum) | |
fmt.Printf(" Variance : %.15e\n", variance) | |
fmt.Printf(" Std Deviation : %.15e\n", stdDev) | |
fmt.Printf(" Population Std Dev : %.15e\n", popStdDev) | |
fmt.Printf(" Skew : %.15e\n", skew) | |
fmt.Printf(" Kurtosis : %.15e\n", kurt) | |
fmt.Println() | |
} | |
func main() { | |
header := flag.Bool("header", false, "Treat first row as header") | |
quiet := flag.Bool("quiet", false, "Suppress terminal output") | |
flag.Parse() | |
files := flag.Args() | |
if len(files) == 0 { | |
log.Fatal("Please provide at least one CSV file") | |
} | |
for _, file := range files { | |
data, headers, err := parseCSVFile(file, *header) | |
if err != nil { | |
log.Printf("Skipping file %s: %v\n", file, err) | |
continue | |
} | |
if !*quiet { | |
fmt.Printf("\nAnalyzing file: %s\n", file) | |
fmt.Printf("Number of data columns read: %d\n", cap(data)) | |
fmt.Printf("Number of data rows read: %d\n", len(data[0])) | |
fmt.Println(strings.Repeat("-", 60)) | |
} | |
for i, col := range data { | |
if !*quiet { | |
analyzeColumn(col, headers[i]) | |
} | |
} | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment