-
-
Save nixon/8087581 to your computer and use it in GitHub Desktop.
add median absolute deviation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/awk -f | |
# based on: | |
# https://gist.github.com/mikewallace1979/3973059 | |
# Quick hacky script that computes basic statistics from a single | |
# column of numbers from stdin - inspired by the CouchDB _stats reducer | |
# median(arr): return the median of the values stored in arr.
#   arr     array of numbers (indices are irrelevant; arr is not modified)
#   sorted, n, _m   locals only -- callers pass just arr
# Requires gawk, because asort() is a gawk extension.
function median(arr, sorted, n, _m) {
    # asort() copies the values of arr into sorted[1..n] in ascending order
    # and returns the element count.
    n = asort(arr, sorted)
    # _m doubles as the lower-middle index until the result is known.
    _m = int(n / 2)
    if (n % 2 == 0) {
        # Even count: average the two central values.
        return (sorted[_m] + sorted[_m + 1]) / 2
    }
    # Odd count: the single central value.
    return sorted[_m + 1]
}
# median_absolute_deviation(arr, _median): return the median of the absolute
# deviations |arr[i] - _median| over every element of arr.
#   arr       array of numbers (not modified)
#   _median   the centre to measure deviations from (normally median(arr))
#   _m, len, i, dif   locals only -- callers pass just arr and _median
# https://en.wikipedia.org/wiki/Median_absolute_deviation
function median_absolute_deviation(arr, _median, _m, len, i, dif) {
    # dif is declared as a parameter so it stays local; without that it would
    # be a global and clobber any same-named variable in the calling script.
    len = 0
    for (i in arr) {
        dif = arr[i] - _median
        if (dif < 0) dif = -dif # abs()
        # Index values are arbitrary: median() re-sorts by value.
        _m[len++] = dif
    }
    return median(_m)
}
# asort() is only available in gawk, so the median-based statistics are
# enabled only when gawk's PROCINFO array reports a version.
BEGIN { if (PROCINFO["version"]) want_median = 1 }

# Skip lines whose first field is not a plain decimal number
# (optional leading '-', optional integer part, optional '.', digits).
$1 !~ /^-?[0-9]*\.?[0-9]+$/ {
    #print "skipping: \"" $1 "\"";
    next
}

# Only accepted (numeric) lines reach this rule. They are counted in `count`
# rather than NR, because NR also includes the lines skipped above and would
# skew the count, the mean and the standard deviation.
{
    count++
    # Seed min/max from the first *accepted* value, which is not necessarily
    # the first input line.
    if (count == 1 || $1 < min) min = $1
    if (count == 1 || $1 > max) max = $1
    if (want_median) line[count] = $1
    sum += $1
    sumsq += $1 ^ 2
}

END {
    # Stolen from http://www.commandlinefu.com/commands/view/1661/display-the-standard-deviation-of-a-column-of-numbers-with-awk
    if (count > 0) {
        mean = sum / count
        # Population variance as E[x^2] - E[x]^2. Rounding can push this a
        # hair below zero for near-constant data, so clamp before sqrt().
        variance = sumsq / count - mean ^ 2
        if (variance < 0) variance = 0
        stddev = sqrt(variance)
    }
    print "sum:", sum
    # "+ 0" makes an empty input print 0 rather than an empty string.
    print "count:", count + 0
    print "min:", min
    print "max:", max
    print "mean:", mean
    print "standard deviation:", stddev
    if (want_median) {
        m = median(line)
        mad = median_absolute_deviation(line, m)
        print "median:", m
        print "mad:", mad
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for sharing -- I'm adding your idea of skipping lines that aren't numbers to my AWK stats tool. See Num at https://github.com/numcommand/num