Skip to content

Instantly share code, notes, and snippets.

@lambdalisue
Last active August 27, 2024 10:10
Show Gist options
  • Save lambdalisue/22fe05493d4a3a6c34651959e62ebb2f to your computer and use it in GitHub Desktop.
Save lambdalisue/22fe05493d4a3a6c34651959e62ebb2f to your computer and use it in GitHub Desktop.
English word length frequency
import { exists } from "jsr:@std/fs";
import { map, reduce } from "jsr:@core/iterutil";
// Remote location of the word list and the name of the local cache file.
const sourceUrl =
  "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt";
const filename = "words_alpha.txt";
// Download the word list only when no local copy exists yet.
if (!(await exists(filename))) {
  const resp = await fetch(sourceUrl);
  // Refuse to cache an error page (or an empty response) as the word list;
  // otherwise later runs would silently analyse garbage.
  if (!resp.ok || resp.body === null) {
    throw new Error(
      `Failed to download ${sourceUrl}: ${resp.status} ${resp.statusText}`,
    );
  }
  // Stream the response body straight into the cache file.
  await Deno.writeFile(filename, resp.body);
}
// Read the cached word list and turn it into a list of word lengths.
const content = await Deno.readTextFile(filename);
const samples = content
  .split("\n")
  // Trim first so that whitespace-only lines (e.g. a lone "\r" from CRLF
  // input) become length 0 and are dropped instead of being counted.
  .map((v) => v.trim().length)
  .filter((n) => n > 0);
/**
 * Adds up all numbers in `value`.
 *
 * @param value Numbers to add.
 * @returns The total; `0` for an empty array.
 */
function sum(value: number[]): number {
  // The explicit initial value keeps `reduce` from throwing on an empty array.
  return value.reduce((a, v) => a + v, 0);
}
/**
 * Arithmetic mean of `value`: the total computed by `sum` divided by the
 * number of elements.
 *
 * @param value Numbers to average.
 * @returns The total divided by `value.length`.
 */
function mean(value: number[]): number {
  const total = sum(value);
  const count = value.length;
  return total / count;
}
/**
 * Returns the most frequent number in `value`.
 *
 * Values are grouped by identity and each group is reduced to a
 * `[number, occurrences]` pair. The pair with the highest count wins; on a
 * tie the pair that appears earlier in the grouped entries is kept, because
 * a candidate only replaces the current best when its count is strictly
 * greater.
 *
 * @param value Numbers to inspect.
 * @returns The number with the highest occurrence count.
 */
function mode(value: number[]): number {
  const groups = Map.groupBy(value, (v) => v);
  const counts = map(
    groups.entries(),
    ([n, members]) => [n, members.length] as const,
  );
  const best = reduce(counts, (top, cur) => (cur[1] > top[1] ? cur : top));
  return best![0];
}
/**
 * Returns the median of `value`.
 *
 * The input does not need to be sorted: a numerically sorted copy is used,
 * and the caller's array is left untouched. For an even number of elements
 * the result is the average of the two middle elements.
 *
 * @param value Numbers to inspect, in any order.
 * @returns The median, or `NaN` when `value` is empty.
 */
function median(value: number[]): number {
  if (value.length === 0) {
    return NaN;
  }
  // Sort a copy with a numeric comparator; the default sort compares strings.
  const sorted = [...value].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  if (sorted.length % 2 === 0) {
    return (sorted[mid - 1] + sorted[mid]) / 2;
  }
  return sorted[mid];
}
/**
 * Returns the three quartiles of `value`.
 *
 * The input does not need to be sorted: a numerically sorted copy is split
 * into a lower and an upper half. With an odd number of elements the middle
 * element belongs to neither half. Each quartile is computed with `median`.
 *
 * @param value Numbers to inspect, in any order.
 * @returns `[q1, q2, q3]`: the median of the lower half, of the whole input,
 *   and of the upper half.
 */
function quantile(value: number[]): [q1: number, q2: number, q3: number] {
  // Halves are only meaningful on ordered data, so sort a copy first.
  const sorted = [...value].sort((a, b) => a - b);
  const freq = sorted.length;
  // For odd counts floor/ceil differ by one, which skips the middle element.
  const head = sorted.slice(0, Math.floor(freq / 2));
  const tail = sorted.slice(Math.ceil(freq / 2));
  return [median(head), median(sorted), median(tail)];
}
// Print the summary statistics of the word-length samples, one per line.
console.log(`Frequency: ${samples.length}`);
console.log(`Mean: ${mean(samples)}`);
console.log(`Mode: ${mode(samples)}`);
console.log(`Median: ${median(samples)}`);
console.log(`Quantile: ${quantile(samples)}`);

// Build the histogram lazily: one [length, occurrences] pair per word length.
const lengthGroups = Map.groupBy(samples, (length) => length);
const counter = map(
  lengthGroups.entries(),
  ([length, members]) => [length, members.length] as const,
);

// Print the histogram rows ordered by ascending word length.
console.log("Data:");
const rows = [...counter];
rows.sort(([x], [y]) => x - y);
for (const [length, count] of rows) {
  console.log(`${length.toString().padStart(2)}, ${count}`);
}
@lambdalisue
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment