Skip to content

Instantly share code, notes, and snippets.

@skeeto
Last active December 20, 2025 17:42
Show Gist options
  • Select an option

  • Save skeeto/3fe6c9bfebf86d2fbc3f975f16973746 to your computer and use it in GitHub Desktop.

Select an option

Save skeeto/3fe6c9bfebf86d2fbc3f975f16973746 to your computer and use it in GitHub Desktop.
Baseline for a benchmark
// https://old.reddit.com/r/C_Programming/comments/1pr881z/
// $ cc -std=gnu23 -O -o baseline baseline.c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#define new(a, n, t) (t *)alloc(a, n, sizeof(t), _Alignof(t))
typedef struct { char *beg, *end; } Arena;
static char *alloc(Arena *a, ptrdiff_t count, int size, int align)
{
ptrdiff_t pad = (ptrdiff_t)-(uintptr_t)a->beg & (align - 1);
assert(count < (a->end - a->beg - pad)/size); // TODO: oom handling
char *r = a->beg + pad;
a->beg += pad + count*size;
return memset(r, 0, (size_t)(count*size));
}
typedef struct {
char *data;
ptrdiff_t len;
} Str;
static Str clone(Arena *a, Str s)
{
Str r = s;
r.data = new(a, r.len, char);
memcpy(r.data, s.data, (size_t)r.len);
return r;
}
static bool equals(Str a, Str b)
{
return a.len==b.len && !memcmp(a.data, b.data, (size_t)a.len);
}
static ptrdiff_t compare(Str a, Str b)
{
ptrdiff_t len = a.len<b.len ? a.len : b.len;
for (ptrdiff_t i = 0; i < len; i++) {
if (a.data[i] != b.data[i]) {
return (a.data[i]&255) - (b.data[i]&255);
}
}
return a.len - b.len;
}
enum { MAPEXP = 3 };
typedef struct Map Map;
struct Map {
int64_t count;
Str key;
Map *child[1<<MAPEXP];
Map *next;
};
static uint64_t hash(Str s)
{
uint64_t r = 0x100;
for (ptrdiff_t i = 0; i < s.len; i++) {
r ^= s.data[i] & 255;
r *= 1111111111111111111u;
}
return r;
}
static Map *upsert(Map **m, Str key, Arena *a)
{
for (uint64_t h = hash(key); *m; h <<= MAPEXP) {
if (equals(key, (*m)->key)) {
return *m;
}
m = &(*m)->child[h>>(64 - MAPEXP)];
}
*m = new(a, 1, Map);
(*m)->key = clone(a, key);
return *m;
}
static ptrdiff_t descending(Map *a, Map *b)
{
if (a->count < b->count) {
return 1;
} else if (a->count > b->count) {
return -1;
}
return compare(a->key, b->key);
}
static Map *sort(Map *head)
{
if (!head || !head->next) {
return head;
}
ptrdiff_t len = 0;
Map *tail = head;
Map *last = head;
for (Map *m = head; m; m = m->next, len++) {
if (len & 1) {
last = tail;
tail = tail->next;
}
}
last->next = 0;
head = sort(head);
tail = sort(tail);
Map *rhead = 0;
Map **rtail = &rhead;
while (head && tail) {
if (descending(head, tail) < 0) {
*rtail = head;
head = head->next;
} else {
*rtail = tail;
tail = tail->next;
}
rtail = &(*rtail)->next;
}
*rtail = head ? head : tail;
return rhead;
}
static bool wordchar(int c)
{
return
(c>='A' && c<='Z') ||
(c>='a' && c<='z') ||
c == '\'' || c>127;
}
static char downcase(int c)
{
if (c>='A' && c<='Z') {
return (char)(c - ('A' - 'a'));
}
return (char)c;
}
typedef struct {
int off;
int len;
char buf[1<<14];
} Input;
static int get(Input *b)
{
assert(b->len >= 0);
if (b->off == b->len) {
b->off = 0;
b->len = (int)read(0, b->buf, sizeof(b->buf));
if (b->len < 1) {
return -1;
}
}
return b->buf[b->off++] & 255;
}
int main()
{
ptrdiff_t cap = (ptrdiff_t)1<<38;
char *mem = 0;
for (; !mem; cap >>= 1) {
mem = malloc((size_t)cap);
}
Arena a = {mem, mem+cap};
char buf[100] = {};
int len = 0;
Map *counts = 0;
Map *head = 0;
Map **tail = &head;
Input *b = new(&a, 1, Input);
enum { OUTSIDE, INSIDE, IGNORE } state = OUTSIDE;
for (int c = 0; c >= 0;) {
c = get(b);
if (wordchar(c)) {
switch (state) {
case OUTSIDE:
state = INSIDE;
len = 0;
// fallthrough
case INSIDE:
if (len == (int)sizeof(buf)) {
state = IGNORE; // too long to be a "word"
} else {
buf[len++] = downcase(c);
}
break;
case IGNORE:
break;
}
} else {
switch (state) {
case INSIDE:
Str word = (Str){buf, len};
Map *entry = upsert(&counts, word, &a);
if (++entry->count == 1) {
*tail = entry;
tail = &entry->next;
}
state = OUTSIDE;
break;
case OUTSIDE:
break;
case IGNORE:
state = OUTSIDE;
break;
}
}
}
Map *m = sort(head);
for (int i = 0; i<25 && m; i++) {
printf(
"%.*s\t%lld\n",
(int)m->key.len, m->key.data,
(long long)m->count
);
m = m->next;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment