Last active
December 20, 2025 17:42
-
-
Save skeeto/3fe6c9bfebf86d2fbc3f975f16973746 to your computer and use it in GitHub Desktop.
Baseline for a benchmark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // https://old.reddit.com/r/C_Programming/comments/1pr881z/ | |
| // $ cc -std=gnu23 -O -o baseline baseline.c | |
| #include <assert.h> | |
| #include <stddef.h> | |
| #include <stdint.h> | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <string.h> | |
| #include <unistd.h> | |
| #define new(a, n, t) (t *)alloc(a, n, sizeof(t), _Alignof(t)) | |
| typedef struct { char *beg, *end; } Arena; | |
| static char *alloc(Arena *a, ptrdiff_t count, int size, int align) | |
| { | |
| ptrdiff_t pad = (ptrdiff_t)-(uintptr_t)a->beg & (align - 1); | |
| assert(count < (a->end - a->beg - pad)/size); // TODO: oom handling | |
| char *r = a->beg + pad; | |
| a->beg += pad + count*size; | |
| return memset(r, 0, (size_t)(count*size)); | |
| } | |
| typedef struct { | |
| char *data; | |
| ptrdiff_t len; | |
| } Str; | |
| static Str clone(Arena *a, Str s) | |
| { | |
| Str r = s; | |
| r.data = new(a, r.len, char); | |
| memcpy(r.data, s.data, (size_t)r.len); | |
| return r; | |
| } | |
| static bool equals(Str a, Str b) | |
| { | |
| return a.len==b.len && !memcmp(a.data, b.data, (size_t)a.len); | |
| } | |
| static ptrdiff_t compare(Str a, Str b) | |
| { | |
| ptrdiff_t len = a.len<b.len ? a.len : b.len; | |
| for (ptrdiff_t i = 0; i < len; i++) { | |
| if (a.data[i] != b.data[i]) { | |
| return (a.data[i]&255) - (b.data[i]&255); | |
| } | |
| } | |
| return a.len - b.len; | |
| } | |
| enum { MAPEXP = 3 }; | |
| typedef struct Map Map; | |
| struct Map { | |
| int64_t count; | |
| Str key; | |
| Map *child[1<<MAPEXP]; | |
| Map *next; | |
| }; | |
| static uint64_t hash(Str s) | |
| { | |
| uint64_t r = 0x100; | |
| for (ptrdiff_t i = 0; i < s.len; i++) { | |
| r ^= s.data[i] & 255; | |
| r *= 1111111111111111111u; | |
| } | |
| return r; | |
| } | |
| static Map *upsert(Map **m, Str key, Arena *a) | |
| { | |
| for (uint64_t h = hash(key); *m; h <<= MAPEXP) { | |
| if (equals(key, (*m)->key)) { | |
| return *m; | |
| } | |
| m = &(*m)->child[h>>(64 - MAPEXP)]; | |
| } | |
| *m = new(a, 1, Map); | |
| (*m)->key = clone(a, key); | |
| return *m; | |
| } | |
| static ptrdiff_t descending(Map *a, Map *b) | |
| { | |
| if (a->count < b->count) { | |
| return 1; | |
| } else if (a->count > b->count) { | |
| return -1; | |
| } | |
| return compare(a->key, b->key); | |
| } | |
| static Map *sort(Map *head) | |
| { | |
| if (!head || !head->next) { | |
| return head; | |
| } | |
| ptrdiff_t len = 0; | |
| Map *tail = head; | |
| Map *last = head; | |
| for (Map *m = head; m; m = m->next, len++) { | |
| if (len & 1) { | |
| last = tail; | |
| tail = tail->next; | |
| } | |
| } | |
| last->next = 0; | |
| head = sort(head); | |
| tail = sort(tail); | |
| Map *rhead = 0; | |
| Map **rtail = &rhead; | |
| while (head && tail) { | |
| if (descending(head, tail) < 0) { | |
| *rtail = head; | |
| head = head->next; | |
| } else { | |
| *rtail = tail; | |
| tail = tail->next; | |
| } | |
| rtail = &(*rtail)->next; | |
| } | |
| *rtail = head ? head : tail; | |
| return rhead; | |
| } | |
| static bool wordchar(int c) | |
| { | |
| return | |
| (c>='A' && c<='Z') || | |
| (c>='a' && c<='z') || | |
| c == '\'' || c>127; | |
| } | |
| static char downcase(int c) | |
| { | |
| if (c>='A' && c<='Z') { | |
| return (char)(c - ('A' - 'a')); | |
| } | |
| return (char)c; | |
| } | |
| typedef struct { | |
| int off; | |
| int len; | |
| char buf[1<<14]; | |
| } Input; | |
| static int get(Input *b) | |
| { | |
| assert(b->len >= 0); | |
| if (b->off == b->len) { | |
| b->off = 0; | |
| b->len = (int)read(0, b->buf, sizeof(b->buf)); | |
| if (b->len < 1) { | |
| return -1; | |
| } | |
| } | |
| return b->buf[b->off++] & 255; | |
| } | |
| int main() | |
| { | |
| ptrdiff_t cap = (ptrdiff_t)1<<38; | |
| char *mem = 0; | |
| for (; !mem; cap >>= 1) { | |
| mem = malloc((size_t)cap); | |
| } | |
| Arena a = {mem, mem+cap}; | |
| char buf[100] = {}; | |
| int len = 0; | |
| Map *counts = 0; | |
| Map *head = 0; | |
| Map **tail = &head; | |
| Input *b = new(&a, 1, Input); | |
| enum { OUTSIDE, INSIDE, IGNORE } state = OUTSIDE; | |
| for (int c = 0; c >= 0;) { | |
| c = get(b); | |
| if (wordchar(c)) { | |
| switch (state) { | |
| case OUTSIDE: | |
| state = INSIDE; | |
| len = 0; | |
| // fallthrough | |
| case INSIDE: | |
| if (len == (int)sizeof(buf)) { | |
| state = IGNORE; // too long to be a "word" | |
| } else { | |
| buf[len++] = downcase(c); | |
| } | |
| break; | |
| case IGNORE: | |
| break; | |
| } | |
| } else { | |
| switch (state) { | |
| case INSIDE: | |
| Str word = (Str){buf, len}; | |
| Map *entry = upsert(&counts, word, &a); | |
| if (++entry->count == 1) { | |
| *tail = entry; | |
| tail = &entry->next; | |
| } | |
| state = OUTSIDE; | |
| break; | |
| case OUTSIDE: | |
| break; | |
| case IGNORE: | |
| state = OUTSIDE; | |
| break; | |
| } | |
| } | |
| } | |
| Map *m = sort(head); | |
| for (int i = 0; i<25 && m; i++) { | |
| printf( | |
| "%.*s\t%lld\n", | |
| (int)m->key.len, m->key.data, | |
| (long long)m->count | |
| ); | |
| m = m->next; | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment