Last active
June 19, 2024 18:25
-
-
Save lakshayg/ba280a0fa6619ae14a7934d8c4a7321a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// A solution to the problem described in | |
// https://www.youtube.com/watch?v=_-AfhLQfb6w | |
// | |
// How many sets of 5, 5-letter words exist such | |
// that the set has 25 distinct characters? | |
// | |
// It takes ~8sec to run on my machine. | |
// | |
// Usage: ./a.out < words_alpha.txt | |
// | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <string.h> | |
#include <time.h> | |
typedef uint_fast32_t Bitset; | |
typedef size_t WordIndex; | |
#define popcount(x) \ | |
_Generic((x), \ | |
unsigned int: __builtin_popcount, \ | |
unsigned long: __builtin_popcountl, \ | |
unsigned long long: __builtin_popcountll)(x) | |
#define WORD_TABLE_CAPACITY 40000 // max number of 5 letter words | |
#define LINE_CAPACITY 64 // max number of chars allowed per line in words list (including newline) | |
#define WORD_LEN 5 | |
// Select those elements from `candidates` whose bitset does not intersect with `hash` | |
size_t filter_words(WordIndex *restrict out, // Output array | |
Bitset *restrict word_hash, // Word index -> bitset | |
WordIndex *restrict candidates, // Word indices to choose from. If null, use [begin, end) | |
size_t candidates_begin, // | |
size_t candidates_end, // | |
Bitset hash // Output word bitsets should not intersect with this bitset | |
) | |
{ | |
size_t out_size = 0; | |
#pragma unroll | |
for (size_t i = candidates_begin; i < candidates_end; ++i) | |
{ | |
WordIndex candidate = candidates ? candidates[i] : i; | |
if ((word_hash[candidate] & hash) == 0) | |
{ | |
out[out_size++] = candidate; | |
} | |
} | |
return out_size; | |
} | |
int main() | |
{ | |
struct timespec start_time; | |
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start_time); | |
// We store words made of 5 distinct characters in `word_list`. | |
// word_hash contains bitsets representing the chars in each word. | |
WordIndex word_table_size = 0; | |
char word_list[WORD_TABLE_CAPACITY][WORD_LEN]; | |
Bitset word_hash[WORD_TABLE_CAPACITY]; | |
while (1) | |
{ | |
// Read directly into the word table. | |
// Will be overwritten later if word is rejected | |
char *line = word_list[word_table_size]; | |
// if nothing was read, exit the loop | |
if (!fgets(line, LINE_CAPACITY, stdin)) | |
{ | |
break; | |
} | |
// Reject words with length != WORD_LEN | |
if (strcspn(line, "\r\n") != WORD_LEN) | |
{ | |
continue; | |
} | |
Bitset ONE = 1; | |
Bitset hash = 0; | |
for (size_t i = 0; i < WORD_LEN; ++i) | |
{ | |
hash = hash | (ONE << (line[i] - 'a')); | |
} | |
// Reject words with repeated chars | |
if (popcount(hash) != WORD_LEN) | |
{ | |
continue; | |
} | |
word_hash[word_table_size] = hash; | |
line = word_list[++word_table_size]; | |
} | |
WordIndex mem[WORD_TABLE_CAPACITY]; // buffer for storing intermediate results | |
for (size_t ai = 0; ai < word_table_size; ++ai) | |
{ | |
const WordIndex a = ai; | |
const Bitset hash_a = word_hash[a]; | |
WordIndex *table_b = mem; // words compatible with {a} | |
const size_t table_b_size = filter_words(table_b, word_hash, NULL, ai + 1, word_table_size, hash_a); | |
for (size_t bi = 0; bi < table_b_size; ++bi) | |
{ | |
const WordIndex b = table_b[bi]; | |
const Bitset hash_b = hash_a | word_hash[b]; | |
WordIndex *table_c = table_b + table_b_size; // words compatible with {a, b} | |
const size_t table_c_size = filter_words(table_c, word_hash, table_b, bi + 1, table_b_size, hash_b); | |
for (size_t ci = 0; ci < table_c_size; ++ci) | |
{ | |
const WordIndex c = table_c[ci]; | |
const Bitset hash_c = hash_b | word_hash[c]; | |
WordIndex *table_d = table_c + table_c_size; // words compatible with {a, b, c} | |
const size_t table_d_size = filter_words(table_d, word_hash, table_c, ci + 1, table_c_size, hash_c); | |
for (size_t di = 0; di < table_d_size; ++di) | |
{ | |
const WordIndex d = table_d[di]; | |
const Bitset hash_d = hash_c | word_hash[d]; | |
WordIndex *table_e = table_d + table_d_size; // words compatible with {a, b, c, d} | |
const size_t table_e_size = filter_words(table_e, word_hash, table_d, di + 1, table_d_size, hash_d); | |
for (size_t ei = 0; ei < table_e_size; ++ei) | |
{ | |
const WordIndex e = table_e[ei]; | |
struct timespec now; | |
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &now); | |
double elapsed = (now.tv_sec - start_time.tv_sec) + 1e-9 * (now.tv_nsec - start_time.tv_nsec); | |
// clang-format off | |
printf("%.*s %.*s %.*s %.*s %.*s %% elapsed-sec=%.3lf, mem-size=%lu\n", | |
WORD_LEN, word_list[a], | |
WORD_LEN, word_list[b], | |
WORD_LEN, word_list[c], | |
WORD_LEN, word_list[d], | |
WORD_LEN, word_list[e], | |
elapsed, | |
table_b_size + table_c_size + table_d_size); | |
// clang-format on | |
} | |
} | |
} | |
} | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment