Skip to content

Instantly share code, notes, and snippets.

@lakshayg
Last active June 19, 2024 18:25
Show Gist options
  • Save lakshayg/ba280a0fa6619ae14a7934d8c4a7321a to your computer and use it in GitHub Desktop.
Save lakshayg/ba280a0fa6619ae14a7934d8c4a7321a to your computer and use it in GitHub Desktop.
//
// A solution to the problem described in
// https://www.youtube.com/watch?v=_-AfhLQfb6w
//
// How many sets of 5, 5-letter words exist such
// that the set has 25 distinct characters?
//
// It takes ~8sec to run on my machine.
//
// Usage: ./a.out < words_alpha.txt
//
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
typedef uint_fast32_t Bitset;
typedef size_t WordIndex;
#define popcount(x) \
_Generic((x), \
unsigned int: __builtin_popcount, \
unsigned long: __builtin_popcountl, \
unsigned long long: __builtin_popcountll)(x)
#define WORD_TABLE_CAPACITY 40000 // max number of 5 letter words
#define LINE_CAPACITY 64 // max number of chars allowed per line in words list (including newline)
#define WORD_LEN 5
// Select those elements from `candidates` whose bitset does not intersect with `hash`
size_t filter_words(WordIndex *restrict out, // Output array
Bitset *restrict word_hash, // Word index -> bitset
WordIndex *restrict candidates, // Word indices to choose from. If null, use [begin, end)
size_t candidates_begin, //
size_t candidates_end, //
Bitset hash // Output word bitsets should not intersect with this bitset
)
{
size_t out_size = 0;
#pragma unroll
for (size_t i = candidates_begin; i < candidates_end; ++i)
{
WordIndex candidate = candidates ? candidates[i] : i;
if ((word_hash[candidate] & hash) == 0)
{
out[out_size++] = candidate;
}
}
return out_size;
}
int main()
{
struct timespec start_time;
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start_time);
// We store words made of 5 distinct characters in `word_list`.
// word_hash contains bitsets representing the chars in each word.
WordIndex word_table_size = 0;
char word_list[WORD_TABLE_CAPACITY][WORD_LEN];
Bitset word_hash[WORD_TABLE_CAPACITY];
while (1)
{
// Read directly into the word table.
// Will be overwritten later if word is rejected
char *line = word_list[word_table_size];
// if nothing was read, exit the loop
if (!fgets(line, LINE_CAPACITY, stdin))
{
break;
}
// Reject words with length != WORD_LEN
if (strcspn(line, "\r\n") != WORD_LEN)
{
continue;
}
Bitset ONE = 1;
Bitset hash = 0;
for (size_t i = 0; i < WORD_LEN; ++i)
{
hash = hash | (ONE << (line[i] - 'a'));
}
// Reject words with repeated chars
if (popcount(hash) != WORD_LEN)
{
continue;
}
word_hash[word_table_size] = hash;
line = word_list[++word_table_size];
}
WordIndex mem[WORD_TABLE_CAPACITY]; // buffer for storing intermediate results
for (size_t ai = 0; ai < word_table_size; ++ai)
{
const WordIndex a = ai;
const Bitset hash_a = word_hash[a];
WordIndex *table_b = mem; // words compatible with {a}
const size_t table_b_size = filter_words(table_b, word_hash, NULL, ai + 1, word_table_size, hash_a);
for (size_t bi = 0; bi < table_b_size; ++bi)
{
const WordIndex b = table_b[bi];
const Bitset hash_b = hash_a | word_hash[b];
WordIndex *table_c = table_b + table_b_size; // words compatible with {a, b}
const size_t table_c_size = filter_words(table_c, word_hash, table_b, bi + 1, table_b_size, hash_b);
for (size_t ci = 0; ci < table_c_size; ++ci)
{
const WordIndex c = table_c[ci];
const Bitset hash_c = hash_b | word_hash[c];
WordIndex *table_d = table_c + table_c_size; // words compatible with {a, b, c}
const size_t table_d_size = filter_words(table_d, word_hash, table_c, ci + 1, table_c_size, hash_c);
for (size_t di = 0; di < table_d_size; ++di)
{
const WordIndex d = table_d[di];
const Bitset hash_d = hash_c | word_hash[d];
WordIndex *table_e = table_d + table_d_size; // words compatible with {a, b, c, d}
const size_t table_e_size = filter_words(table_e, word_hash, table_d, di + 1, table_d_size, hash_d);
for (size_t ei = 0; ei < table_e_size; ++ei)
{
const WordIndex e = table_e[ei];
struct timespec now;
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &now);
double elapsed = (now.tv_sec - start_time.tv_sec) + 1e-9 * (now.tv_nsec - start_time.tv_nsec);
// clang-format off
printf("%.*s %.*s %.*s %.*s %.*s %% elapsed-sec=%.3lf, mem-size=%lu\n",
WORD_LEN, word_list[a],
WORD_LEN, word_list[b],
WORD_LEN, word_list[c],
WORD_LEN, word_list[d],
WORD_LEN, word_list[e],
elapsed,
table_b_size + table_c_size + table_d_size);
// clang-format on
}
}
}
}
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment