lakshayg · June 19, 2024 18:25
diff --git a/five_letter_words.c b/five_letter_words.c
 //
 // A solution to the problem described in
 // https://www.youtube.com/watch?v=_-AfhLQfb6w
 //
 // How many sets of 5, 5-letter words exist such
 // that the set has 25 distinct characters?
 //
 // It takes ~8sec to run on my machine.
 //
 // Usage: ./a.out < words_alpha.txt
 //
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 #include <time.h>

 typedef uint_fast32_t Bitset;
 typedef size_t WordIndex;

 #define popcount(x)                                                                                                    \
    _Generic((x),                                                                                                      \
        unsigned int: __builtin_popcount,                                                                              \
        unsigned long: __builtin_popcountl,                                                                            \
        unsigned long long: __builtin_popcountll)(x)

 #define WORD_TABLE_CAPACITY 40000 // max number of 5 letter words
 #define LINE_CAPACITY 64          // max number of chars allowed per line in words list (including newline)
 #define WORD_LEN 5

 // Select those elements from `candidates` whose bitset does not intersect with `hash`
 size_t filter_words(WordIndex *restrict out,        // Output array
                    Bitset *restrict word_hash,     // Word index -> bitset
                    WordIndex *restrict candidates, // Word indices to choose from. If null, use [begin, end)
                    size_t candidates_begin,        //
                    size_t candidates_end,          //
                    Bitset hash                     // Output word bitsets should not intersect with this bitset
 )
 {
    size_t out_size = 0;
 #pragma unroll
    for (size_t i = candidates_begin; i < candidates_end; ++i)
    {
        WordIndex candidate = candidates ? candidates[i] : i;
        if ((word_hash[candidate] & hash) == 0)
        {
            out[out_size++] = candidate;
        }
    }
    return out_size;
 }

 int main()
 {
    struct timespec start_time;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start_time);

    // We store words made of 5 distinct characters in `word_list`.
    // word_hash contains bitsets representing the chars in each word.
    WordIndex word_table_size = 0;
    char word_list[WORD_TABLE_CAPACITY][WORD_LEN];
    Bitset word_hash[WORD_TABLE_CAPACITY];

    while (1)
    {
        // Read directly into the word table.
        // Will be overwritten later if word is rejected
        char *line = word_list[word_table_size];

        // if nothing was read, exit the loop
        if (!fgets(line, LINE_CAPACITY, stdin))
        {
            break;
        }

        // Reject words with length != WORD_LEN
        if (strcspn(line, "\r\n") != WORD_LEN)
        {
            continue;
        }

        Bitset ONE = 1;
        Bitset hash = 0;
        for (size_t i = 0; i < WORD_LEN; ++i)
        {
            hash = hash | (ONE << (line[i] - 'a'));
        }

        // Reject words with repeated chars
        if (popcount(hash) != WORD_LEN)
        {
            continue;
        }

        word_hash[word_table_size] = hash;
        line = word_list[++word_table_size];
    }

    WordIndex mem[WORD_TABLE_CAPACITY]; // buffer for storing intermediate results

    for (size_t ai = 0; ai < word_table_size; ++ai)
    {
        const WordIndex a = ai;
        const Bitset hash_a = word_hash[a];
        WordIndex *table_b = mem; // words compatible with {a}
        const size_t table_b_size = filter_words(table_b, word_hash, NULL, ai + 1, word_table_size, hash_a);

        for (size_t bi = 0; bi < table_b_size; ++bi)
        {
            const WordIndex b = table_b[bi];
            const Bitset hash_b = hash_a | word_hash[b];
            WordIndex *table_c = table_b + table_b_size; // words compatible with {a, b}
            const size_t table_c_size = filter_words(table_c, word_hash, table_b, bi + 1, table_b_size, hash_b);

            for (size_t ci = 0; ci < table_c_size; ++ci)
            {
                const WordIndex c = table_c[ci];
                const Bitset hash_c = hash_b | word_hash[c];
                WordIndex *table_d = table_c + table_c_size; // words compatible with {a, b, c}
                const size_t table_d_size = filter_words(table_d, word_hash, table_c, ci + 1, table_c_size, hash_c);

                for (size_t di = 0; di < table_d_size; ++di)
                {
                    const WordIndex d = table_d[di];
                    const Bitset hash_d = hash_c | word_hash[d];
                    WordIndex *table_e = table_d + table_d_size; // words compatible with {a, b, c, d}
                    const size_t table_e_size = filter_words(table_e, word_hash, table_d, di + 1, table_d_size, hash_d);

                    for (size_t ei = 0; ei < table_e_size; ++ei)
                    {
                        const WordIndex e = table_e[ei];
                        struct timespec now;
                        clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &now);
                        double elapsed = (now.tv_sec - start_time.tv_sec) + 1e-9 * (now.tv_nsec - start_time.tv_nsec);
                        // clang-format off
                        printf("%.*s %.*s %.*s %.*s %.*s    %% elapsed-sec=%.3lf, mem-size=%lu\n",
                                WORD_LEN, word_list[a],
                                WORD_LEN, word_list[b],
                                WORD_LEN, word_list[c],
                                WORD_LEN, word_list[d],
                                WORD_LEN, word_list[e],
                                elapsed,
                                table_b_size + table_c_size + table_d_size);
                        // clang-format on
                    }
                }
            }
        }
    }

    return 0;
 }
	//
	// A solution to the problem described in
	// https://www.youtube.com/watch?v=_-AfhLQfb6w
	//
	// How many sets of 5, 5-letter words exist such
	// that the set has 25 distinct characters?
	//
	// It takes ~8sec to run on my machine.
	//
	// Usage: ./a.out < words_alpha.txt
	//
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <time.h>

	typedef uint_fast32_t Bitset;
	typedef size_t WordIndex;

	#define popcount(x) \
	_Generic((x), \
	unsigned int: __builtin_popcount, \
	unsigned long: __builtin_popcountl, \
	unsigned long long: __builtin_popcountll)(x)

	#define WORD_TABLE_CAPACITY 40000 // max number of 5 letter words
	#define LINE_CAPACITY 64 // max number of chars allowed per line in words list (including newline)
	#define WORD_LEN 5

	// Select those elements from `candidates` whose bitset does not intersect with `hash`
	size_t filter_words(WordIndex *restrict out, // Output array
	Bitset *restrict word_hash, // Word index -> bitset
	WordIndex *restrict candidates, // Word indices to choose from. If null, use [begin, end)
	size_t candidates_begin, //
	size_t candidates_end, //
	Bitset hash // Output word bitsets should not intersect with this bitset
	)
	{
	size_t out_size = 0;
	#pragma unroll
	for (size_t i = candidates_begin; i < candidates_end; ++i)
	{
	WordIndex candidate = candidates ? candidates[i] : i;
	if ((word_hash[candidate] & hash) == 0)
	{
	out[out_size++] = candidate;
	}
	}
	return out_size;
	}

	int main()
	{
	struct timespec start_time;
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start_time);

	// We store words made of 5 distinct characters in `word_list`.
	// word_hash contains bitsets representing the chars in each word.
	WordIndex word_table_size = 0;
	char word_list[WORD_TABLE_CAPACITY][WORD_LEN];
	Bitset word_hash[WORD_TABLE_CAPACITY];

	while (1)
	{
	// Read directly into the word table.
	// Will be overwritten later if word is rejected
	char *line = word_list[word_table_size];

	// if nothing was read, exit the loop
	if (!fgets(line, LINE_CAPACITY, stdin))
	{
	break;
	}

	// Reject words with length != WORD_LEN
	if (strcspn(line, "\r\n") != WORD_LEN)
	{
	continue;
	}

	Bitset ONE = 1;
	Bitset hash = 0;
	for (size_t i = 0; i < WORD_LEN; ++i)
	{
	hash = hash \| (ONE << (line[i] - 'a'));
	}

	// Reject words with repeated chars
	if (popcount(hash) != WORD_LEN)
	{
	continue;
	}

	word_hash[word_table_size] = hash;
	line = word_list[++word_table_size];
	}

	WordIndex mem[WORD_TABLE_CAPACITY]; // buffer for storing intermediate results

	for (size_t ai = 0; ai < word_table_size; ++ai)
	{
	const WordIndex a = ai;
	const Bitset hash_a = word_hash[a];
	WordIndex *table_b = mem; // words compatible with {a}
	const size_t table_b_size = filter_words(table_b, word_hash, NULL, ai + 1, word_table_size, hash_a);

	for (size_t bi = 0; bi < table_b_size; ++bi)
	{
	const WordIndex b = table_b[bi];
	const Bitset hash_b = hash_a \| word_hash[b];
	WordIndex *table_c = table_b + table_b_size; // words compatible with {a, b}
	const size_t table_c_size = filter_words(table_c, word_hash, table_b, bi + 1, table_b_size, hash_b);

	for (size_t ci = 0; ci < table_c_size; ++ci)
	{
	const WordIndex c = table_c[ci];
	const Bitset hash_c = hash_b \| word_hash[c];
	WordIndex *table_d = table_c + table_c_size; // words compatible with {a, b, c}
	const size_t table_d_size = filter_words(table_d, word_hash, table_c, ci + 1, table_c_size, hash_c);

	for (size_t di = 0; di < table_d_size; ++di)
	{
	const WordIndex d = table_d[di];
	const Bitset hash_d = hash_c \| word_hash[d];
	WordIndex *table_e = table_d + table_d_size; // words compatible with {a, b, c, d}
	const size_t table_e_size = filter_words(table_e, word_hash, table_d, di + 1, table_d_size, hash_d);

	for (size_t ei = 0; ei < table_e_size; ++ei)
	{
	const WordIndex e = table_e[ei];
	struct timespec now;
	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &now);
	double elapsed = (now.tv_sec - start_time.tv_sec) + 1e-9 * (now.tv_nsec - start_time.tv_nsec);
	// clang-format off
	printf("%.s %.s %.s %.s %.*s %% elapsed-sec=%.3lf, mem-size=%lu\n",
	WORD_LEN, word_list[a],
	WORD_LEN, word_list[b],
	WORD_LEN, word_list[c],
	WORD_LEN, word_list[d],
	WORD_LEN, word_list[e],
	elapsed,
	table_b_size + table_c_size + table_d_size);
	// clang-format on
	}
	}
	}
	}
	}

	return 0;
	}