Last active
January 5, 2023 14:39
-
-
Save hdf/ef8f06726c7b1781ed351e5640770c9c to your computer and use it in GitHub Desktop.
Crossword puzzle solving helpers.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Usage: | |
# --- | |
# python combine2len.py [total_len [num_num_spaces ['words.txt' ['banned.txt']]]] | |
from itertools import combinations | |
from pprint import pprint | |
from sys import argv | |
def get_combinations(strings, num_strings, total_length):
    """Return the set of all `num_strings`-element combinations of `strings`
    whose lengths sum to exactly `total_length`.

    Each combination is returned as a frozenset, so the same group of words
    is counted only once regardless of order.
    """
    matches = set()
    for combo in combinations(strings, num_strings):
        if sum(map(len, combo)) == total_length:
            matches.add(frozenset(combo))
    return matches
if __name__ == '__main__':
    # Total length of the sought answer, spaces included (default 9).
    total_len = int(argv[1]) if len(argv) > 1 and int(argv[1]) > 1 else 9
    # Number of spaces between words, i.e. word count - 1; only 0-3 accepted (default 1).
    num_spaces = int(argv[2]) if len(argv) > 2 and int(argv[2]) in range(4) else 1
    # Candidate words: one per line from the given file, or the built-in list.
    if len(argv) > 3:
        with open(argv[3], 'rt') as f:
            words = {l.strip() for l in f}
    else:
        words = {'EGYEDÜL', 'A', 'KÁR', 'TE', 'DOHÁNYBOLT', 'VAGY', 'FÉNYEK', 'KICSINY', 'BÁL', 'VAGY', 'ELSŐ', 'AZ', 'MÉLABÚ', 'VARÁZSHEGYEN', 'KÉSŐ', 'NEKEM', 'A', 'JEL', 'HÁZ', 'MÁR', 'DARABOT', 'LE', 'VÁLUNK', 'HELYEM', 'KÉNE', 'CSÚSSZ', 'A', 'TUDOM', 'AZ', 'ÉVAD', 'MOST', 'NEM', 'SZÍVEMBŐL', 'KEZDENÜNK', 'SZÉP', 'MAJD', 'A', 'INDIÁN', 'SOSEM', 'AKI', 'ÚR', 'VIGYÁZZ', 'ÉGNEK', 'VAGY', 'ÉLLEK', 'KIAZAKI', 'MOSD', 'VAGY', 'LÉGY', 'FEHÉRRE', 'TÚL', 'NEM', 'A', 'TANÁR', 'MIÉRT', 'RÁM', 'SPANOM', 'A', 'OPERÁBAN', 'FALUM', 'EGY', 'NYÁR', 'LEONARD', 'KÁR', 'DÜBÖRÖG', 'IMMUNISSÁ', 'NYÁR', 'FONYÓDI', 'VAN', 'KERESEM', 'TÚL'}
    # Words already ruled out: from the given file, or the built-in list.
    if len(argv) > 4:
        with open(argv[4], 'rt') as f:
            banned = {l.strip() for l in f}
    else:
        banned = {'FONYÓDI', 'MÉLABÚ', 'DÜBÖRÖG', 'HÁZ', 'ÉLLEK', 'TÚL', 'MIÉRT', 'SZÉP', 'NEKEM', 'TUDOM', 'ÉGNEK', 'FÉNYEK', 'KERESEM', 'HELYEM', 'DOHÁNYBOLT', 'SPANOM', 'VAN', 'KICSINY', 'FALUM', 'LÉGY', 'AKI', 'VIGYÁZZ', 'RÁM'}
    words -= banned
    # Each space replaces one letter, so the words themselves must add up to
    # total_len - num_spaces characters, split across num_spaces + 1 words.
    res = get_combinations(words, num_spaces + 1, total_len - num_spaces)
    # Inside each combination: longest word first, ties broken alphabetically.
    sorted_sub = [sorted(sorted(e), key=len, reverse=True) for e in res]
    if num_spaces == 0:
        # Single words: alphabetical first, then longest first (sort is stable).
        sorted_by_len = sorted(sorted([list(e)[0] for e in res]), key=len, reverse=True)
    else:
        # Generalizes the former per-arity (1/2/3 space) branches: order the
        # combinations alphabetically, then by descending element lengths.
        sorted_by_alpha = sorted(sorted_sub, key=tuple)
        sorted_by_len = sorted(sorted_by_alpha, key=lambda e: tuple(-len(s) for s in e))
    pprint(sorted_by_len)
    print(len(res))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Prerequisites: | |
# --- | |
# conda install pyenchant | |
# From: | |
# https://github.com/LibreOffice/dictionaries/tree/master/hu_HU | |
# Copy 'hu_HU.aff' and 'hu_HU.dic' to: | |
# H:\Anaconda3\Lib\site-packages\enchant\data\mingw64\share\enchant\hunspell | |
# "magyar-szavak.txt" from: | |
# https://gist.github.com/Konstantinusz/f9517357e46fa827c3736031ac8d01c7 | |
# Usage: | |
# --- | |
# python findword.py ['string' [sought_length ['hu_HU']]] | |
from sys import argv, exit | |
from collections import Counter, defaultdict | |
from itertools import combinations | |
import enchant | |
def find_all_anagrams(seek_word):
    """Return every word from 'magyar-szavak.txt' that can be spelled using
    the letters of `seek_word` (each letter at most as many times as it
    occurs in `seek_word`), preserving file order and skipping duplicates.

    Note: despite the name these are sub-anagrams — a match need not use
    every letter of `seek_word`. Single-character words are ignored.
    The seek word is lowercased; dictionary words are used as-is
    (presumably the word list is already lowercase — TODO confirm).
    """
    available = Counter(seek_word.lower())
    result = []
    seen = set()  # O(1) duplicate check instead of scanning the result list
    # `with` guarantees the file handle is closed (the original leaked it).
    with open('magyar-szavak.txt', mode='r', encoding='utf-8') as f:
        for line in f:
            word = line.strip()
            if len(word) > 1 and word not in seen:
                # Counter subtraction is empty iff no letter is over-used.
                if not Counter(word) - available:
                    seen.add(word)
                    result.append(word)
    return result
def len_histogram(strings):
    """Map each occurring string length to the number of strings having it."""
    histogram = defaultdict(int)
    for length in map(len, strings):
        histogram[length] += 1
    return histogram
def group_by_length(strings):
    """Bucket strings by their length: {length: [strings of that length]},
    keeping the original relative order inside each bucket."""
    buckets = defaultdict(list)
    for item in strings:
        buckets[len(item)].append(item)
    return buckets
def pair_by_length(groups, dct, length):
    """Concatenate pairs of words (both orders) whose lengths sum to `length`
    and that the spell-checker `dct` accepts.

    groups -- mapping {word_length: [words]} as built by group_by_length
    dct    -- object with a check(str) -> bool method (e.g. enchant.Dict)
    length -- required total length of the compound word

    Returns unique compounds in first-found order.
    # NOTE(review): enchant accepts compounds it should not, so the
    # results are noisy — kept from the original author's comment.
    """
    result = []
    seen = set()  # O(1) dedupe instead of scanning the result list
    for l1 in range(1, length):
        l2 = length - l1
        if l1 not in groups or l2 not in groups:
            continue
        for s1 in groups[l1]:
            for s2 in groups[l2]:
                # Try both concatenation orders; the set removes duplicates
                # (the original repeated this logic twice verbatim).
                for compound in (s1 + s2, s2 + s1):
                    if compound not in seen and dct.check(compound):
                        seen.add(compound)
                        result.append(compound)
    return result
if __name__ == '__main__':
    # Each default applies when the corresponding CLI argument is absent.
    chars = 'PLASMFHPASYŰARÁKTLBPCRIS' if len(argv) < 2 else argv[1]
    length = 14 if len(argv) < 3 else int(argv[2])
    dct = 'hu_HU' if len(argv) < 4 else argv[3]
    if not enchant.dict_exists(dct):
        print(dct, 'not found, installed dictionaries:', enchant.list_languages())
        exit()
    d = enchant.Dict(dct)
    # Longest candidates first; the inner sort breaks ties alphabetically.
    substrings = sorted(sorted(find_all_anagrams(chars)), key=len, reverse=True)
    string_groups = group_by_length(substrings)
    print(string_groups)
    with open('found_words.txt', mode='wt', encoding='utf-8') as fp:
        fp.writelines(s + '\n' for s in substrings)
    print(len_histogram(substrings))
    #with open('compund_words.txt', mode='wt', encoding='utf-8') as fp:
    # fp.writelines(s + '\n' for s in pair_by_length(string_groups, d, length))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Prerequisites: | |
# --- | |
# conda install pyenchant more-itertools | |
# From: | |
# https://github.com/LibreOffice/dictionaries/tree/master/hu_HU | |
# Copy 'hu_HU.aff' and 'hu_HU.dic' to: | |
# H:\Anaconda3\Lib\site-packages\enchant\data\mingw64\share\enchant\hunspell | |
# Usage: | |
# --- | |
# python xwordsolver.py ['input.csv' 'output.csv' 'found_words.csv' [min_word_length ['hu_HU' [nth_unused]]]] | |
# Also useful: | |
# https://www.szogenerator.hu/ | |
import enchant, numpy as np, string | |
from more_itertools import substrings_indexes | |
from sys import argv, exit | |
# All three file names must be supplied together (see the usage comment
# above); otherwise the Hungarian defaults are used for all of them.
input_file = argv[1] if len(argv) > 3 else 'tabla.csv'
output_file = argv[2] if len(argv) > 3 else 'tabla_kesz.csv'
found_words_file = argv[3] if len(argv) > 3 else 'talalt_szavak.csv'
# Shortest word length the solver accepts as a hit (shorter ones are noise).
min_length = int(argv[4]) if len(argv) > 4 else 4
# Which not-crossed-out character to collect from each row at the end.
nth = int(argv[6]) if len(argv) > 6 else 2
#print(enchant.list_languages())
dct = argv[5] if len(argv) > 5 else 'hu_HU'
if not enchant.dict_exists(dct):
    print(dct, 'not found, installed dictionaries:', enchant.list_languages())
    exit()
d = enchant.Dict(dct)
# Puzzle-specific words missing from the stock hu_HU dictionary.
extra_words = ['OREGÁNÓ', 'LILAKÁPOSZTA', 'LILAHAGYMA', 'FEKETEBORS']
for w in extra_words:
    if not d.check(w):
        d.add(w)
# False positives that enchant would otherwise accept as valid words.
banned_words = ['IRAM', 'GUTIS', 'MIND', 'PIÉT', 'KUKK', 'CSÓRD', 'ÉTOLAJÉ', 'LASZA', 'BUZI', 'APÓI', 'SORS', 'ISZOD', 'NANA', 'ÁSÓK', 'ALAK', 'ASMÉHI', 'BORA', 'GUTH', 'SCPM', 'ZSIL', 'KAKUKKFŰZ', 'TROPA', 'ÁTSI', 'PÁKA', 'AVAR', 'TEKE', 'HALI', 'DÉLT', 'KAIN', 'ÓNÁG', 'KARÉ', 'COLTI', 'RENG', 'INAL', 'KAPORT', 'SKATÓ', 'OBRA', 'ZSELÉK', 'PAFF', 'BOYI', 'FOST', 'FÚST', 'GUTI', 'CSÓR', 'ASZAB', 'GISZ', 'ASMÉH', 'FIID', 'ZAYK', 'COLT', 'KATÓ', 'LASZ', 'MÉHI', 'ASZA', 'KUKA', 'SZAB', 'TÁLAS', 'TÁLA', 'DARA', 'ZSELÉ', 'FOKHAGYMAPORA', 'PITA', 'BUCI']
# Valid answers that are shorter than min_length and must not be skipped.
special_words = ['SÓ', 'VÍZ', 'HÚS']
# Load the semicolon-separated letter grid and uppercase every cell.
# data_coords mirrors the grid with each cell's original (row, col) tuple, so
# every transformed view of the letters has an identically transformed view
# of the coordinates — letting us map hits back to grid positions.
data = np.genfromtxt(input_file, delimiter=';', encoding='utf-8', dtype=str)
data_coords = np.zeros(data.shape, dtype=object)
for i in range(data.shape[0]):
    for j in range(data.shape[1]):
        data[i, j] = data[i, j].upper()
        data_coords[i, j] = (i, j)
# Working copy from which found words will be blanked out.
data_out = np.copy(data)
found_longest_words = []
# The eight reading directions, built as transposed/flipped views and
# diagonal extractions of the grid (coords transformed in lockstep).
data_vertical = data.T
data_coords_vertical = data_coords.T
data_reverse = np.fliplr(data)
data_coords_reverse = np.fliplr(data_coords)
data_vertical_reverse = np.fliplr(data_vertical)
data_coords_vertical_reverse = np.fliplr(data_vertical_reverse := data_coords_vertical) if False else np.fliplr(data_coords_vertical)
data_diagonal_right = [data[::-1, :].diagonal(i) for i in range(-data.shape[0] + 1, data.shape[1])]
data_coords_diagonal_right = [data_coords[::-1, :].diagonal(i) for i in range(-data_coords.shape[0] + 1, data_coords.shape[1])]
data_diagonal_left = [data.diagonal(i) for i in range(data.shape[1] - 1, -data.shape[0], -1)]
data_coords_diagonal_left = [data_coords.diagonal(i) for i in range(data_coords.shape[1] - 1, -data_coords.shape[0], -1)]
data_diagonal_right_reverse = [row[::-1] for row in data_diagonal_right]
data_coords_diagonal_right_reverse = [row[::-1] for row in data_coords_diagonal_right]
data_diagonal_left_reverse = [row[::-1] for row in data_diagonal_left]
data_coords_diagonal_left_reverse = [row[::-1] for row in data_coords_diagonal_left]
# Index i in views matches index i in coord_views, in the arrow list and in
# the direction-code list below.
views = (data, data_vertical, data_reverse, data_vertical_reverse, data_diagonal_right, data_diagonal_left, data_diagonal_right_reverse, data_diagonal_left_reverse)
coord_views = (data_coords, data_coords_vertical, data_coords_reverse, data_coords_vertical_reverse, data_coords_diagonal_right, data_coords_diagonal_left, data_coords_diagonal_right_reverse, data_coords_diagonal_left_reverse)
view_direction_arrows = [u'\u2192', u'\u2193', u'\u2190', u'\u2191', u'\u2197', u'\u2198', u'\u2199', u'\u2196']
view_direction_strings = ['R', 'D', 'L', 'U', 'RU', 'RD', 'LD', 'LU']
print('Found words and their starting location (in Excel format as well) and direction:')
def n2a(n, b=string.ascii_uppercase):
    """Convert a 0-based column index to spreadsheet letters using alphabet
    `b`: 0 -> 'A', 25 -> 'Z', 26 -> 'AA', 701 -> 'ZZ', 702 -> 'AAA'."""
    digits = []
    while True:
        n, rem = divmod(n, len(b))
        digits.append(b[rem])
        if n == 0:
            break
        n -= 1  # bijective numeration: there is no zero digit
    return ''.join(reversed(digits))
def coord_to_excel(xy):
    """Translate a (row, col) grid coordinate into Excel cell notation,
    e.g. (0, 0) -> 'A1'."""
    row, col = xy[0], xy[1]
    return '{}{}'.format(n2a(col), row + 1)
# Scan every line of every directional view for dictionary words.
# substrings_indexes(reverse=True) yields the longest substrings first, so
# the first accepted word per line is the longest one.
for view_index in range(len(views)):
    view = views[view_index]
    for i in range(len(view)):
        if len(view[i]) < min_length: # In diagonal views skip corners.
            continue
        #print(view_index, i)
        found_strs = []
        # substr is (characters, start_index, end_index) within this line.
        for substr in substrings_indexes(view[i], reverse=True):
            if len(substr[0]) < min_length and ''.join(substr[0]) not in special_words:
                continue
            current_str = ''.join(substr[0])
            if d.check(current_str) and current_str not in banned_words:
                # Skip words that are substrings of an already-found word.
                if len(found_strs) > 0 and any(current_str in found_str for found_str in found_strs):
                    #print(current_str, 'in', found_strs, substr[1:])
                    continue
                found_strs.append(current_str)
                # Map the hit back to grid coordinates via the parallel view.
                coords = coord_views[view_index][i][substr[1]]
                print(
                    current_str,
                    coords,
                    coord_to_excel(coords),
                    view_direction_strings[view_index],
                    view_direction_arrows[view_index]
                )
                # We remove the largest found strings from the output table.
                # (len == 1 means this is the first, hence longest, hit on
                # this line — NOTE(review): reconstructed indentation places
                # this inside the hit branch; confirm against the original.)
                if len(found_strs) == 1:
                    found_longest_words.append(current_str)
                    for j in range(substr[1], substr[2]):
                        data_out[coord_views[view_index][i][j]] = ' '
# Persist the blanked-out grid and the longest word found per line.
np.savetxt(output_file, data_out, fmt='%s', delimiter=';', encoding='utf-8')
np.savetxt(found_words_file, found_longest_words, fmt='%s', delimiter=';', encoding='utf-8')
def first_nth_unused(arr2d, n=2):
    """From each row of `arr2d`, pick the n-th (1-based) character that is
    not a blank ' ', scanning left to right; rows with fewer than n
    remaining characters contribute nothing. Returns the picked characters
    joined into a single string."""
    picked = []
    for row in arr2d:
        count = 0
        for ch in row:
            if ch == ' ':
                continue
            count += 1
            if count == n:
                picked.append(ch)
                break
    return ''.join(picked)
def last_nth_unused(arr2d, n=2):
    """Like first_nth_unused, but scans each row right to left: picks the
    n-th (1-based) non-blank character from the end of each row and joins
    the picks into a single string."""
    picked = []
    for row in arr2d:
        count = 0
        for idx in range(len(row) - 1, -1, -1):
            ch = row[idx]
            if ch == ' ':
                continue
            count += 1
            if count == n:
                picked.append(ch)
                break
    return ''.join(picked)
# Leftover-letter extraction: report the nth surviving character of each row,
# scanned from both directions.
for side, extractor in (('left', first_nth_unused), ('right', last_nth_unused)):
    print('\nFrom each row, we take the ' + str(nth) + 'th not found character, from the ' + side + ':')
    print(extractor(data_out, nth))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment