Skip to content

Instantly share code, notes, and snippets.

@devig
Last active May 16, 2025 12:41
Show Gist options
  • Save devig/662f961b3b3071c503c5389879c4fe1d to your computer and use it in GitHub Desktop.
Save devig/662f961b3b3071c503c5389879c4fe1d to your computer and use it in GitHub Desktop.
Генерация всевозможных словоформ из списка слов (минус-слов)
import pymorphy2
import os
import sys
def is_russian(word):
    """Return True if *word* consists solely of Russian Cyrillic letters.

    The check is case-insensitive, so capitalised words such as "Привет"
    are recognised. Any other character (Latin letters, digits, hyphens,
    spaces) makes the word non-Russian. An empty string is not a word and
    yields False.
    """
    if not word:
        # all() over an empty iterable is vacuously True; reject explicitly.
        return False
    # 'ё' sits outside the contiguous 'а'..'я' code point range, so it is
    # tested separately.
    return all('а' <= ch <= 'я' or ch == 'ё' for ch in word.lower())
def get_word_forms(word, morph):
    """Collect every inflected form of *word* known to the analyzer.

    *morph* must provide ``parse(word)`` returning parse objects that expose
    a ``tag`` (supporting ``in`` checks) and a ``lexeme`` iterable whose
    items have a ``word`` attribute. Only parses tagged as noun, full
    adjective, verb or infinitive contribute forms.

    Returns a set of strings; the set is empty when no parse matches.
    """
    wanted_pos = ('NOUN', 'ADJF', 'VERB', 'INFN')
    collected = set()
    for parse in morph.parse(word):
        tag = parse.tag
        if not any(pos in tag for pos in wanted_pos):
            # Other parts of speech are skipped.
            continue
        collected.update(form.word for form in parse.lexeme)
    return collected
def process_file(input_path):
    """Expand every word of a word list into all of its word forms.

    Reads *input_path* (UTF-8, one word per line, optionally prefixed with
    dashes as in minus-word lists), generates the forms of each Russian word
    via ``get_word_forms`` and writes the sorted, de-duplicated result to
    ``allforms_<original name>`` in the same directory. Non-Russian words
    and Russian words without any forms are copied through unchanged.

    A short summary is printed to stdout, including the Russian words for
    which no forms were found.
    """
    morph = pymorphy2.MorphAnalyzer()
    base_name = os.path.basename(input_path)
    output_path = os.path.join(os.path.dirname(input_path), f'allforms_{base_name}')

    input_words = set()
    # 'utf-8-sig' drops a leading BOM (common in files saved on Windows);
    # otherwise the BOM sticks to the first word and it is never expanded.
    with open(input_path, 'r', encoding='utf-8-sig') as infile:
        for line in infile:
            # Strip again after removing the dashes so "- слово" is handled
            # the same way as "-слово".
            word = line.strip().lstrip('-').strip().lower()
            if word:
                input_words.add(word)

    output_words = set()
    no_forms_russian = []
    # Sorted iteration keeps the "no forms" report stable between runs.
    for word in sorted(input_words):
        forms = get_word_forms(word, morph) if is_russian(word) else set()
        if forms:
            output_words.update(forms)
            continue
        # Nothing to expand: keep the word as written.
        output_words.add(word)
        if is_russian(word):
            no_forms_russian.append(word)

    with open(output_path, 'w', encoding='utf-8') as outfile:
        outfile.writelines(word + '\n' for word in sorted(output_words))

    print(f"Было слов: {len(input_words)}")
    print(f"Стало уникальных словоформ: {len(output_words)}")
    if no_forms_russian:
        print(f"Для {len(no_forms_russian)} русских слов не найдены словоформы:")
        for word in no_forms_russian:
            print(f" - {word}")
if __name__ == "__main__":
    # Script entry point: expects exactly one argument, the input file path.
    if len(sys.argv) != 2:
        # Report misuse on stderr with a non-zero status so shell scripts
        # and pipelines can detect the failure.
        print("Использование: python allforms.py input.txt", file=sys.stderr)
        sys.exit(1)
    process_file(sys.argv[1])
@devig
Copy link
Author

devig commented May 16, 2025

python3 allforms.py minus-kondei.txt
python allforms.py minus-kondei.txt

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment