Created
November 24, 2016 16:43
-
-
Save JIghtuse/a059d63bf31b56c0e71ac03e69584f48 to your computer and use it in GitHub Desktop.
Spellchecker using enchant
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
"""Spellchecks src/*.md files with enchant""" | |
import glob | |
import sys | |
import enchant | |
from enchant.tokenize import get_tokenizer, EmailFilter, URLFilter | |
# Language tag handed to both the enchant tokenizer and the dictionary.
DICT_TAG = "en_US"
# File passed to enchant.DictWithPWL next to DICT_TAG (project-specific
# extra words, presumably one per line -- confirm against the file).
EXTRA_DICT_FILENAME = "dictionary.txt"
# Only words strictly longer than this many characters are spellchecked.
WORD_LENGTH_THRESHOLD = 3
def check_file(filename, tokenizer, dic):
    """Spellcheck a single file and print every misspelled word.

    Each line is split into words by ``tokenizer``. Words longer than
    ``WORD_LENGTH_THRESHOLD`` characters that ``dic.check`` rejects are
    printed as ``filename:lineno word``, with line numbers starting at 1.

    Args:
        filename: Path of the text file to check.
        tokenizer: Callable taking a line of text and yielding sequences
            whose first item is the word.
        dic: Object whose ``check(word)`` returns a truthy value for
            correctly spelled words.

    Returns:
        True if at least one misspelling was printed, False otherwise.
    """
    found_misspellings = False
    # The typographic apostrophe handled below is non-ASCII, so decode the
    # file explicitly instead of relying on the platform default encoding.
    with open(filename, encoding="utf-8") as input_file:
        for lineno, line in enumerate(input_file, start=1):
            # str.replace returns a new string; keep the result so that
            # typographic apostrophes are normalised before tokenizing.
            line = line.replace("’", "'")
            for token in tokenizer(line):
                word = token[0]
                if len(word) > WORD_LENGTH_THRESHOLD and not dic.check(word):
                    print("{}:{} {}".format(filename, lineno, word))
                    found_misspellings = True
    return found_misspellings
def check_files(files):
    """Spellcheck every path in @files with enchant.

    Builds one tokenizer (skipping e-mail addresses and URLs) and one
    dictionary backed by EXTRA_DICT_FILENAME, then runs check_file on
    each path. Every file is checked even after a misspelling is found.

    Returns True if any file contained a misspelling, False otherwise.
    """
    tokenizer = get_tokenizer(DICT_TAG, filters=[EmailFilter, URLFilter])
    dic = enchant.DictWithPWL(DICT_TAG, EXTRA_DICT_FILENAME)
    # Build the full result list first so no file is skipped; any() alone
    # over a generator would stop at the first file with a misspelling.
    per_file_results = [check_file(path, tokenizer, dic) for path in files]
    return any(per_file_results)
if __name__ == "__main__":
    # Run only when executed as a script, so importing this module does not
    # terminate the interpreter. check_files returns a bool, so the exit
    # status is 1 when misspellings were found and 0 otherwise.
    sys.exit(check_files(glob.glob('src/*.md')))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment