Created
October 23, 2017 07:02
-
-
Save brandtg/0a9ef936780a2cdc9508f914b609d417 to your computer and use it in GitHub Desktop.
A script to generate alliterative nicknames
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from nltk.corpus import brown | |
import argparse | |
def is_valid(name, word, tag=None, allowed_parts_of_speech=None):
    """
    Tests if allowed part of speech and is alliterative.

    Args:
        name: The root nickname the candidate must alliterate with.
        word: The candidate prefix word.
        tag: Optional part-of-speech tag for ``word``. A falsy tag skips
            the part-of-speech check entirely.
        allowed_parts_of_speech: Optional container of accepted tags.
            ``None`` means no part-of-speech restriction is applied.

    Returns:
        True if ``word`` passes the part-of-speech filter and its first
        character matches the first character of ``name`` case-insensitively;
        False otherwise, including when either string is empty.
    """
    # Must be allowed part of speech (no restriction when no container given;
    # previously a tag combined with the default None raised TypeError)
    if tag and allowed_parts_of_speech is not None:
        if tag not in allowed_parts_of_speech:
            return False
    # Empty strings have no first character, so they can never alliterate
    if not name or not word:
        return False
    # Must be alliterative
    return word.lower()[0] == name.lower()[0]
def get_brown_candidates(name, allowed_parts_of_speech, categories=None):
    """
    Uses some tagged data set (part of speech and text category) to generate
    candidates.

    Args:
        name: The root nickname that candidates must alliterate with.
        allowed_parts_of_speech: Container of part-of-speech tags a word
            must carry to be kept.
        categories: Optional value forwarded to ``brown.tagged_words`` as
            its ``categories`` argument; ``None`` is passed through as-is.

    Returns:
        A sorted list of distinct lower-cased words accepted by ``is_valid``.
    """
    # Use some tagged data set for (word, part_of_speech) pairs
    # See: http://www.nltk.org/book/ch05.html
    tagged_words = brown.tagged_words(categories=categories, tagset='universal')
    # Generate candidates / deduplicate (lower-casing merges case variants)
    candidates = {
        word.lower()
        for (word, tag) in tagged_words
        if is_valid(name, word, tag=tag,
                    allowed_parts_of_speech=allowed_parts_of_speech)
    }
    # Sort so the result order is stable from run to run; bare set iteration
    # order is arbitrary
    return sorted(candidates)
if __name__ == '__main__':
    # Command-line entry point: print one "<prefix> <name>" line per candidate.
    parser = argparse.ArgumentParser()
    parser.add_argument('name', help='The root nickname (suffix)')
    parser.add_argument('--skip_nouns', action='store_true', help='Skip nouns for prefixes')
    parser.add_argument('--skip_adjectives', action='store_true', help='Skip adjectives for prefixes')
    # The help text is the stringified list of available corpus categories
    parser.add_argument('--categories', default=None, help=str(brown.categories()))
    args = parser.parse_args()

    # Determine allowed parts of speech
    allowed_parts_of_speech = set(['NOUN', 'ADJ'])
    if args.skip_nouns and args.skip_adjectives:
        # Skipping both would leave nothing to choose from; report it as a
        # usage error (prints usage and exits) rather than a raw traceback
        parser.error('Cannot specify both --skip_nouns and --skip_adjectives')
    elif args.skip_nouns:
        allowed_parts_of_speech.remove('NOUN')
    elif args.skip_adjectives:
        allowed_parts_of_speech.remove('ADJ')

    # Generate candidates; --categories is a comma-separated list
    categories = args.categories.split(',') if args.categories else None
    candidates = get_brown_candidates(args.name, allowed_parts_of_speech, categories=categories)

    # Show results
    for candidate in candidates:
        # Single-argument parenthesized form works on both Python 2 and 3
        print('{} {}'.format(candidate, args.name))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment