Skip to content

Instantly share code, notes, and snippets.

@brandtg
Created October 23, 2017 07:02
Show Gist options
  • Save brandtg/0a9ef936780a2cdc9508f914b609d417 to your computer and use it in GitHub Desktop.
Save brandtg/0a9ef936780a2cdc9508f914b609d417 to your computer and use it in GitHub Desktop.
A script to generate alliterative nicknames
#!/usr/bin/env python
from nltk.corpus import brown
import argparse
def is_valid(name, word, tag=None, allowed_parts_of_speech=None):
    """
    Tests if allowed part of speech and is alliterative.

    Args:
        name: The root nickname the candidate word is compared against.
        word: The candidate prefix word.
        tag: Optional part-of-speech tag for ``word``. When falsy, the
            part-of-speech check is skipped.
        allowed_parts_of_speech: Optional container of accepted tags. When
            ``None``, no part-of-speech restriction is applied.

    Returns:
        True if ``word`` passes the part-of-speech filter and starts with
        the same letter (case-insensitively) as ``name``; False otherwise,
        including when either string is empty.
    """
    # An empty name or word has no first letter to compare
    if not name or not word:
        return False
    # Must be allowed part of speech (only enforced when both a tag and a
    # whitelist were supplied; a None whitelist means "no restriction")
    if (tag and allowed_parts_of_speech is not None
            and tag not in allowed_parts_of_speech):
        return False
    # Must be alliterative
    return word.lower()[0] == name.lower()[0]
def get_brown_candidates(name, allowed_parts_of_speech, categories=None):
    """
    Uses some tagged data set (part of speech and text category) to generate
    candidates.

    Args:
        name: The root nickname; candidates must start with its first letter.
        allowed_parts_of_speech: Container of part-of-speech tags that a
            candidate's tag must belong to.
        categories: Optional corpus categories forwarded to
            ``brown.tagged_words``; ``None`` is passed through unchanged.

    Returns:
        A sorted list of unique lower-cased words accepted by ``is_valid``.
    """
    # Use some tagged data set for (word, part_of_speech) pairs
    # See: http://www.nltk.org/book/ch05.html
    tagged_words = brown.tagged_words(categories=categories, tagset='universal')
    # Generate candidates / deduplicate
    unique_words = {
        word.lower()
        for (word, tag) in tagged_words
        if is_valid(name, word, tag=tag,
                    allowed_parts_of_speech=allowed_parts_of_speech)
    }
    # Sort so repeated runs yield the same order (set iteration order is
    # arbitrary)
    return sorted(unique_words)
if __name__ == '__main__':
    # Command-line interface: one positional root name plus optional filters
    parser = argparse.ArgumentParser()
    parser.add_argument('name', help='The root nickname (suffix)')
    parser.add_argument('--skip_nouns', action='store_true', help='Skip nouns for prefixes')
    parser.add_argument('--skip_adjectives', action='store_true', help='Skip adjectives for prefixes')
    parser.add_argument('--categories', default=None, help=str(brown.categories()))
    args = parser.parse_args()
    # Skipping both parts of speech would leave nothing to choose from;
    # report it as a usage error (prints usage and exits with status 2)
    if args.skip_nouns and args.skip_adjectives:
        parser.error('Cannot specify both --skip_nouns and --skip_adjectives')
    # Determine allowed parts of speech
    allowed_parts_of_speech = {'NOUN', 'ADJ'}
    if args.skip_nouns:
        allowed_parts_of_speech.remove('NOUN')
    elif args.skip_adjectives:
        allowed_parts_of_speech.remove('ADJ')
    # Generate candidates (--categories is a comma-separated list)
    categories = args.categories.split(',') if args.categories else None
    candidates = get_brown_candidates(args.name, allowed_parts_of_speech, categories=categories)
    # Show results; the parenthesised single-argument print works on both
    # Python 2 and Python 3
    for candidate in candidates:
        print('{} {}'.format(candidate, args.name))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment