Skip to content

Instantly share code, notes, and snippets.

@MrValdez
Created April 1, 2017 08:21
Show Gist options
  • Save MrValdez/32fdbadf015a00f8124d338f74ca6d63 to your computer and use it in GitHub Desktop.
Save MrValdez/32fdbadf015a00f8124d338f74ca6d63 to your computer and use it in GitHub Desktop.
# Basic example of natural language processing with NLTK:
# guess whether a first name is a boy's or a girl's name.
# http://nltk.org/book
# https://spacy.io/docs/usage/showcase
import nltk
import random
def get_dataset():
    """Build the labelled set of first names used for training.

    Returns:
        A list of ``(name, label)`` tuples where ``label`` is ``"Boy"`` or
        ``"Girl"``. The list is shuffled with ``random.shuffle`` before it
        is returned, so the order differs from call to call.
    """
    boys = ["Ryan", "Azooz", "Jonard", "Chippo", "Pierre",
            "John", "Michael", "Jordan", "Bobby", "Stephen"]
    girls = ["Cypress", "Ammi", "Kathleen", "Ruby", "Jelly",
             "Jazzel", "Sarah", "Nicole", "Jenna", "Concepcion"]

    # Each sample is a (name, label) pair.
    dataset = [(boy, "Boy") for boy in boys]
    dataset.extend((girl, "Girl") for girl in girls)

    random.shuffle(dataset)
    return dataset
def extract_gender_feature(data):
    """Turn a name into the feature dictionary fed to the classifier.

    The name is lower-cased first, so the features are case-insensitive.

    Args:
        data: The name to describe, as a string.

    Returns:
        A dict with these keys:

        * ``"last letter"``: the final character of the lower-cased name,
          or ``""`` when the name is empty.
        * ``"size"``: the number of characters in the name.
        * ``"vowel count"``: total occurrences of a, e, i, o and u.
        * ``"<letter> count"``: one entry per ASCII letter that occurs at
          least once, holding how many times it occurs.
    """
    data = data.lower()
    feature = {
        # Slice rather than index so an empty name gives "" instead of
        # raising IndexError.
        "last letter": data[-1:],
        "size": len(data),
        "vowel count": sum(data.count(vowel) for vowel in "aeiou"),
    }

    for letter in "abcdefghijklmnopqrstuvwxyz":
        occurrences = data.count(letter)
        # Letters that never appear are left out of the feature set.
        if occurrences:
            feature["{} count".format(letter)] = occurrences

    return feature
# Build the training data: one (feature dict, label) pair per name.
dataset = get_dataset()
features = []
for name, label in dataset:
    feature = (extract_gender_feature(name), label)
    features.append(feature)

# Train the Naive Bayes classifier at import time so that classify() can be
# called straight away from an interactive session.
classifier = nltk.NaiveBayesClassifier.train(features)
def classify(name):
    """Print the label the trained classifier predicts for ``name``.

    Args:
        name: The name to classify, as a string. It is converted to a
            feature dict with ``extract_gender_feature`` and passed to the
            module-level ``classifier``.

    Returns:
        None. The predicted label is printed to standard output.
    """
    name_features = extract_gender_feature(name)
    print(classifier.classify(name_features))
# Usage: load this file in an interactive session, then call classify():
#   python -i natural_language_example.py
#   >>> classify("Ben")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment