Created
October 26, 2017 03:14
-
-
Save dela3499/17296f2142253d4e89bd27c14c62bfc7 to your computer and use it in GitHub Desktop.
Generate sentences from a context-free grammar.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
# Natural Language Toolkit: Generating from a CFG
#
# Copyright (C) 2001-2017 NLTK Project
# Author: Steven Bird <[email protected]>
#         Peter Ljunglöf <[email protected]>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
#
from __future__ import print_function | |
import itertools | |
import sys | |
from nltk.grammar import Nonterminal | |
def generate(grammar, start=None, depth=None, n=None):
    """
    Generates an iterator of all sentences from a CFG.

    :param grammar: The Grammar used to generate sentences.
    :param start: The Nonterminal from which to start generating sentences;
        ``grammar.start()`` is used when no start symbol is given.
    :param depth: The maximal depth of the generated tree; ``None`` is
        replaced by ``sys.maxsize``.
    :param n: The maximum number of sentences to return; ``None`` means
        no limit and ``0`` yields no sentences at all.
    :return: An iterator of lists of terminal tokens.
    """
    if not start:
        start = grammar.start()
    if depth is None:
        depth = sys.maxsize

    # Named ``sentences`` rather than ``iter`` so the builtin is not shadowed.
    sentences = _generate_all(grammar, [start], depth)

    # Test against None (not truthiness) so that n=0 caps the output at zero
    # sentences instead of silently disabling the limit.
    if n is not None:
        sentences = itertools.islice(sentences, n)
    return sentences
def _generate_all(grammar, items, depth): | |
if items: | |
try: | |
for frag1 in _generate_one(grammar, items[0], depth): | |
for frag2 in _generate_all(grammar, items[1:], depth): | |
yield frag1 + frag2 | |
except RuntimeError as _error: | |
if _error.message == "maximum recursion depth exceeded": | |
# Helpful error message while still showing the recursion stack. | |
raise RuntimeError("The grammar has rule(s) that yield infinite recursion!!") | |
else: | |
raise | |
else: | |
yield [] | |
def _generate_one(grammar, item, depth): | |
if depth > 0: | |
if isinstance(item, Nonterminal): | |
for prod in grammar.productions(lhs=item): | |
for frag in _generate_all(grammar, prod.rhs(), depth-1): | |
yield frag | |
else: | |
yield [item] | |
# Load the grammar text at import time.  The ``with`` block closes the file
# handle as soon as the contents have been read.
with open('./grammar.txt') as _grammar_file:
    demo_grammar = _grammar_file.read()
def demo(N=2003):
    """
    Build a numbered listing of sentences generated from ``demo_grammar``.

    The module-level ``demo_grammar`` text is parsed with
    ``CFG.fromstring`` and at most ``N`` sentences are requested from
    ``generate``.  Each sentence becomes one line: its 1-based index,
    formatted with ``%3d``, then the tokens joined by single spaces.

    :param N: The value passed to ``generate`` as ``n``.
    :return: All formatted lines joined with newlines, as a single string.
    """
    from nltk.grammar import CFG

    cfg = CFG.fromstring(demo_grammar)
    numbered_lines = (
        '%3d. %s' % (index, ' '.join(tokens))
        for index, tokens in enumerate(generate(cfg, n=N), 1)
    )
    return "\n".join(numbered_lines)
# Write the demo listing to result.txt.  The ``with`` block guarantees the
# file is flushed and closed even if demo() raises.
with open('result.txt', 'w') as _result_file:
    _result_file.write(demo())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment