Skip to content

Instantly share code, notes, and snippets.

@amn41
Created July 31, 2017 14:20
Show Gist options
  • Save amn41/0a3556e79d177ad5e0ce5fd84afe9f59 to your computer and use it in GitHub Desktop.
Save amn41/0a3556e79d177ad5e0ce5fd84afe9f59 to your computer and use it in GitHub Desktop.
Read Rasa NLU server logs, optionally re-run the predictions with a trained model, and dump the results to a file in the Rasa JSON training-data format.
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import
from builtins import str as text
import argparse
import io
import json
from rasa_nlu.converters import load_data
from rasa_nlu.model import Metadata, Interpreter
def create_argparser():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: parser exposing ``-m/--model_dir`` (optional,
        defaults to ``None``) plus ``-l/--log_file`` and ``-o/--out_file``.
        The latter two are required, because the script cannot do anything
        useful without an input log and an output path.
    """
    parser = argparse.ArgumentParser(
        description='Process logs from Rasa NLU server. If a model dir is specified, '
                    'load that model and re-do the predictions. Sort by intent confidence, '
                    'and output the data in the rasa json format for training data'
    )
    parser.add_argument('-m', '--model_dir', default=None,
                        help='dir containing model (optional)')
    # Required: a missing value would otherwise surface later as an obscure
    # TypeError from io.open(None) instead of a clear usage message.
    parser.add_argument('-l', '--log_file', required=True,
                        help='file containing the Rasa NLU server logs '
                             '(one JSON document per line)')
    parser.add_argument('-o', '--out_file', required=True,
                        help='file where to save the logs in rasa format')
    return parser
def _read_logged_predictions(log_file):
    """Parse one JSON document per non-blank line of ``log_file``."""
    # The context manager closes the handle; blank lines (e.g. a trailing
    # newline at the end of the log) are skipped instead of breaking json.loads.
    with io.open(log_file, encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]


def _parse_result(prediction):
    """Return the parse payload of a prediction.

    Log entries keep the payload under the ``"user_input"`` key; anything
    without that wrapper is treated as being the payload itself.
    """
    payload = prediction.get("user_input")
    return payload if isinstance(payload, dict) else prediction


def process_logs(model_dir, log_file, out_file):
    """Convert logged predictions into a Rasa NLU training-data JSON file.

    Args:
        model_dir: directory of a trained model, or ``None``. When given, the
            model is loaded and used to re-predict every logged text that is
            not already part of the model's training data.
        log_file: path of the log file, one JSON document per line. Each
            entry is expected to carry the parse under ``"user_input"``.
        out_file: path the resulting JSON document is written to.

    Predictions without an ``"intent_ranking"`` are dropped; the rest are
    sorted by ascending intent confidence and written as
    ``{"rasa_nlu_data": {"common_examples": [...]}}``.

    Raises:
        KeyError: if a kept prediction lacks ``intent``, ``entities`` or
            ``text``.
    """
    logged_predictions = _read_logged_predictions(log_file)

    if model_dir is not None:
        # Imported locally: only this branch needs it and the module-level
        # imports of this file do not provide it.
        from rasa_nlu.config import RasaNLUConfig

        # load model & its training data
        metadata = Metadata.load(model_dir)
        interpreter = Interpreter.load(metadata, RasaNLUConfig())
        # NOTE(review): assumes interpreter.config exposes "training_data" and
        # that training examples are indexable by "text" -- confirm against the
        # installed rasa_nlu version.
        training_data = load_data(interpreter.config["training_data"]).training_examples

        # dedupe & create test set
        logged_texts = {
            parse["text"]
            for parse in map(_parse_result, logged_predictions)
            if "text" in parse
        }
        train_texts = {t["text"] for t in training_data}
        test_texts = logged_texts - train_texts

        # predict on test set
        predictions = [interpreter.parse(t) for t in test_texts]
    else:
        predictions = logged_predictions

    # Normalise both shapes (wrapped log entry / bare parse result) so the
    # filtering and export below work for either branch.
    parses = [_parse_result(p) for p in predictions]
    parses = [p for p in parses if p.get("intent_ranking") is not None]
    # Least confident first, so the examples most in need of review come first.
    parses.sort(key=lambda p: p["intent"]["confidence"])

    preds = [
        {
            "intent": p["intent"]["name"],
            "entities": p["entities"],
            "text": p["text"],
        }
        for p in parses
    ]
    data = {"rasa_nlu_data": {"common_examples": preds}}

    # persist
    with io.open(out_file, "w", encoding="utf-8") as f:
        f.write(text(json.dumps(data, indent=2)))
if __name__ == "__main__":
    # Script entry point: read the CLI options and run the conversion.
    cli_args = create_argparser().parse_args()
    process_logs(
        cli_args.model_dir,
        cli_args.log_file,
        cli_args.out_file,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment