DSPy prompt evaluation with metric using llama.cpp
# A gist for using the `llama.cpp` model with the `dspy` library.
#
# DSPy features used in this gist
# - `dspy.Predict`
# - `dspy.Signature`
# - `dspy.context`
#
# The script prompts the model to answer an example question and then assesses
# the correctness and engagingness of the generated answer.
#
# Install `llama.cpp` from brew with the built-in OpenAI-compatible server:
#   brew install ggerganov/ggerganov/llama.cpp
#   llama-server --hf-repo TheBloke/Mistral-7B-Instruct-v0.2-GGUF --model mistral-7b-instruct-v0.2.Q4_K_M.gguf --hf-file mistral-7b-instruct-v0.2.Q4_K_M.gguf
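#
# (Optional sanity check, assuming the default port: llama-server exposes an
# OpenAI-compatible API, so `curl http://localhost:8080/v1/models` should list
# the loaded model before running this script.)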
import dspy

# Optional, for displaying the results on stdout as tables
from rich import print
from rich.table import Table
# The example question-answer pair; we already know the answer is `yes` and want
# to assess the correctness and engagingness of the model's answer.
example = dspy.Example(
    question="Are both Nehi and Nectar d.o.o. part of the beverage industry?",
    answer="yes",
)
# The `llama.cpp` model, accessed through the OpenAI-compatible client
llama_cpp_model = dspy.OpenAI(
    # assume llama-server is running on localhost:8080
    api_base="http://localhost:8080/v1/",
    # placeholder; leaving the key unset would raise an error
    api_key="none",
    # for some reason an error is raised if set to `text` (llama-server issue?)
    model_type="chat",
    # stop word for mistral-7b-instruct-v0.2
    stop="\n\n",
    # max number of tokens to generate
    max_tokens=250,
)
dspy.settings.configure(lm=llama_cpp_model)
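# Optional smoke test (a sketch, assuming the server is reachable): DSPy LM
# clients are callable with a raw prompt and return a list of completion
# strings, e.g.
# print(llama_cpp_model("Say hello in one short sentence.")[0])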
# A dspy signature for automatic assessment of a question-answer pair
class Assess(dspy.Signature):
    """Assess the quality of an answer to a question."""

    assessed_text = dspy.InputField()
    assessment_question = dspy.InputField()
    assessment_answer = dspy.OutputField(desc="Yes or No")


# The predict module built from the assessment signature,
# used in the correct_engaging_metric function below
assess_pred = dspy.Predict(Assess)
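# For illustration only (hypothetical values), the assessment module can be
# called on its own; the result carries the output field declared in `Assess`:
# result = assess_pred(
#     assessed_text="Yes, both Nehi and Nectar d.o.o. make beverages.",
#     assessment_question="Does the assessed text contain the answer `yes`?",
# )
# result.assessment_answer  # e.g. "Yes"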
# A metric returning a score between 0 and 1 that combines the correctness
# and the engagingness of the generated answer
def correct_engaging_metric(gold, pred, trace=None):
    question, answer, gen_answer = gold.question, gold.answer, pred.answer
    engaging = "Is the assessed text self-contained and informative?"
    correct = f"The text should answer `{question}` with `{answer}`. Does the assessed text contain this answer?"
    with dspy.context(lm=llama_cpp_model):
        correct = assess_pred(assessed_text=gen_answer, assessment_question=correct)
        engaging = assess_pred(assessed_text=gen_answer, assessment_question=engaging)
    correct, engaging = [
        "yes" in m.assessment_answer.lower() for m in [correct, engaging]
    ]
    score = correct + engaging
    if trace is not None: return score >= 2  # noqa: E701
    return score / 2.0
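# A minimal sketch (hypothetical values) of calling the metric directly;
# `dspy.Prediction` mirrors the object a Predict module returns, so the metric
# can be exercised without running the QA module first:
# gold = dspy.Example(question="Is water a liquid at room temperature?", answer="yes")
# pred = dspy.Prediction(answer="Yes, water is a liquid at room temperature.")
# correct_engaging_metric(gold, pred)  # -> 0.0, 0.5, or 1.0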
# A predict module accepts a signature (either a string or a `dspy.Signature`
# class). The following are example signature strings:
#   question -> answer
#   sentence -> sentiment
#   document -> summary
#   text -> gist
#   long_context -> tldr
#   context, question -> answer
#   question, choices -> reasoning, selection
#
# example:
#   predict_module = dspy.Predict('document -> summary')

# A predict module for answering questions
qa_predict_module = dspy.Predict("question -> answer")
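# (Sketch, not used below: a multi-field string signature takes one keyword
# argument per input field, e.g.
# rc_module = dspy.Predict("context, question -> answer")
# rc_module(context="Nehi is a soft drink brand.", question="What is Nehi?").answer )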
# Prompt the LLM to answer the question, then score the answer with the metric
output = qa_predict_module(question=example.question)
score = correct_engaging_metric(example, output)

table = Table(title="Metrics")
table.add_column("Question")
table.add_column("Expected Answer")
table.add_column("Generated Answer")
table.add_column("Score (0..1)", style="green")
table.add_row(example.question, example.answer, output.answer, str(score))
print(table)