DSPy prompt evaluation with metric using llama.cpp
# A gist for using the `llama.cpp` model with the `dspy` library.
#
# DSPy features used in this gist
# - `dspy.Predict`
# - `dspy.Signature`
# - `dspy.context`
#
# The script prompts the model to answer an example question and then assesses
# the correctness and engagingness of the generated answer.
#
# Install `llama.cpp` from brew with the built-in OpenAI-compatible server:
#   brew install ggerganov/ggerganov/llama.cpp
#   llama-server --hf-repo TheBloke/Mistral-7B-Instruct-v0.2-GGUF --model mistral-7b-instruct-v0.2.Q4_K_M.gguf --hf-file mistral-7b-instruct-v0.2.Q4_K_M.gguf
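#
# (Optional sanity check, assuming the default port: llama-server exposes an
# OpenAI-compatible API, so `curl http://localhost:8080/v1/models` should list
# the loaded model before running this script.)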
import dspy

# Optional, for displaying the results on stdout as tables
from rich import print
from rich.table import Table
# The example question-answer pair; we already know the answer is `yes` and want
# to assess the correctness and engagingness of the model's answer.
example = dspy.Example(
    question="Are both Nehi and Nectar d.o.o. part of the beverage industry?",
    answer="yes",
)
# The `llama.cpp` model, accessed through the OpenAI-compatible client
llama_cpp_model = dspy.OpenAI(
    # assume llama-server is running on localhost:8080
    api_base="http://localhost:8080/v1/",
    # placeholder; leaving the key unset would raise an error
    api_key="none",
    # for some reason an error is raised if set to `text` (llama-server issue?)
    model_type="chat",
    # stop word for mistral-7b-instruct-v0.2
    stop="\n\n",
    # max number of tokens to generate
    max_tokens=250,
)
dspy.settings.configure(lm=llama_cpp_model)
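# Optional smoke test (a sketch, assuming the server is reachable): DSPy LM
# clients are callable with a raw prompt and return a list of completion
# strings, e.g.
# print(llama_cpp_model("Say hello in one short sentence.")[0])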
# A dspy signature for automatic assessment of a question-answer pair
class Assess(dspy.Signature):
    """Assess the quality of an answer to a question."""

    assessed_text = dspy.InputField()
    assessment_question = dspy.InputField()
    assessment_answer = dspy.OutputField(desc="Yes or No")


# The predict module built from the assessment signature,
# used in the correct_engaging_metric function below
assess_pred = dspy.Predict(Assess)
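# For illustration only (hypothetical values), the assessment module can be
# called on its own; the result carries the output field declared in `Assess`:
# result = assess_pred(
#     assessed_text="Yes, both Nehi and Nectar d.o.o. make beverages.",
#     assessment_question="Does the assessed text contain the answer `yes`?",
# )
# result.assessment_answer  # e.g. "Yes"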
# A metric returning a score between 0 and 1 that combines the correctness
# and the engagingness of the generated answer
def correct_engaging_metric(gold, pred, trace=None):
    question, answer, gen_answer = gold.question, gold.answer, pred.answer
    engaging = "Is the assessed text self-contained and informative?"
    correct = f"The text should answer `{question}` with `{answer}`. Does the assessed text contain this answer?"
    with dspy.context(lm=llama_cpp_model):
        correct = assess_pred(assessed_text=gen_answer, assessment_question=correct)
        engaging = assess_pred(assessed_text=gen_answer, assessment_question=engaging)
    correct, engaging = [
        "yes" in m.assessment_answer.lower() for m in [correct, engaging]
    ]
    score = correct + engaging
    if trace is not None: return score >= 2  # noqa: E701
    return score / 2.0
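# A minimal sketch (hypothetical values) of calling the metric directly;
# `dspy.Prediction` mirrors the object a Predict module returns, so the metric
# can be exercised without running the QA module first:
# gold = dspy.Example(question="Is water a liquid at room temperature?", answer="yes")
# pred = dspy.Prediction(answer="Yes, water is a liquid at room temperature.")
# correct_engaging_metric(gold, pred)  # -> 0.0, 0.5, or 1.0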
# A predict module accepts a signature (either a string or a `dspy.Signature`
# class). The following are example signature strings:
#   question -> answer
#   sentence -> sentiment
#   document -> summary
#   text -> gist
#   long_context -> tldr
#   context, question -> answer
#   question, choices -> reasoning, selection
#
# example:
#   predict_module = dspy.Predict('document -> summary')

# A predict module for answering questions
qa_predict_module = dspy.Predict("question -> answer")
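# (Sketch, not used below: a multi-field string signature takes one keyword
# argument per input field, e.g.
# rc_module = dspy.Predict("context, question -> answer")
# rc_module(context="Nehi is a soft drink brand.", question="What is Nehi?").answer )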
# Prompt the LLM to answer the question, then score the answer with the metric
output = qa_predict_module(question=example.question)
score = correct_engaging_metric(example, output)

table = Table(title="Metrics")
table.add_column("Question")
table.add_column("Expected Answer")
table.add_column("Generated Answer")
table.add_column("Score (0..1)", style="green")
table.add_row(example.question, example.answer, output.answer, str(score))
print(table)