Created April 2, 2024 08:49
Set up and run LangSmith tests
import os
import logging
from langchain import hub
from langsmith import Client
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())  # read local .env file
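# Sketch of the .env entries this script expects, based on the os.environ / os.getenv
# lookups below. The values are placeholders, and the LANGCHAIN_API_KEY line is an
# assumption about how the LangSmith Client authenticates, not something this file sets.
#
#   OPENAI_API_KEY=sk-...
#   DATABASE_URL=postgresql+psycopg2://user:password@localhost:5432/saraswati
#   SARASWATI_LLM=gpt-4
#   SARASWATI_COLLECTION_ID=my-collection
#   SARASWATI_SIMILARITY_SEARCH_K=4
#   SARASWATI_SYSTEM_PROMPT=owner/prompt-name   # optional; pulled from the LangChain Hub
#   LANGCHAIN_API_KEY=ls__...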
from operator import itemgetter

from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnableMap

logging.basicConfig(level=logging.DEBUG)

# Since chains and agents can be stateful (they can have memory),
# create a constructor to pass in to the run_on_dataset method.
def create_runnable():
    llm = ChatOpenAI(model_name=os.environ["SARASWATI_LLM"], temperature=0)
    prompt = get_system_prompt()
    # Map each prompt variable to the matching key of the inputs dict.
    return (
        RunnableMap({"context": itemgetter("context"), "question": itemgetter("question")})
        | prompt
        | llm
    )
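# Note: run_on_dataset (configured at the bottom of this file) calls this factory for each
# dataset example and invokes the returned chain with that example's inputs, roughly:
#   create_runnable().invoke({"question": "...", "context": "..."})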
def get_system_prompt():
    """Pull the system prompt from the LangChain Hub if configured, else use a local fallback template."""
    system_prompt_repo = os.getenv("SARASWATI_SYSTEM_PROMPT")
    if system_prompt_repo is not None:
        return hub.pull(system_prompt_repo)
    return ChatPromptTemplate.from_messages([
        ("system",
         "Use the following pieces of context to answer the user's question.\n"
         "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
         "----------------\n{context}"),
        ("human", "{question}"),
    ])
def _doc_exists(doc_source, documents):
    """Check if a document exists in a list of documents based on its source."""
    return any(doc.metadata['source'] == doc_source for doc in documents)
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.pgvector import PGVector

def get_question_context(question: str):
    """Retrieve context for a question from the PGVector store, skipping documents already seen."""
    doc_list = []
    context = ""
    db = PGVector(
        embedding_function=OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"]),
        collection_name=os.environ["SARASWATI_COLLECTION_ID"],
        connection_string=os.environ["DATABASE_URL"]
    )
    search_results = db.similarity_search(question, k=int(os.environ['SARASWATI_SIMILARITY_SEARCH_K']))
    for doc in search_results:
        logging.debug("Source: %s", doc.metadata['source'])
        if not _doc_exists(doc.metadata['source'], doc_list):
            doc_list.append(doc)  # remember this source so duplicates are skipped
            context += doc.page_content
    logging.debug("Context: %s", context)
    return context
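# Direct usage example (question taken from example_inputs below):
#   context = get_question_context("What are the main features of the Blue Card Renewal project?")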
from langchain.smith import RunEvalConfig, run_on_dataset

example_inputs = [
    "What are the main features of the Blue Card Renewal project?",
    "Write a sufficient number of user stories to fully cover the feature identified as “Supporting online evidence provision” for the Blue Card Renewals project.",
    "Write sufficient acceptance criteria to cover the following user story for the Blue Card Renewal project: I, as a learner, while on the attach evidence screen, can see the names of the files as they appear on my device after they have been uploaded so I can tell which files I've uploaded and which remain to be uploaded.",
    "Given the following acceptance criteria, write one or more tests in code to fulfill the criteria: Given that I have uploaded multiple files, when I view the attach evidence screen, then I should see the names of all the uploaded files displayed in a list above the upload button.",
    "Write a development plan for the feature identified as “Supporting online evidence provision” for the Blue Card Renewals project."
]
client = Client()
dataset_name = "Augmented Developer Dataset"

# Get a generator of all datasets with the given name
datasets = client.list_datasets(dataset_name=dataset_name)
logging.debug("type: %s", type(datasets))
logging.debug("Datasets: %s", datasets)

dataset = None  # stays None if the generator yields nothing
while True:
    try:
        dataset = next(datasets)
        logging.debug("Dataset: %s", dataset)
    except StopIteration:
        logging.debug("No more datasets")
        break

# If no dataset exists yet, create one and populate it with the example inputs
if dataset is None:
    dataset = client.create_dataset(
        dataset_name=dataset_name, description="Augmented Developer prompts",
    )
    for input_prompt in example_inputs:
        # Each example must be unique and have inputs defined.
        # Outputs are optional.
        client.create_example(
            inputs={"question": input_prompt, "context": get_question_context(input_prompt)},
            outputs=None,
            dataset_id=dataset.id,
        )
eval_config = RunEvalConfig(
    evaluators=[
        # You can specify an evaluator by name/enum.
        # In this case, the default criterion is "helpfulness"
        "criteria",
        # Or you can configure the evaluator
        RunEvalConfig.Criteria("harmfulness"),
        # RunEvalConfig.Criteria(
        #     {"cliche": "Are the lyrics cliche?"
        #      " Respond Y if they are, N if they're entirely unique."}
        # )
    ],
    input_key="question"
)
run_on_dataset(
    client=client,
    dataset_name=dataset_name,
    llm_or_chain_factory=create_runnable,
    evaluation=eval_config,
    verbose=True,
)
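# run_on_dataset creates a test project in LangSmith where per-example traces and the
# "criteria"/"harmfulness" evaluator feedback can be reviewed; it also returns a results
# summary, which is not captured here.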