RAG / Question Answer Example
import dbnl
import os
import pandas as pd
from openai import OpenAI
from dbnl.eval.llm import OpenAILLMClient
from dbnl.eval.embedding_clients import OpenAIEmbeddingClient
from dbnl.eval import evaluate
# 1. create client to power LLM-as-judge and embedding metrics [optional]
# Both wrappers share one underlying OpenAI client; the API key is read from
# the OPENAI_API_KEY environment variable (KeyError if it is unset).
base_oai_client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
# Judge model used for LLM-as-judge text metrics.
eval_llm_client = OpenAILLMClient.from_existing_client(base_oai_client, llm_model="gpt-3.5-turbo-0125")
# Embedding model used for similarity-based metrics.
eval_embd_client = OpenAIEmbeddingClient.from_existing_client(base_oai_client, embedding_model="text-embedding-ada-002")
# Toy evaluation set for a RAG / QA system.  Each record carries the question,
# the retrieved context (texts + ids), the ground-truth reference document,
# and the generated vs. ground-truth answers.  Three retrieval scenarios are
# covered below; the list is tiled 4x to yield 12 rows.
_qa_records = [
    # Scenario 1 — retrieval miss: only an irrelevant document came back.
    {
        "question_text": "Is the protein Cathepsin secreted?",
        "top_k_retrieved_doc_texts": ["Some irrelevant document that the rag system retrieved"],
        "top_k_retrieved_doc_ids": ["4123"],
        "top_retrieved_doc_text": "Some irrelevant document that the rag system retrieved",
        "gt_reference_doc_id": "1099",
        "gt_reference_doc_text": "The protein Cathepsin is known to be secreted",
        "ground_truth_answer": "Yes, Cathepsin is a secreted protein",
        "generated_answer": "I have no relevant knowledge",
    },
    # Scenario 2 — partial hit: the reference doc is retrieved but ranked last.
    {
        "question_text": "Is the protein Cathepsin secreted?",
        "top_k_retrieved_doc_texts": [
            "Some irrelevant document that the rag system retrieved",
            "Many proteins are secreted such as hormones, enzymes, toxins",
            "The protein Cathepsin is known to be secreted",
        ],
        "top_k_retrieved_doc_ids": ["4123", "21", "1099"],
        "top_retrieved_doc_text": "Some irrelevant document that the rag system retrieved",
        "gt_reference_doc_id": "1099",
        "gt_reference_doc_text": "The protein Capilin is known to be secreted",
        "ground_truth_answer": "Yes, Cathepsin is a secreted protein",
        "generated_answer": "Many proteins are known to be secreted",
    },
    # Scenario 3 — clean hit: reference doc at rank 1, answer matches.
    {
        "question_text": "Is the protein Cathepsin secreted?",
        "top_k_retrieved_doc_texts": [
            "The protein Cathepsin is known to be secreted",
            "Some irrelevant document that the rag system retrieved",
        ],
        "top_k_retrieved_doc_ids": ["1099", "4123"],
        "top_retrieved_doc_text": "The protein Cathepsin is known to be secreted",
        "gt_reference_doc_id": "1099",
        "gt_reference_doc_text": "The protein Cathepsin is known to be secreted",
        "ground_truth_answer": "Yes, Cathepsin is a secreted protein",
        "generated_answer": "Yes, cathepsin is a secreted protein",
    },
]
# Tile the three scenarios four times (12 rows total).
eval_df = pd.DataFrame(_qa_records * 4)
# 2. get text metrics appropriate for RAG / QA systems
# Each string argument names the eval_df column that holds that piece of data;
# the LLM / embedding clients power the judge- and similarity-based metrics.
qa_text_metrics = dbnl.eval.metrics.question_and_answer_metrics(
prediction="generated_answer", input="question_text", target="ground_truth_answer",
context="top_k_retrieved_doc_texts", top_retrieved_document_text="top_retrieved_doc_text",
retrieved_document_ids="top_k_retrieved_doc_ids", ground_truth_document_id="gt_reference_doc_id",
eval_llm_client=eval_llm_client, eval_embedding_client=eval_embd_client
)
# 3. run qa text metrics
# evaluate() returns eval_df augmented with one new column per computed metric.
aug_eval_df = evaluate(eval_df, qa_text_metrics)
# 4. publish to DBNL
# Authenticate, create (or fetch) the project, infer the column schema from
# the augmented dataframe, then upload the results as a new run and close it.
dbnl.login()
project = dbnl.get_or_create_project(name="RAG_demo")
cols = dbnl.experimental.get_column_schemas_from_dataframe(aug_eval_df)
run_config = dbnl.create_run_config(project=project, columns=cols)
run = dbnl.create_run(project=project, run_config=run_config)
dbnl.report_results(run=run, data=aug_eval_df)
# Closing finalizes the run.  (Fix: removed stray "idx" text that the docs
# scrape fused onto this line, which made the script a SyntaxError.)
dbnl.close_run(run=run)
Example metric columns added to the dataframe by `evaluate`:
`mrr__gt_reference_doc_id__top_k_retrieved_doc_ids`
`answer_similarity_v0__generated_answer_question_text_ground_truth_answer`
Was this helpful?

