Commit
update demo
jasinliu committed Nov 5, 2024
Parent: c6c72a8 · Commit: 6e82497
Showing 3 changed files with 86 additions and 49 deletions.
hugegraph-llm/requirements.txt (1 addition, 1 deletion)
@@ -14,4 +14,4 @@ python-dotenv>=1.0.1
 pyarrow~=17.0.0 # TODO: a temporary dependency for pandas, figure out why ImportError
 pandas~=2.2.2
 openpyxl~=3.1.5
-ragas~=0.1.20
+git+https://github.com/jasinliu/ragas.git@patch-2 # TODO: wait for release
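
pip treats a git+https://...@<ref> line as a direct VCS requirement: it clones that branch and installs it in place of the PyPI release, still under the ragas distribution name. A quick, hypothetical sanity check (not part of this commit) for confirming which build actually resolved:

import importlib.metadata

# A git install still registers normal package metadata, so the reported
# version tells you whether the patched fork is the one in the environment.
print(importlib.metadata.version("ragas"))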
hugegraph-llm/src/hugegraph_llm/demo/rag_demo/rag_block.py (56 additions, 34 deletions)
@@ -19,31 +19,33 @@

 import json
 import os
-from typing import Tuple, List, Literal, Optional
+from typing import List, Literal, Optional, Tuple

-from datasets import Dataset
 import gradio as gr
-from gradio.utils import NamedString
 import pandas as pd
+from datasets import Dataset
+from gradio.utils import NamedString
+from langchain_openai.chat_models import ChatOpenAI
 from ragas import evaluate
+from ragas.llms import LangchainLLMWrapper

-from hugegraph_llm.config import resource_path, prompt
+from hugegraph_llm.config import prompt, resource_path, settings
 from hugegraph_llm.operators.graph_rag_task import RAGPipeline
 from hugegraph_llm.utils.log import log
-from hugegraph_llm.utils.ragas_utils import RAGAS_METRICS_DICT
+from hugegraph_llm.utils.ragas_utils import RAGAS_METRICS_DICT, RAGAS_METRICS_ZH_DICT


 def rag_answer(
-        text: str,
-        raw_answer: bool,
-        vector_only_answer: bool,
-        graph_only_answer: bool,
-        graph_vector_answer: bool,
-        graph_ratio: float,
-        rerank_method: Literal["bleu", "reranker"],
-        near_neighbor_first: bool,
-        custom_related_information: str,
-        answer_prompt: str,
+    text: str,
+    raw_answer: bool,
+    vector_only_answer: bool,
+    graph_only_answer: bool,
+    graph_vector_answer: bool,
+    graph_ratio: float,
+    rerank_method: Literal["bleu", "reranker"],
+    near_neighbor_first: bool,
+    custom_related_information: str,
+    answer_prompt: str,
 ) -> Tuple:
     """
     Generate an answer using the RAG (Retrieval-Augmented Generation) pipeline.

@@ -177,8 +179,7 @@ def toggle_slider(enable):
             > 1. Download the template file & fill in the questions you want to test.
             > 2. Upload the file & click the button to generate answers. (Preview shows the first 40 lines)
             > 3. The answer options are the same as the above RAG/Q&A frame
-            """
-        )
+            """)

         # TODO: Replace string with python constant
         tests_df_headers = [

@@ -309,29 +310,45 @@ def several_rag_answer(
         questions_file.change(read_file_to_excel, questions_file, [qa_dataframe, answer_max_line_count])
         answer_max_line_count.change(change_showing_excel, answer_max_line_count, qa_dataframe)

-        def evaluate_rag(metrics: List[str], num: int):
+        def evaluate_rag(metrics: List[str], num: int, language: Literal["english", "chinese"]):
             answers_df = pd.read_excel(answers_path)
             answers_df = answers_df.head(num)
             if not any(answers_df.columns.isin(rag_answer_header_dict)):
                 raise gr.Error("No RAG answers found in the answer file.")
-            rag_answers = [answer for answer in rag_answer_header_dict if answer in answers_df.columns]
-            df = pd.DataFrame()
+            if language == "chinese":
+                eval_metrics = [RAGAS_METRICS_ZH_DICT[metric] for metric in metrics]
+            else:
+                eval_metrics = [RAGAS_METRICS_DICT[metric] for metric in metrics]
+            rag_method_names = [answer for answer in rag_answer_header_dict if answer in answers_df.columns]
+            score_df = pd.DataFrame()

-            for answer in rag_answers:
+            for answer in rag_method_names:
                 context_header = rag_answer_header_dict[answer]
                 answers_df[context_header] = answers_df[context_header].apply(json.loads)
                 rag_data = {
-                    "question": answers_df["Question"].to_list(),
-                    "answer": answers_df[answer].to_list(),
-                    "contexts": answers_df[rag_answer_header_dict[answer]].to_list(),
-                    "ground_truth": answers_df["Expected Answer"].to_list(),
+                    "user_input": answers_df["Question"].to_list(),
+                    "response": answers_df[answer].to_list(),
+                    "retrieved_contexts": answers_df[rag_answer_header_dict[answer]].to_list(),
+                    "reference": answers_df["Expected Answer"].to_list(),
                 }
+                eval_llm = LangchainLLMWrapper(
+                    ChatOpenAI(
+                        model="gpt-4o-mini",
+                        temperature=0,
+                        base_url=settings.openai_api_base,
+                        api_key=settings.openai_api_key,
+                    )
+                )

                 dataset = Dataset.from_dict(rag_data)
-                score = evaluate(dataset, metrics=[RAGAS_METRICS_DICT[metric] for metric in metrics])
-                print(score.scores.to_pandas())
-                df = pd.concat([df, score.scores.to_pandas()])
-            df.insert(0, 'method', rag_answers)
-            return df
+                score = evaluate(
+                    dataset,
+                    metrics=eval_metrics,
+                    llm=eval_llm,
+                )
+                score_df = pd.concat([score_df, score.to_pandas()])
+            score_df.insert(0, "method", rag_method_names)
+            return score_df
+
         with gr.Row():
             with gr.Column():

@@ -340,14 +357,19 @@ def evaluate_rag(metrics: List[str], num: int):
                     value=ragas_metrics_list[:4],
                     multiselect=True,
                     label="Metrics",
-                    info="Several evaluation metrics from `ragas`, please refer to https://docs.ragas.io/en/stable/concepts/metrics/index.html",
+                    info=(
+                        "Several evaluation metrics from `ragas`, "
+                        "please refer to https://docs.ragas.io/en/stable/concepts/metrics/index.html"
+                    ),
                 )
             with gr.Column():
-                dataset_nums = gr.Number(1, label="Dataset Numbers", minimum=1, maximum=1)
+                with gr.Row():
+                    dataset_nums = gr.Number(1, label="Dataset Numbers", minimum=1, maximum=1)
+                    language = gr.Radio(["english", "chinese"], label="Language", value="chinese")
                 ragas_btn = gr.Button("Evaluate RAG", variant="primary")
             ragas_btn.click(
                 evaluate_rag,
-                inputs=[ragas_metrics, dataset_nums],
+                inputs=[ragas_metrics, dataset_nums, language],
                 outputs=[gr.DataFrame(label="RAG Evaluation Results", headers=ragas_metrics_list)],
             )
-    return inp, answer_prompt_input
+    return inp, answer_prompt_input
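
The migration above follows the ragas 0.2-style API: the evaluation columns are renamed (question → user_input, answer → response, contexts → retrieved_contexts, ground_truth → reference), metrics are now instantiated classes, and the judge LLM is passed to evaluate() explicitly. A minimal, self-contained sketch of the new call pattern, mirroring the commit's own calls (the sample rows are illustrative, and gpt-4o-mini is just the judge model this demo happens to use):

from datasets import Dataset
from langchain_openai.chat_models import ChatOpenAI
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import Faithfulness, LLMContextRecall

# One evaluation sample per row, using the 0.2-era column names.
data = {
    "user_input": ["What is HugeGraph?"],
    "response": ["HugeGraph is an open-source graph database."],
    "retrieved_contexts": [["HugeGraph is a fast-speed and highly-scalable graph database."]],
    "reference": ["HugeGraph is an open-source graph database."],
}

# Wrap a LangChain chat model so ragas can drive it as the judge LLM.
judge = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini", temperature=0))
score = evaluate(Dataset.from_dict(data), metrics=[Faithfulness(), LLMContextRecall()], llm=judge)
print(score.to_pandas())

Wrapping the judge in LangchainLLMWrapper is what lets ragas reuse whatever LangChain-compatible backend the demo is already configured for.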
hugegraph-llm/src/hugegraph_llm/utils/ragas_utils.py (29 additions, 14 deletions)
@@ -15,22 +15,37 @@
 # specific language governing permissions and limitations
 # under the License.

+from pysbd import Segmenter
 from ragas.metrics import (
-    faithfulness,
-    answer_correctness,
-    context_precision,
-    answer_relevancy,
-    context_recall,
-    context_utilization,
-    context_entity_recall,
+    ContextEntityRecall,
+    FactualCorrectness,
+    Faithfulness,
+    LLMContextPrecisionWithoutReference,
+    LLMContextPrecisionWithReference,
+    LLMContextRecall,
+    NoiseSensitivity,
+    ResponseRelevancy,
 )

 RAGAS_METRICS_DICT = {
-    "context_precision": context_precision,
-    "faithfulness": faithfulness,
-    "answer_relevancy": answer_relevancy,
-    "answer_correctness": answer_correctness,
-    "context_recall": context_recall,
-    "context_utilization": context_utilization,
-    "context_entity_recall": context_entity_recall,
+    "context_entity_recall": ContextEntityRecall(),
+    "factual_correctness": FactualCorrectness(),
+    "faithfulness": Faithfulness(),
+    "llm_context_precision_without_reference": LLMContextPrecisionWithoutReference(),
+    "llm_context_precision_with_reference": LLMContextPrecisionWithReference(),
+    "llm_context_recall": LLMContextRecall(),
+    "noise_sensitivity": NoiseSensitivity(),
+    "response_relevancy": ResponseRelevancy(),
 }
+
+RAGAS_METRICS_ZH_DICT = {
+    "context_entity_recall": ContextEntityRecall(),
+    "factual_correctness": FactualCorrectness(sentence_segmenter=Segmenter(language="zh", clean=True)),
+    "faithfulness": Faithfulness(sentence_segmenter=Segmenter(language="zh", clean=True)),
+    "llm_context_precision_without_reference": LLMContextPrecisionWithoutReference(),
+    "llm_context_precision_with_reference": LLMContextPrecisionWithReference(),
+    "llm_context_recall": LLMContextRecall(),
+    "noise_sensitivity": NoiseSensitivity(sentence_segmenter=Segmenter(language="zh", clean=True)),
+    "response_relevancy": ResponseRelevancy(),
+}
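
The _ZH variants override each metric's default English sentence splitter with a pysbd segmenter, since claim-based metrics such as Faithfulness and NoiseSensitivity first decompose text into sentences. A small sketch of what that segmenter does (the sample text is illustrative, and the shown split is the expected pysbd behaviour for zh rather than verified output):

from pysbd import Segmenter

# clean=True normalizes whitespace before segmenting.
seg = Segmenter(language="zh", clean=True)
print(seg.segment("HugeGraph是一个图数据库。它支持Gremlin查询语言。"))
# Expected: ['HugeGraph是一个图数据库。', '它支持Gremlin查询语言。']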
