(incubator-hugegraph-ai) branch main updated: feat(llm): support multi-QA test/generate (#78)

jin Mon, 09 Sep 2024 20:32:36 -0700

This is an automated email from the ASF dual-hosted git repository.

jin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph-ai.git



The following commit(s) were added to refs/heads/main by this push:
     new 38acded  feat(llm): support multi-QA test/generate (#78)
38acded is described below

commit 38acdedf32bd591f7cb39df53038b517e053f42c
Author: Liu Jiajun <[email protected]>
AuthorDate: Tue Sep 10 11:32:27 2024 +0800

    feat(llm): support multi-QA test/generate (#78)
    
    Note:
    pip install required dependencies first (csv/xls support)
    
    ---------
    
    Co-authored-by: imbajin <[email protected]>
---
 hugegraph-llm/.gitignore                           |   2 +
 hugegraph-llm/requirements.txt                     |   2 +
 .../src/hugegraph_llm/demo/rag_web_demo.py         | 110 ++++++++++++++++++++-
 .../resources/demo/questions_template.xlsx         | Bin 0 -> 11747 bytes
 4 files changed, 113 insertions(+), 1 deletion(-)

diff --git a/hugegraph-llm/.gitignore b/hugegraph-llm/.gitignore
new file mode 100644
index 0000000..4de6eba
--- /dev/null
+++ b/hugegraph-llm/.gitignore
@@ -0,0 +1,2 @@
+src/hugegraph_llm/resources/demo/questions_answers.xlsx
+src/hugegraph_llm/resources/demo/questions.xlsx
diff --git a/hugegraph-llm/requirements.txt b/hugegraph-llm/requirements.txt
index 1ee9db6..bc3e606 100644
--- a/hugegraph-llm/requirements.txt
+++ b/hugegraph-llm/requirements.txt
@@ -11,3 +11,5 @@ python-docx~=1.1.2
 langchain-text-splitters~=0.2.2
 faiss-cpu~=1.8.0
 python-dotenv>=1.0.1
+pandas~=2.2.2
+openpyxl~=3.1.5
diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index 20ed5b0..e0232cf 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -23,6 +23,7 @@ from typing import List, Union, Tuple, Literal, Optional
 
 import docx
 import gradio as gr
+import pandas as pd
 import requests
 import uvicorn
 from fastapi import FastAPI, Depends, APIRouter
@@ -602,7 +603,114 @@ def init_rag_ui() -> gr.Interface:
             outputs=[raw_out, vector_only_out, graph_only_out, graph_vector_out],
         )
 
-        gr.Markdown("""## 3. Others (🚧) """)
+        gr.Markdown("""## 3. User Functions """)
+        tests_df_headers = [
+            "Question",
+            "Expected Answer",
+            "Basic LLM Answer",
+            "Vector-only Answer",
+            "Graph-only Answer",
+            "Graph-Vector Answer",
+        ]
+        answers_path = os.path.join(resource_path, "demo", "questions_answers.xlsx")
+        questions_path = os.path.join(resource_path, "demo", "questions.xlsx")
+        questions_template_path = os.path.join(resource_path, "demo", "questions_template.xlsx")
+
+        def read_file_to_excel(file: NamedString, line_count: Optional[int] = None):
+            df = None
+            if not file:
+                return pd.DataFrame(), 1
+            if file.name.endswith(".xlsx"):
+                df = pd.read_excel(file.name, nrows=line_count) if file else pd.DataFrame()
+            elif file.name.endswith(".csv"):
+                df = pd.read_csv(file.name, nrows=line_count) if file else pd.DataFrame()
+            df.to_excel(questions_path, index=False)
+            if df.empty:
+                df = pd.DataFrame([[""] * len(tests_df_headers)], columns=tests_df_headers)
+            else:
+                df.columns = tests_df_headers
+            # truncate the dataframe if it's too long
+            if len(df) > 40:
+                return df.head(40), 40
+            return df, len(df)
+
+        def change_showing_excel(line_count):
+            if os.path.exists(answers_path):
+                df = pd.read_excel(answers_path, nrows=line_count)
+            elif os.path.exists(questions_path):
+                df = pd.read_excel(questions_path, nrows=line_count)
+            else:
+                df = pd.read_excel(questions_template_path, nrows=line_count)
+            return df
+
+        def several_rag_answer(
+            is_raw_answer: bool,
+            is_vector_only_answer: bool,
+            is_graph_only_answer: bool,
+            is_graph_vector_answer: bool,
+            graph_ratio: float,
+            rerank_method: Literal["bleu", "reranker"],
+            near_neighbor_first: bool,
+            custom_related_information: str,
+            answer_prompt: str,
+            progress=gr.Progress(track_tqdm=True),
+            answer_max_line_count: int = 1,
+        ):
+            df = pd.read_excel(questions_path, dtype=str)
+            total_rows = len(df)
+            for index, row in df.iterrows():
+                question = row.iloc[0]
+                basic_llm_answer, vector_only_answer, graph_only_answer, graph_vector_answer = rag_answer(
+                    question,
+                    is_raw_answer,
+                    is_vector_only_answer,
+                    is_graph_only_answer,
+                    is_graph_vector_answer,
+                    graph_ratio,
+                    rerank_method,
+                    near_neighbor_first,
+                    custom_related_information,
+                    answer_prompt,
+                )
+                df.at[index, "Basic LLM Answer"] = basic_llm_answer
+                df.at[index, "Vector-only Answer"] = vector_only_answer
+                df.at[index, "Graph-only Answer"] = graph_only_answer
+                df.at[index, "Graph-Vector Answer"] = graph_vector_answer
+                progress((index + 1, total_rows))
+            answers_path = os.path.join(resource_path, "demo", "questions_answers.xlsx")
+            df.to_excel(answers_path, index=False)
+            return df.head(answer_max_line_count), answers_path
+
+        with gr.Row():
+            with gr.Column():
+                questions_file = gr.File(file_types=[".xlsx", ".csv"], label="Questions File (.xlsx & csv)")
+            with gr.Column():
+                test_template_file = os.path.join(resource_path, "demo", "questions_template.xlsx")
+                gr.File(value=test_template_file, label="Download Template File")
+                answer_max_line_count = gr.Number(1, label="Max Lines To Show", minimum=1, maximum=40)
+                answers_btn = gr.Button("Generate Answer (Batch)", variant="primary")
+        # TODO: Set individual progress bars for dataframe
+        qa_dataframe = gr.DataFrame(label="Questions & Answers (Preview)", headers=tests_df_headers)
+        answers_btn.click(
+            several_rag_answer,
+            inputs=[
+                raw_radio,
+                vector_only_radio,
+                graph_only_radio,
+                graph_vector_radio,
+                graph_ratio,
+                rerank_method,
+                near_neighbor_first,
+                custom_related_information,
+                answer_prompt_input,
+                answer_max_line_count,
+            ],
+            outputs=[qa_dataframe, gr.File(label="Download Answered File", min_width=40)],
+        )
+        questions_file.change(read_file_to_excel, questions_file, [qa_dataframe, answer_max_line_count])
+        answer_max_line_count.change(change_showing_excel, answer_max_line_count, qa_dataframe)
+
+        gr.Markdown("""## 4. Others (🚧) """)
         with gr.Row():
             with gr.Column():
                 inp = gr.Textbox(value="g.V().limit(10)", label="Gremlin query", show_copy_button=True)
diff --git a/hugegraph-llm/src/hugegraph_llm/resources/demo/questions_template.xlsx b/hugegraph-llm/src/hugegraph_llm/resources/demo/questions_template.xlsx
new file mode 100644
index 0000000..deb70c1
Binary files /dev/null and b/hugegraph-llm/src/hugegraph_llm/resources/demo/questions_template.xlsx differ

(incubator-hugegraph-ai) branch main updated: feat(llm): support multi-QA test/generate (#78)

Reply via email to