This is an automated email from the ASF dual-hosted git repository.
jin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph-ai.git
The following commit(s) were added to refs/heads/main by this push:
new 9239799 refactor(llm): extract `topk_per_keyword` &
`topk_return_results` to .env
9239799 is described below
commit 9239799f899b181869e188f25d0268d3dbc3c49a
Author: SoJGooo <[email protected]>
AuthorDate: Thu Jan 9 16:11:19 2025 +0800
refactor(llm): extract `topk_per_keyword` & `topk_return_results` to .env
1. Extract the default of the `topk_per_keyword` argument to the .env file
2. Extract the default of the `topk_return_results` argument to the .env file
3. Rename the `max_items` argument of the `query_graphdb` function to
`max_graph_items`
---------
Co-authored-by: imbajin <[email protected]>
---
hugegraph-llm/README.md | 2 +-
hugegraph-llm/src/hugegraph_llm/config/hugegraph_config.py | 11 ++++++++++-
hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py | 2 +-
.../hugegraph_llm/operators/common_op/merge_dedup_rerank.py | 3 ++-
hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py | 12 +++++++-----
.../hugegraph_llm/operators/hugegraph_op/graph_rag_query.py | 4 ++--
.../hugegraph_llm/operators/index_op/semantic_id_query.py | 2 +-
7 files changed, 24 insertions(+), 12 deletions(-)
diff --git a/hugegraph-llm/README.md b/hugegraph-llm/README.md
index 49fe502..2b25185 100644
--- a/hugegraph-llm/README.md
+++ b/hugegraph-llm/README.md
@@ -159,7 +159,7 @@ Here is a brief usage guide:
```
3. **Query Graph for Rag**: Retrieve the corresponding keywords and their
multi-degree associated relationships from HugeGraph.
```python
- graph_rag.query_graphdb(max_deep=2, max_items=30).print_result()
+ graph_rag.query_graphdb(max_deep=2, max_graph_items=30).print_result()
```
4. **Rerank Searched Result**: Rerank the searched results based on the
similarity between the question and the results.
```python
diff --git a/hugegraph-llm/src/hugegraph_llm/config/hugegraph_config.py
b/hugegraph-llm/src/hugegraph_llm/config/hugegraph_config.py
index cde225a..eac2def 100644
--- a/hugegraph-llm/src/hugegraph_llm/config/hugegraph_config.py
+++ b/hugegraph-llm/src/hugegraph_llm/config/hugegraph_config.py
@@ -21,14 +21,23 @@ from .models import BaseConfig
class HugeGraphConfig(BaseConfig):
"""HugeGraph settings"""
+ # graph server config
graph_ip: Optional[str] = "127.0.0.1"
graph_port: Optional[str] = "8080"
graph_name: Optional[str] = "hugegraph"
graph_user: Optional[str] = "admin"
graph_pwd: Optional[str] = "xxx"
graph_space: Optional[str] = None
+
+ # graph query config
limit_property: Optional[str] = "False"
max_graph_path: Optional[int] = 10
- max_items: Optional[int] = 30
+ max_graph_items: Optional[int] = 30
edge_limit_pre_label: Optional[int] = 8
+
+ # vector config
vector_dis_threshold: Optional[float] = 0.9
+ topk_per_keyword: Optional[int] = 1
+
+ # rerank config
+ topk_return_results: Optional[int] = 20
diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
index bcc1198..83b027b 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/app.py
@@ -63,7 +63,7 @@ def init_rag_ui() -> gr.Interface:
title="HugeGraph RAG Platform",
css=CSS,
) as hugegraph_llm_ui:
- gr.Markdown("# HugeGraph LLM RAG Demo")
+ gr.Markdown("# HugeGraph RAG Platform 🚀")
"""
TODO: leave a general idea of the unresolved part
diff --git
a/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py
b/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py
index c4ff757..d9c5e98 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/common_op/merge_dedup_rerank.py
@@ -22,6 +22,7 @@ import jieba
import requests
from nltk.translate.bleu_score import sentence_bleu
+from hugegraph_llm.config import huge_settings
from hugegraph_llm.models.embeddings.base import BaseEmbedding
from hugegraph_llm.models.rerankers.init_reranker import Rerankers
from hugegraph_llm.utils.log import log
@@ -43,7 +44,7 @@ class MergeDedupRerank:
def __init__(
self,
embedding: BaseEmbedding,
- topk: int = 20,
+ topk: int = huge_settings.topk_return_results,
graph_ratio: float = 0.5,
method: Literal["bleu", "reranker"] = "bleu",
near_neighbor_first: bool = False,
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py
b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py
index 399864a..7df36d7 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py
@@ -32,7 +32,7 @@ from hugegraph_llm.operators.index_op.vector_index_query
import VectorIndexQuery
from hugegraph_llm.operators.llm_op.answer_synthesize import AnswerSynthesize
from hugegraph_llm.operators.llm_op.keyword_extract import KeywordExtract
from hugegraph_llm.utils.decorators import log_time, log_operator_time,
record_qps
-from hugegraph_llm.config import prompt
+from hugegraph_llm.config import prompt, huge_settings
class RAGPipeline:
@@ -98,7 +98,7 @@ class RAGPipeline:
def keywords_to_vid(
self,
by: Literal["query", "keywords"] = "keywords",
- topk_per_keyword: int = 1,
+ topk_per_keyword: int = huge_settings.topk_per_keyword,
topk_per_query: int = 10,
):
"""
@@ -121,7 +121,7 @@ class RAGPipeline:
def query_graphdb(
self,
max_deep: int = 2,
- max_items: int = 30,
+ max_graph_items: int = huge_settings.max_graph_items,
max_v_prop_len: int = 2048,
max_e_prop_len: int = 256,
prop_to_match: Optional[str] = None,
@@ -132,16 +132,18 @@ class RAGPipeline:
Add a graph RAG query operator to the pipeline.
:param max_deep: Maximum depth for the graph query.
- :param max_items: Maximum number of items to retrieve.
+ :param max_graph_items: Maximum number of items to retrieve.
:param max_v_prop_len: Maximum length of vertex properties.
:param max_e_prop_len: Maximum length of edge properties.
:param prop_to_match: Property to match in the graph.
+ :param num_gremlin_generate_example: Number of examples to generate.
+ :param gremlin_prompt: Gremlin prompt for generating examples.
:return: Self-instance for chaining.
"""
self._operators.append(
GraphRAGQuery(
max_deep=max_deep,
- max_items=max_items,
+ max_graph_items=max_graph_items,
max_v_prop_len=max_v_prop_len,
max_e_prop_len=max_e_prop_len,
prop_to_match=prop_to_match,
diff --git
a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
index e213c37..2ced618 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
@@ -78,7 +78,7 @@ class GraphRAGQuery:
def __init__(
self,
max_deep: int = 2,
- max_items: int = int(huge_settings.max_items),
+ max_graph_items: int = huge_settings.max_graph_items,
prop_to_match: Optional[str] = None,
llm: Optional[BaseLLM] = None,
embedding: Optional[BaseEmbedding] = None,
@@ -96,7 +96,7 @@ class GraphRAGQuery:
huge_settings.graph_space,
)
self._max_deep = max_deep
- self._max_items = max_items
+ self._max_items = max_graph_items
self._prop_to_match = prop_to_match
self._schema = ""
self._limit_property = huge_settings.limit_property.lower() == "true"
diff --git
a/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py
b/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py
index 47e80f0..8aa6411 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/index_op/semantic_id_query.py
@@ -34,7 +34,7 @@ class SemanticIdQuery:
embedding: BaseEmbedding,
by: Literal["query", "keywords"] = "keywords",
topk_per_query: int = 10,
- topk_per_keyword: int = 1
+ topk_per_keyword: int = huge_settings.topk_per_keyword
):
self.index_dir = str(os.path.join(resource_path,
huge_settings.graph_name, "graph_vids"))
self.vector_index = VectorIndex.from_index_file(self.index_dir)