This is an automated email from the ASF dual-hosted git repository.
jin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph-ai.git
The following commit(s) were added to refs/heads/main by this push:
new ca28faf refactor(llm): replace vid by full vertexes info (#189)
ca28faf is described below
commit ca28faf29b1d3499467cc8b366b4462b829182e5
Author: imbajin <[email protected]>
AuthorDate: Mon Mar 3 19:42:30 2025 +0800
refactor(llm): replace vid by full vertexes info (#189)
---
.../src/hugegraph_llm/api/models/rag_requests.py | 2 +-
hugegraph-llm/src/hugegraph_llm/api/rag_api.py | 13 +++++++++++--
.../hugegraph_llm/demo/rag_demo/text2gremlin_block.py | 5 +++--
.../operators/hugegraph_op/graph_rag_query.py | 19 +++++++++++++++----
4 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py b/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py
index 9dc868a..a6b58b4 100644
--- a/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py
+++ b/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py
@@ -76,7 +76,7 @@ class GraphRAGRequest(BaseModel):
from the query, by default only the most
similar one is returned.")
client_config : Optional[GraphConfigRequest] = Query(None,
description="hugegraph server config.")
- get_vid_only: bool = Query(False, description="return only keywords & vid (early stop).")
+ get_vertex_only: bool = Query(False, description="return only keywords & vertex (early stop).")
gremlin_tmpl_num: int = Query(
1, description="Number of Gremlin templates to use. If num <=0 means template is not provided"
diff --git a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py
index 5e81c56..04c7b9a 100644
--- a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py
+++ b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py
@@ -32,7 +32,7 @@ from hugegraph_llm.api.models.rag_response import RAGResponse
from hugegraph_llm.config import llm_settings, prompt
from hugegraph_llm.utils.log import log
-
+# pylint: disable=too-many-statements
def rag_http_api(
router: APIRouter,
rag_answer_func,
@@ -101,9 +101,18 @@ def rag_http_api(
near_neighbor_first=req.near_neighbor_first,
custom_related_information=req.custom_priority_info,
gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt,
- get_vid_only=req.get_vid_only
+ get_vertex_only=req.get_vertex_only
)
+ if req.get_vertex_only:
+ from hugegraph_llm.operators.hugegraph_op.graph_rag_query import GraphRAGQuery
+ graph_rag = GraphRAGQuery()
+ graph_rag.init_client(result)
+ vertex_details = graph_rag.get_vertex_details(result["match_vids"])
+
+ if vertex_details:
+ result["match_vids"] = vertex_details
+
if isinstance(result, dict):
params = [
"query",
diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py
index cce84ff..0fcc7f7 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py
@@ -192,7 +192,7 @@ def graph_rag_recall(
topk_return_results: int,
vector_dis_threshold: float,
topk_per_keyword: int,
- get_vid_only: bool
+ get_vertex_only: bool = False,
) -> dict:
store_schema(prompt.text2gql_graph_schema, query, gremlin_prompt)
rag = RAGPipeline()
@@ -200,7 +200,8 @@ def graph_rag_recall(
vector_dis_threshold=vector_dis_threshold,
topk_per_keyword=topk_per_keyword,
)
- if not get_vid_only:
+
+ if not get_vertex_only:
rag.import_schema(huge_settings.graph_name).query_graphdb(
num_gremlin_generate_example=gremlin_tmpl_num,
gremlin_prompt=gremlin_prompt,
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
index 2ced618..53aff68 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
@@ -110,7 +110,7 @@ class GraphRAGQuery:
self._gremlin_prompt = gremlin_prompt or prompt.gremlin_generate_prompt
def run(self, context: Dict[str, Any]) -> Dict[str, Any]:
- self._init_client(context)
+ self.init_client(context)
# initial flag: -1 means no result, 0 means subgraph query, 1 means gremlin query
context["graph_result_flag"] = -1
@@ -239,7 +239,9 @@ class GraphRAGQuery:
)
return context
- def _init_client(self, context):
+ # TODO: move this method to a util file for reuse (remove self param)
+ def init_client(self, context):
+ """Initialize the HugeGraph client from context or default settings."""
# pylint: disable=R0915 (too-many-statements)
if self._client is None:
if isinstance(context.get("graph_client"), PyHugeClient):
@@ -254,6 +256,15 @@ class GraphRAGQuery:
self._client = PyHugeClient(ip, port, graph, user, pwd, gs)
assert self._client is not None, "No valid graph to search."
+ def get_vertex_details(self, vertex_ids: List[str]) -> List[Dict[str, Any]]:
+ if not vertex_ids:
+ return []
+
+ formatted_ids = ", ".join(f"'{vid}'" for vid in vertex_ids)
+ gremlin_query = f"g.V({formatted_ids}).limit(20)"
+ result = self._client.gremlin().exec(gremlin=gremlin_query)["data"]
+ return result
+
def _format_graph_from_vertex(self, query_result: List[Any]) -> Set[str]:
knowledge = set()
for item in query_result:
@@ -374,8 +385,8 @@ class GraphRAGQuery:
schema = self._get_graph_schema()
vertex_props_str, edge_props_str = schema.split("\n")[:2]
# TODO: rename to vertex (also need update in the schema)
- vertex_props_str = vertex_props_str[len("Vertex properties: ") :].strip("[").strip("]")
- edge_props_str = edge_props_str[len("Edge properties: ") :].strip("[").strip("]")
+ vertex_props_str = vertex_props_str[len("Vertex properties: "):].strip("[").strip("]")
+ edge_props_str = edge_props_str[len("Edge properties: "):].strip("[").strip("]")
vertex_labels = self._extract_label_names(vertex_props_str)
edge_labels = self._extract_label_names(edge_props_str)
return vertex_labels, edge_labels