(incubator-hugegraph-ai) 01/01: refactor(llm): replace vid by full vertexes info

jin Mon, 03 Mar 2025 03:31:49 -0800

This is an automated email from the ASF dual-hosted git repository.

jin pushed a commit to branch enhance-vid
in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph-ai.git


commit 942107ec80b2ed8f154f7013d9710a342efd2480
Author: imbajin <[email protected]>
AuthorDate: Mon Mar 3 19:31:27 2025 +0800

    refactor(llm): replace vid by full vertexes info
---
 .../src/hugegraph_llm/api/models/rag_requests.py      |  2 +-
 hugegraph-llm/src/hugegraph_llm/api/rag_api.py        | 11 ++++++++++-
 .../hugegraph_llm/demo/rag_demo/text2gremlin_block.py |  5 +++--
 .../operators/hugegraph_op/graph_rag_query.py         | 19 +++++++++++++++++--
 4 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py b/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py
index 9dc868a..a6b58b4 100644
--- a/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py
+++ b/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py
@@ -76,7 +76,7 @@ class GraphRAGRequest(BaseModel):
                                     from the query, by default only the most similar one is returned.")
 
     client_config : Optional[GraphConfigRequest] = Query(None, description="hugegraph server config.")
-    get_vid_only: bool = Query(False, description="return only keywords & vid (early stop).")
+    get_vertex_only: bool = Query(False, description="return only keywords & vertex (early stop).")
 
     gremlin_tmpl_num: int = Query(
         1, description="Number of Gremlin templates to use. If num <=0 means template is not provided"
diff --git a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py
index 5e81c56..79061f1 100644
--- a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py
+++ b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py
@@ -101,9 +101,18 @@ def rag_http_api(
                 near_neighbor_first=req.near_neighbor_first,
                 custom_related_information=req.custom_priority_info,
                 gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt,
-                get_vid_only=req.get_vid_only
+                get_vertex_only=req.get_vertex_only
             )
 
+            if req.get_vertex_only:
+                from hugegraph_llm.operators.hugegraph_op.graph_rag_query import GraphRAGQuery
+                graph_rag = GraphRAGQuery()
+                graph_rag.init_client(result)
+                vertex_details = graph_rag.get_vertex_details(result["match_vids"])
+
+                if vertex_details:
+                    result["match_vids"] = vertex_details
+
             if isinstance(result, dict):
                 params = [
                     "query",
diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py
index cce84ff..0fcc7f7 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py
@@ -192,7 +192,7 @@ def graph_rag_recall(
     topk_return_results: int,
     vector_dis_threshold: float,
     topk_per_keyword: int,
-    get_vid_only: bool
+    get_vertex_only: bool = False,
 ) -> dict:
     store_schema(prompt.text2gql_graph_schema, query, gremlin_prompt)
     rag = RAGPipeline()
@@ -200,7 +200,8 @@ def graph_rag_recall(
             vector_dis_threshold=vector_dis_threshold,
             topk_per_keyword=topk_per_keyword,
         )
-    if not get_vid_only:
+
+    if not get_vertex_only:
         rag.import_schema(huge_settings.graph_name).query_graphdb(
             num_gremlin_generate_example=gremlin_tmpl_num,
             gremlin_prompt=gremlin_prompt,
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
index 2ced618..19b23c5 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
@@ -110,7 +110,7 @@ class GraphRAGQuery:
         self._gremlin_prompt = gremlin_prompt or prompt.gremlin_generate_prompt
 
     def run(self, context: Dict[str, Any]) -> Dict[str, Any]:
-        self._init_client(context)
+        self.init_client(context)
 
         # initial flag: -1 means no result, 0 means subgraph query, 1 means gremlin query
         context["graph_result_flag"] = -1
@@ -239,7 +239,9 @@ class GraphRAGQuery:
             )
         return context
 
-    def _init_client(self, context):
+    # TODO: move this method to a util file for reuse (remove self param)
+    def init_client(self, context):
+        """Initialize the HugeGraph client from context or default settings."""
         # pylint: disable=R0915 (too-many-statements)
         if self._client is None:
             if isinstance(context.get("graph_client"), PyHugeClient):
@@ -253,6 +255,19 @@ class GraphRAGQuery:
                 gs = context.get("graphspace") or None
                 self._client = PyHugeClient(ip, port, graph, user, pwd, gs)
         assert self._client is not None, "No valid graph to search."
+        
+    def get_vertex_details(self, vertex_ids: List[str]) -> List[Dict[str, Any]]:
+        if not vertex_ids:
+            return []
+            
+        formatted_ids = ", ".join(f"'{vid}'" for vid in vertex_ids)
+        gremlin_query = f"g.V({formatted_ids}).limit(20)"
+        try:
+            result = self._client.gremlin().exec(gremlin=gremlin_query)["data"]
+            return result
+        except Exception as e:
+            log.error("Failed to get vertex details: %s", e)
+            return []
 
     def _format_graph_from_vertex(self, query_result: List[Any]) -> Set[str]:
         knowledge = set()

(incubator-hugegraph-ai) 01/01: refactor(llm): replace vid by full vertexes info

Reply via email to