This is an automated email from the ASF dual-hosted git repository. jin pushed a commit to branch enhance-vid in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph-ai.git
commit 942107ec80b2ed8f154f7013d9710a342efd2480
Author: imbajin <[email protected]>
AuthorDate: Mon Mar 3 19:31:27 2025 +0800

    refactor(llm): replace vid by full vertexes info
---
 .../src/hugegraph_llm/api/models/rag_requests.py      |  2 +-
 hugegraph-llm/src/hugegraph_llm/api/rag_api.py        | 11 ++++++++++-
 .../hugegraph_llm/demo/rag_demo/text2gremlin_block.py |  5 +++--
 .../operators/hugegraph_op/graph_rag_query.py         | 19 +++++++++++++++++--
 4 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py b/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py
index 9dc868a..a6b58b4 100644
--- a/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py
+++ b/hugegraph-llm/src/hugegraph_llm/api/models/rag_requests.py
@@ -76,7 +76,7 @@ class GraphRAGRequest(BaseModel):
         from the query, by default only the most similar one is returned.")
 
     client_config : Optional[GraphConfigRequest] = Query(None, description="hugegraph server config.")
-    get_vid_only: bool = Query(False, description="return only keywords & vid (early stop).")
+    get_vertex_only: bool = Query(False, description="return only keywords & vertex (early stop).")
 
     gremlin_tmpl_num: int = Query(
         1, description="Number of Gremlin templates to use. 
If num <=0 means template is not provided"
diff --git a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py
index 5e81c56..79061f1 100644
--- a/hugegraph-llm/src/hugegraph_llm/api/rag_api.py
+++ b/hugegraph-llm/src/hugegraph_llm/api/rag_api.py
@@ -101,9 +101,18 @@ def rag_http_api(
                 near_neighbor_first=req.near_neighbor_first,
                 custom_related_information=req.custom_priority_info,
                 gremlin_prompt=req.gremlin_prompt or prompt.gremlin_generate_prompt,
-                get_vid_only=req.get_vid_only
+                get_vertex_only=req.get_vertex_only
             )
 
+            if req.get_vertex_only:
+                from hugegraph_llm.operators.hugegraph_op.graph_rag_query import GraphRAGQuery
+                graph_rag = GraphRAGQuery()
+                graph_rag.init_client(result)
+                vertex_details = graph_rag.get_vertex_details(result["match_vids"])
+
+                if vertex_details:
+                    result["match_vids"] = vertex_details
+
             if isinstance(result, dict):
                 params = [
                     "query",
diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py
index cce84ff..0fcc7f7 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/text2gremlin_block.py
@@ -192,7 +192,7 @@ def graph_rag_recall(
     topk_return_results: int,
     vector_dis_threshold: float,
     topk_per_keyword: int,
-    get_vid_only: bool
+    get_vertex_only: bool = False,
 ) -> dict:
     store_schema(prompt.text2gql_graph_schema, query, gremlin_prompt)
     rag = RAGPipeline()
@@ -200,7 +200,8 @@ def graph_rag_recall(
         vector_dis_threshold=vector_dis_threshold,
         topk_per_keyword=topk_per_keyword,
     )
-    if not get_vid_only:
+
+    if not get_vertex_only:
         rag.import_schema(huge_settings.graph_name).query_graphdb(
             num_gremlin_generate_example=gremlin_tmpl_num,
             gremlin_prompt=gremlin_prompt,
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
index 
2ced618..19b23c5 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py
@@ -110,7 +110,7 @@ class GraphRAGQuery:
         self._gremlin_prompt = gremlin_prompt or prompt.gremlin_generate_prompt
 
     def run(self, context: Dict[str, Any]) -> Dict[str, Any]:
-        self._init_client(context)
+        self.init_client(context)
 
         # initial flag: -1 means no result, 0 means subgraph query, 1 means gremlin query
         context["graph_result_flag"] = -1
@@ -239,7 +239,9 @@ class GraphRAGQuery:
             )
         return context
 
-    def _init_client(self, context):
+    # TODO: move this method to a util file for reuse (remove self param)
+    def init_client(self, context):
+        """Initialize the HugeGraph client from context or default settings."""
         # pylint: disable=R0915 (too-many-statements)
         if self._client is None:
             if isinstance(context.get("graph_client"), PyHugeClient):
@@ -253,6 +255,19 @@ class GraphRAGQuery:
                 gs = context.get("graphspace") or None
                 self._client = PyHugeClient(ip, port, graph, user, pwd, gs)
         assert self._client is not None, "No valid graph to search."
+
+    def get_vertex_details(self, vertex_ids: List[str]) -> List[Dict[str, Any]]:
+        if not vertex_ids:
+            return []
+
+        formatted_ids = ", ".join(f"'{vid}'" for vid in vertex_ids)
+        gremlin_query = f"g.V({formatted_ids}).limit(20)"
+        try:
+            result = self._client.gremlin().exec(gremlin=gremlin_query)["data"]
+            return result
+        except Exception as e:
+            log.error("Failed to get vertex details: %s", e)
+            return []
 
     def _format_graph_from_vertex(self, query_result: List[Any]) -> Set[str]:
         knowledge = set()
