This is an automated email from the ASF dual-hosted git repository. ming pushed a commit to branch web in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph-ai.git
commit 56195e2fe93323ffab8dee528bc233fc66d2c514 Author: zhangshiming <[email protected]> AuthorDate: Wed Feb 28 16:56:14 2024 +0800 Introduce Gradio for creating interactive and visual demo --- hugegraph-llm/examples/build_kg_test.py | 10 +-- hugegraph-llm/examples/graph_rag_test.py | 81 ++-------------------- hugegraph-llm/requirements.txt | 1 + hugegraph-llm/src/config/config.ini | 2 +- hugegraph-llm/src/hugegraph_llm/llms/ernie_bot.py | 4 +- .../src/hugegraph_llm/operators/graph_rag_task.py | 10 +-- .../operators/hugegraph_op/commit_to_hugegraph.py | 5 +- .../operators/hugegraph_op/graph_rag_query.py | 18 +++-- .../operators/kg_construction_task.py | 9 ++- .../operators/llm_op/answer_synthesize.py | 2 +- .../operators/llm_op/keyword_extract.py | 15 ++-- hugegraph-llm/src/hugegraph_llm/utils/config.py | 10 ++- 12 files changed, 57 insertions(+), 110 deletions(-) diff --git a/hugegraph-llm/examples/build_kg_test.py b/hugegraph-llm/examples/build_kg_test.py index b0b8c51..d076d4f 100644 --- a/hugegraph-llm/examples/build_kg_test.py +++ b/hugegraph-llm/examples/build_kg_test.py @@ -49,13 +49,15 @@ if __name__ == "__main__": } ( - builder.import_schema(from_hugegraph="xxx") + builder + # .import_schema(from_hugegraph="xxx") .print_result() # .import_schema(from_extraction="xxx").print_result() - # .import_schema(from_user_defined=xxx).print_result() + .import_schema(from_user_defined=schema) + .print_result() .extract_triples(TEXT) .print_result() - .disambiguate_word_sense() - .commit_to_hugegraph() + # .disambiguate_word_sense() + # .commit_to_hugegraph() .run() ) diff --git a/hugegraph-llm/examples/graph_rag_test.py b/hugegraph-llm/examples/graph_rag_test.py index bbd6862..510f85d 100644 --- a/hugegraph-llm/examples/graph_rag_test.py +++ b/hugegraph-llm/examples/graph_rag_test.py @@ -19,95 +19,26 @@ import os from hugegraph_llm.operators.graph_rag_task import GraphRAG -from pyhugegraph.client import PyHugeClient - - -def prepare_data(): - client = PyHugeClient("127.0.0.1", 8080, "hugegraph", "admin", "admin") - schema = client.schema() - schema.propertyKey("name").asText().ifNotExist().create() - schema.propertyKey("birthDate").asText().ifNotExist().create() - schema.vertexLabel("Person").properties( - "name", "birthDate" - ).useCustomizeStringId().ifNotExist().create() - schema.vertexLabel("Movie").properties("name").useCustomizeStringId().ifNotExist().create() - schema.indexLabel("PersonByName").onV("Person").by("name").secondary().ifNotExist().create() - schema.indexLabel("MovieByName").onV("Movie").by("name").secondary().ifNotExist().create() - schema.edgeLabel("ActedIn").sourceLabel("Person").targetLabel("Movie").ifNotExist().create() - - graph = client.graph() - graph.addVertex("Person", {"name": "Al Pacino", "birthDate": "1940-04-25"}, id="Al Pacino") - graph.addVertex( - "Person", - {"name": "Robert De Niro", "birthDate": "1943-08-17"}, - id="Robert De Niro", - ) - graph.addVertex("Movie", {"name": "The Godfather"}, id="The Godfather") - graph.addVertex("Movie", {"name": "The Godfather Part II"}, id="The Godfather Part II") - graph.addVertex( - "Movie", - {"name": "The Godfather Coda The Death of Michael Corleone"}, - id="The Godfather Coda The Death of Michael Corleone", - ) - - graph.addEdge("ActedIn", "Al Pacino", "The Godfather", {}) - graph.addEdge("ActedIn", "Al Pacino", "The Godfather Part II", {}) - graph.addEdge("ActedIn", "Al Pacino", "The Godfather Coda The Death of Michael Corleone", {}) - graph.addEdge("ActedIn", "Robert De Niro", "The Godfather Part II", {}) - - graph.close() - +from hugegraph_llm.utils.gradio_demo import init_hg_test_data if __name__ == "__main__": + init_hg_test_data() os.environ["http_proxy"] = "" os.environ["https_proxy"] = "" os.environ["OPENAI_API_KEY"] = "" - # prepare_data() - graph_rag = GraphRAG() - - # configure operator with context dict - context = { - # hugegraph client - "ip": "localhost", # default to "localhost" if not set - "port": 18080, # default to 8080 if not set - "user": "admin", # default to "admin" if not set - "pwd": "admin", # default to "admin" if not set - "graph": "hugegraph", # default to "hugegraph" if not set - # query question - "query": "Tell me about Al Pacino.", # must be set - # keywords extraction - "max_keywords": 5, # default to 5 if not set - "language": "english", # default to "english" if not set - # graph rag query - "prop_to_match": "name", # default to None if not set - "max_deep": 2, # default to 2 if not set - "max_items": 30, # default to 30 if not set - # print intermediate processes result - "verbose": True, # default to False if not set - } - result = graph_rag.extract_keyword().query_graph_for_rag().synthesize_answer().run(**context) - print(f"Query:\n- {context['query']}") - print(f"Answer:\n- {result['answer']}") - - print("--------------------------------------------------------") - - # configure operator with parameters - graph_client = PyHugeClient("127.0.0.1", 18080, "hugegraph", "admin", "admin") result = ( - graph_rag.extract_keyword( - text="Tell me about Al Pacino.", - max_keywords=5, # default to 5 if not set - language="english", # default to "english" if not set - ) + graph_rag.extract_keyword(text="Tell me about Al Pacino.") + .print_result() .query_graph_for_rag( - graph_client=graph_client, max_deep=2, # default to 2 if not set max_items=30, # default to 30 if not set prop_to_match=None, # default to None if not set ) + .print_result() .synthesize_answer() + .print_result() .run(verbose=True) ) print("Query:\n- Tell me about Al Pacino.") diff --git a/hugegraph-llm/requirements.txt b/hugegraph-llm/requirements.txt index 03bba7f..7a09ea3 100644 --- a/hugegraph-llm/requirements.txt +++ b/hugegraph-llm/requirements.txt @@ -2,3 +2,4 @@ openai==0.28.1 retry==0.9.2 tiktoken==0.5.1 nltk==3.8.1 +gradio==4.19.1 diff --git a/hugegraph-llm/src/config/config.ini b/hugegraph-llm/src/config/config.ini index 6f9219f..7ff45c4 100644 --- a/hugegraph-llm/src/config/config.ini +++ b/hugegraph-llm/src/config/config.ini @@ -27,6 +27,6 @@ graph = hugegraph type = openai api_key = xxx secret_key = xxx -ernie_url = https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions_pro?access_token= +llm_url = https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions_pro?access_token= model_name = gpt-3.5-turbo-16k max_token = 4000 diff --git a/hugegraph-llm/src/hugegraph_llm/llms/ernie_bot.py b/hugegraph-llm/src/hugegraph_llm/llms/ernie_bot.py index 085f7a0..00fa6fa 100644 --- a/hugegraph-llm/src/hugegraph_llm/llms/ernie_bot.py +++ b/hugegraph-llm/src/hugegraph_llm/llms/ernie_bot.py @@ -31,7 +31,7 @@ class ErnieBotClient(BaseLLM): self.c = Config(section=Constants.LLM_CONFIG) self.api_key = self.c.get_llm_api_key() self.secret_key = self.c.get_llm_secret_key() - self.base_url = self.c.get_llm_ernie_url() + self.base_url = self.c.get_llm_url() self.get_access_token() def get_access_token(self): @@ -61,7 +61,7 @@ class ErnieBotClient(BaseLLM): raise Exception( f"Request failed with code {response.status_code}, message: {response.text}" ) - return response.text + return json.loads(response.text)["result"] def generate_streaming( self, diff --git a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py index 0088119..a62f4b1 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/graph_rag_task.py @@ -20,10 +20,10 @@ from typing import Dict, Any, Optional, List from hugegraph_llm.llms.base import BaseLLM from hugegraph_llm.llms.init_llm import LLMs +from hugegraph_llm.operators.common_op.print_result import PrintResult from hugegraph_llm.operators.hugegraph_op.graph_rag_query import GraphRAGQuery from hugegraph_llm.operators.llm_op.answer_synthesize import AnswerSynthesize from hugegraph_llm.operators.llm_op.keyword_extract import KeywordExtract -from pyhugegraph.client import PyHugeClient class GraphRAG: @@ -52,14 +52,12 @@ class GraphRAG: def query_graph_for_rag( self, - graph_client: Optional[PyHugeClient] = None, max_deep: int = 2, max_items: int = 30, prop_to_match: Optional[str] = None, ): self._operators.append( GraphRAGQuery( - client=graph_client, max_deep=max_deep, max_items=max_items, prop_to_match=prop_to_match, @@ -78,6 +76,10 @@ class GraphRAG: ) return self + def print_result(self): + self._operators.append(PrintResult()) + return self + def run(self, **kwargs) -> Dict[str, Any]: if len(self._operators) == 0: self.extract_keyword().query_graph_for_rag().synthesize_answer() @@ -85,5 +87,5 @@ class GraphRAG: context = kwargs context["llm"] = self._llm for op in self._operators: - context = op.run(context=context) + context = op.run(context) return context diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/commit_to_hugegraph.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/commit_to_hugegraph.py index 558a8ba..695c5b7 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/commit_to_hugegraph.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/commit_to_hugegraph.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - +from typing import Dict, Any from hugegraph_llm.utils.config import Config from hugegraph_llm.utils.constants import Constants @@ -34,7 +34,7 @@ class CommitToKg: ) self.schema = self.client.schema() - def run(self, data: dict): + def run(self, data: dict) -> Dict[str, Any]: if "schema" not in data: self.schema_free_mode(data["triples"]) else: @@ -43,6 +43,7 @@ class CommitToKg: edges = data["edges"] self.init_schema(schema) self.init_graph(vertices, edges) + return data def init_graph(self, vertices, edges): vids = {} diff --git a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py index a59acc1..6d5ede6 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py @@ -19,6 +19,8 @@ import re from typing import Any, Dict, Optional, List, Set, Tuple +from hugegraph_llm.utils.config import Config +from hugegraph_llm.utils.constants import Constants from pyhugegraph.client import PyHugeClient @@ -65,12 +67,18 @@ class GraphRAGQuery: def __init__( self, - client: Optional[PyHugeClient] = None, max_deep: int = 2, max_items: int = 30, prop_to_match: Optional[str] = None, ): - self._client = client + config = Config(section=Constants.HUGEGRAPH_CONFIG) + self._client = PyHugeClient( + config.get_graph_ip(), + config.get_graph_port(), + config.get_graph_name(), + config.get_graph_user(), + config.get_graph_pwd(), + ) self._max_deep = max_deep self._max_items = max_items self._prop_to_match = prop_to_match @@ -231,9 +239,9 @@ class GraphRAGQuery: return self._schema schema = self._client.schema() - vertex_schema = schema.get_vertex_labels() - edge_schema = schema.get_edge_labels() - relationships = schema.get_relations() + vertex_schema = schema.getVertexLabels() + edge_schema = schema.getEdgeLabels() + relationships = schema.getRelations() self._schema = ( f"Node properties: {vertex_schema}\n" diff --git a/hugegraph-llm/src/hugegraph_llm/operators/kg_construction_task.py b/hugegraph-llm/src/hugegraph_llm/operators/kg_construction_task.py index 082058d..283a5a1 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/kg_construction_task.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/kg_construction_task.py @@ -16,6 +16,8 @@ # under the License. +from typing import Dict, Any + from hugegraph_llm.llms.base import BaseLLM from hugegraph_llm.operators.common_op.check_schema import CheckSchema from hugegraph_llm.operators.common_op.print_result import PrintResult @@ -58,7 +60,8 @@ class KgBuilder: self.operators.append(PrintResult()) return self - def run(self): - result = "" + def run(self) -> Dict[str, Any]: + context = "" for operator in self.operators: - result = operator.run(result) + context = operator.run(context) + return context diff --git a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py index b08adb3..6216494 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py @@ -91,4 +91,4 @@ class AnswerSynthesize: if verbose: print(f"\033[91mANSWER: {response}\033[0m") - return context + return context["answer"] diff --git a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/keyword_extract.py b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/keyword_extract.py index 9a94a11..54d698c 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/keyword_extract.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/keyword_extract.py @@ -23,17 +23,10 @@ from hugegraph_llm.llms.base import BaseLLM from hugegraph_llm.llms.init_llm import LLMs from hugegraph_llm.operators.common_op.nltk_helper import NLTKHelper -DEFAULT_KEYWORDS_EXTRACT_TEMPLATE_TMPL = ( - "A question is provided below. Given the question, " - "extract up to {max_keywords} keywords from the text. " - "Focus on extracting the keywords that we can use " - "to best lookup answers to the question. " - "Avoid stopwords.\n" - "---------------------\n" - "{question}\n" - "---------------------\n" - "Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'" -) +DEFAULT_KEYWORDS_EXTRACT_TEMPLATE_TMPL = """extract {max_keywords} keywords from the text: + {question} + Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>' + """ DEFAULT_KEYWORDS_EXPAND_TEMPLATE_TMPL = ( "Generate synonyms or possible form of keywords up to {max_keywords} in total,\n" diff --git a/hugegraph-llm/src/hugegraph_llm/utils/config.py b/hugegraph-llm/src/hugegraph_llm/utils/config.py index d7ec13f..b11585a 100644 --- a/hugegraph-llm/src/hugegraph_llm/utils/config.py +++ b/hugegraph-llm/src/hugegraph_llm/utils/config.py @@ -31,6 +31,12 @@ class Config: self.config.read(self.config_file) self.section = section + def update_config(self, updates): + for key, value in updates.items(): + self.config.set(self.section, key, value) + with open(self.config_file, "w", encoding="utf-8") as configfile: + self.config.write(configfile) + def get_config(self): return self.config @@ -55,8 +61,8 @@ class Config: def get_llm_secret_key(self): return self.config.get(self.section, "secret_key") - def get_llm_ernie_url(self): - return self.config.get(self.section, "ernie_url") + def get_llm_url(self): + return self.config.get(self.section, "llm_url") def get_llm_type(self): return self.config.get(self.section, "type")
