This is an automated email from the ASF dual-hosted git repository.
jin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph-ai.git
The following commit(s) were added to refs/heads/main by this push:
new d254d9e refact(llm): separate user prompt configs (#77)
d254d9e is described below
commit d254d9e6f5dff5d19192e4605dedcd2f7032e21e
Author: Hongjun Li <[email protected]>
AuthorDate: Wed Sep 11 16:18:14 2024 +0800
refact(llm): separate user prompt configs (#77)
TODO: we need to separate more LLM settings out of .env
---------
Co-authored-by: imbajin <[email protected]>
---
.gitignore | 3 +
hugegraph-llm/src/hugegraph_llm/config/__init__.py | 10 +-
hugegraph-llm/src/hugegraph_llm/config/config.py | 111 +++++++------
.../src/hugegraph_llm/config/config_data.py | 172 +++++++++++++++++++++
hugegraph-llm/src/hugegraph_llm/config/generate.py | 4 +-
.../src/hugegraph_llm/demo/rag_web_demo.py | 66 +++-----
.../operators/llm_op/answer_synthesize.py | 18 +--
.../operators/llm_op/property_graph_extract.py | 41 +----
8 files changed, 272 insertions(+), 153 deletions(-)
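
At a high level, this change moves the user-editable prompt text out of the .env-backed Config and into a YAML-backed PromptConfig persisted as config_prompt.yaml (now git-ignored, see below). A minimal usage sketch of the new import surface, based only on names visible in this diff and not on a tested API:

# Sketch only: assumes hugegraph-llm is installed and importable.
from hugegraph_llm.config import settings, prompt

print(settings.llm_type)   # .env-backed LLM/graph settings (Config)
print(prompt.question)     # YAML-backed prompt text (PromptConfig)

prompt.question = "Tell me about James."
prompt.update_yaml_file()  # writes the edit back to config_prompt.yaml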
diff --git a/.gitignore b/.gitignore
index f39daf9..77c4168 100644
--- a/.gitignore
+++ b/.gitignore
@@ -125,6 +125,9 @@ celerybeat.pid
# SageMath parsed files
*.sage.py
+# prompt config
+config_prompt.yaml
+
# Environments
.env
.venv
diff --git a/hugegraph-llm/src/hugegraph_llm/config/__init__.py b/hugegraph-llm/src/hugegraph_llm/config/__init__.py
index 3e6c9e9..e077fee 100644
--- a/hugegraph-llm/src/hugegraph_llm/config/__init__.py
+++ b/hugegraph-llm/src/hugegraph_llm/config/__init__.py
@@ -16,17 +16,15 @@
# under the License.
-__all__ = [
- "settings",
- "resource_path"
-]
+__all__ = ["settings", "resource_path"]
import os
-
-from .config import Config
+from .config import Config, PromptConfig
settings = Config()
settings.from_env()
+prompt = PromptConfig()
+
package_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
resource_path = os.path.join(package_path, "resources")
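
A note on the design above: settings and prompt are both instantiated once at package import time, so every importer shares the same mutable instances. A hedged sketch of the effect (module names are hypothetical):

# somewhere_a.py (hypothetical): mutate the shared prompt instance
from hugegraph_llm.config import prompt
prompt.question = "Tell me about James."

# somewhere_b.py (hypothetical): the import resolves to the same object,
# so the edit made in somewhere_a is visible here
from hugegraph_llm.config import prompt
assert prompt.question == "Tell me about James."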
diff --git a/hugegraph-llm/src/hugegraph_llm/config/config.py b/hugegraph-llm/src/hugegraph_llm/config/config.py
index 2a73b62..e70fbde 100644
--- a/hugegraph-llm/src/hugegraph_llm/config/config.py
+++ b/hugegraph-llm/src/hugegraph_llm/config/config.py
@@ -17,64 +17,24 @@
import os
-
from dataclasses import dataclass
-from typing import Literal, Optional
+from typing import Optional
+
+import yaml
from dotenv import dotenv_values, set_key
+from hugegraph_llm.config.config_data import ConfigData, PromptData
from hugegraph_llm.utils.log import log
-dirname = os.path.dirname
-package_path = dirname(dirname(dirname(dirname(os.path.abspath(__file__)))))
+dir_name = os.path.dirname
+package_path = dir_name(dir_name(dir_name(dir_name(os.path.abspath(__file__)))))
env_path = os.path.join(package_path, ".env")
+f_name = "config_prompt.yaml"
+yaml_file_path = os.path.join(package_path, f"src/hugegraph_llm/resources/demo/{f_name}")
@dataclass
-class Config:
- """LLM settings"""
- # env_path: Optional[str] = ".env"
- llm_type: Literal["openai", "ollama", "qianfan_wenxin", "zhipu"] = "openai"
- embedding_type: Optional[Literal["openai", "ollama", "qianfan_wenxin", "zhipu"]] = "openai"
- reranker_type: Optional[Literal["cohere", "siliconflow"]] = None
- # 1. OpenAI settings
- openai_api_base: Optional[str] = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")
- openai_api_key: Optional[str] = os.environ.get("OPENAI_API_KEY")
- openai_language_model: Optional[str] = "gpt-4o-mini"
- openai_embedding_model: Optional[str] = "text-embedding-3-small"
- openai_max_tokens: int = 4096
- # 2. Rerank settings
- cohere_base_url: Optional[str] = os.environ.get("CO_API_URL", "https://api.cohere.com/v1/rerank")
- reranker_api_key: Optional[str] = None
- reranker_model: Optional[str] = None
- # 3. Ollama settings
- ollama_host: Optional[str] = "127.0.0.1"
- ollama_port: Optional[int] = 11434
- ollama_language_model: Optional[str] = None
- ollama_embedding_model: Optional[str] = None
- # 4. QianFan/WenXin settings
- qianfan_api_key: Optional[str] = None
- qianfan_secret_key: Optional[str] = None
- qianfan_access_token: Optional[str] = None
- # 4.1 URL settings
- qianfan_url_prefix: Optional[str] = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop"
- qianfan_chat_url: Optional[str] = qianfan_url_prefix + "/chat/"
- qianfan_language_model: Optional[str] = "ERNIE-4.0-Turbo-8K"
- qianfan_embed_url: Optional[str] = qianfan_url_prefix + "/embeddings/"
- # refer https://cloud.baidu.com/doc/WENXINWORKSHOP/s/alj562vvu to get more details
- qianfan_embedding_model: Optional[str] = "embedding-v1"
- # 5. ZhiPu(GLM) settings
- zhipu_api_key: Optional[str] = None
- zhipu_language_model: Optional[str] = "glm-4"
- zhipu_embedding_model: Optional[str] = "embedding-2"
-
- """HugeGraph settings"""
- graph_ip: Optional[str] = "127.0.0.1"
- graph_port: Optional[str] = "8080"
- graph_name: Optional[str] = "hugegraph"
- graph_user: Optional[str] = "admin"
- graph_pwd: Optional[str] = "xxx"
- graph_space: Optional[str] = None
-
+class Config(ConfigData):
def from_env(self):
if os.path.exists(env_path):
env_config = read_dotenv()
@@ -125,3 +85,56 @@ def read_dotenv() -> dict[str, Optional[str]]:
if key not in os.environ:
os.environ[key] = value or ""
return env_config
+
+
+class PromptConfig(PromptData):
+
+ def __init__(self):
+ self.ensure_yaml_file_exists()
+
+ def ensure_yaml_file_exists(self):
+ if os.path.exists(yaml_file_path):
+ log.info(f"Loading prompt file '{f_name}' successfully.")
+ with open(yaml_file_path, "r") as file:
+ data = yaml.safe_load(file)
+ # Load existing values from the YAML file into the class attributes
+ for key, value in data.items():
+ setattr(self, key, value)
+ else:
+ self.save_to_yaml()
+ log.info(f"Prompt file '{yaml_file_path}' doesn't exist, creating it.")
+
+
+ def save_to_yaml(self):
+ indented_schema = "\n".join([f" {line}" for line in self.rag_schema.splitlines()])
+ indented_example_prompt = "\n".join([f" {line}" for line in self.schema_example_prompt.splitlines()])
+ indented_question = "\n".join([f" {line}" for line in self.question.splitlines()])
+ indented_custom_related_information = (
+ "\n".join([f" {line}" for line in self.custom_related_information.splitlines()])
+ )
+ indented_default_answer_template = "\n".join([f" {line}" for line in self.default_answer_template.splitlines()])
+
+ # This can be extended with more fields as additional data needs to be stored
+ yaml_content = f"""rag_schema: |
+{indented_schema}
+
+schema_example_prompt: |
+{indented_example_prompt}
+
+question: |
+{indented_question}
+
+custom_related_information: |
+{indented_custom_related_information}
+
+default_answer_template: |
+{indented_default_answer_template}
+
+"""
+ with open(yaml_file_path, "w") as file:
+ file.write(yaml_content)
+
+
+ def update_yaml_file(self):
+ self.save_to_yaml()
+ log.info(f"Prompt file '{f_name}' updated successfully.")
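
The YAML round trip above is intentionally simple: save_to_yaml writes every prompt field as a literal block scalar (the "|" style, with two-space indentation), and ensure_yaml_file_exists reads the file back with yaml.safe_load and copies each key onto the instance via setattr. A self-contained sketch of that round trip, using PyYAML semantics and a shortened field value (not the project's real default):

import yaml

# What save_to_yaml emits for one field: a literal block scalar.
written = """question: |
  Tell me about Sarah.
"""

data = yaml.safe_load(written)

class Holder:  # stand-in for PromptConfig, illustration only
    pass

h = Holder()
for key, value in data.items():  # mirrors ensure_yaml_file_exists()
    setattr(h, key, value)

assert h.question == "Tell me about Sarah.\n"  # block scalars keep the trailing newline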
diff --git a/hugegraph-llm/src/hugegraph_llm/config/config_data.py b/hugegraph-llm/src/hugegraph_llm/config/config_data.py
new file mode 100644
index 0000000..b2a4fd1
--- /dev/null
+++ b/hugegraph-llm/src/hugegraph_llm/config/config_data.py
@@ -0,0 +1,172 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import os
+from dataclasses import dataclass
+from typing import Literal, Optional
+
+
+@dataclass
+class ConfigData:
+ """LLM settings"""
+
+ # env_path: Optional[str] = ".env"
+ llm_type: Literal["openai", "ollama", "qianfan_wenxin", "zhipu"] = "openai"
+ embedding_type: Optional[Literal["openai", "ollama", "qianfan_wenxin", "zhipu"]] = "openai"
+ reranker_type: Optional[Literal["cohere", "siliconflow"]] = None
+ # 1. OpenAI settings
+ openai_api_base: Optional[str] = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")
+ openai_api_key: Optional[str] = os.environ.get("OPENAI_API_KEY")
+ openai_language_model: Optional[str] = "gpt-4o-mini"
+ openai_embedding_model: Optional[str] = "text-embedding-3-small"
+ openai_max_tokens: int = 4096
+ # 2. Rerank settings
+ cohere_base_url: Optional[str] = os.environ.get("CO_API_URL", "https://api.cohere.com/v1/rerank")
+ reranker_api_key: Optional[str] = None
+ reranker_model: Optional[str] = None
+ # 3. Ollama settings
+ ollama_host: Optional[str] = "127.0.0.1"
+ ollama_port: Optional[int] = 11434
+ ollama_language_model: Optional[str] = None
+ ollama_embedding_model: Optional[str] = None
+ # 4. QianFan/WenXin settings
+ qianfan_api_key: Optional[str] = None
+ qianfan_secret_key: Optional[str] = None
+ qianfan_access_token: Optional[str] = None
+ # 4.1 URL settings
+ qianfan_url_prefix: Optional[str] = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop"
+ qianfan_chat_url: Optional[str] = qianfan_url_prefix + "/chat/"
+ qianfan_language_model: Optional[str] = "ERNIE-4.0-Turbo-8K"
+ qianfan_embed_url: Optional[str] = qianfan_url_prefix + "/embeddings/"
+ # refer https://cloud.baidu.com/doc/WENXINWORKSHOP/s/alj562vvu to get more details
+ qianfan_embedding_model: Optional[str] = "embedding-v1"
+ # TODO: confirm whether this needs to be configurable
+ # 5. ZhiPu(GLM) settings
+ zhipu_api_key: Optional[str] = None
+ zhipu_language_model: Optional[str] = "glm-4"
+ zhipu_embedding_model: Optional[str] = "embedding-2"
+
+ """HugeGraph settings"""
+ graph_ip: Optional[str] = "127.0.0.1"
+ graph_port: Optional[str] = "8080"
+ graph_name: Optional[str] = "hugegraph"
+ graph_user: Optional[str] = "admin"
+ graph_pwd: Optional[str] = "xxx"
+ graph_space: Optional[str] = None
+
+
+# Static prompt data consumed by PromptConfig
+class PromptData:
+
+ # Data moved out of hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py
+ default_answer_template = f"""You are an expert in knowledge graphs and natural language processing.
+Your task is to provide a precise and accurate answer based on the given context.
+
+Context information is below.
+---------------------
+{{context_str}}
+---------------------
+
+Given the context information and without using fictive knowledge,
+answer the following query in a concise and professional manner.
+Query: {{query_str}}
+Answer:
+"""
+
+ custom_related_information = """"""
+
+ question = """Tell me about Sarah."""
+
+ # Data moved out of hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py
+ schema_example_prompt = """## Main Task
+Given the following graph schema and a piece of text, your task is to analyze the text and extract information that fits into the schema's structure, formatting the information into vertices and edges as specified.
+## Basic Rules
+### Schema Format
+Graph Schema:
+- Vertices: [List of vertex labels and their properties]
+- Edges: [List of edge labels, their source and target vertex labels, and properties]
+### Content Rule
+Please read the provided text carefully and identify any information that corresponds to the vertices and edges defined in the schema. For each piece of information that matches a vertex or edge, format it according to the following JSON structures:
+#### Vertex Format:
+{"id":"vertexLabelID:entityName","label":"vertexLabel","type":"vertex","properties":{"propertyName":"propertyValue", ...}}
+#### Edge Format:
+{"label":"edgeLabel","type":"edge","outV":"sourceVertexId","outVLabel":"sourceVertexLabel","inV":"targetVertexId","inVLabel":"targetVertexLabel","properties":{"propertyName":"propertyValue",...}}
+Also follow the rules:
+1. Don't extract property fields that do not exist in the given schema
+2. Ensure the extracted property is in the same type as the schema (like 'age' should be a number)
+3. If there are multiple primary keys, the strategy for generating VID is: vertexlabelID:pk1!pk2!pk3 (pk means primary key, and '!' is the separator)
+4. Output should be a list of JSON objects, each representing a vertex or an edge, extracted and formatted based on the text and schema.
+5. Translate the schema fields into Chinese if the given text is Chinese but the schema is in English (Optional)
+## Example
+### Input example:
+#### text
+Meet Sarah, a 30-year-old attorney, and her roommate, James, whom she's shared a home with since 2010. James, in his professional life, works as a journalist.
+#### graph schema
+{"vertices":[{"vertex_label":"person","properties":["name","age","occupation"]}], "edges":[{"edge_label":"roommate", "source_vertex_label":"person","target_vertex_label":"person","properties":["date"]}]}
+### Output example:
+[{"id":"1:Sarah","label":"person","type":"vertex","properties":{"name":"Sarah","age":30,"occupation":"attorney"}},{"id":"1:James","label":"person","type":"vertex","properties":{"name":"James","occupation":"journalist"}},{"label":"roommate","type":"edge","outV":"1:Sarah","outVLabel":"person","inV":"1:James","inVLabel":"person","properties":{"date":"2010"}}]
+"""
+
+ rag_schema = """{
+"vertexlabels": [
+ {
+ "id": 1,
+ "name": "person",
+ "id_strategy": "PRIMARY_KEY",
+ "primary_keys": [
+ "name"
+ ],
+ "properties": [
+ "name",
+ "age",
+ "occupation"
+ ]
+ },
+ {
+ "id": 2,
+ "name": "webpage",
+ "id_strategy": "PRIMARY_KEY",
+ "primary_keys": [
+ "name"
+ ],
+ "properties": [
+ "name",
+ "url"
+ ]
+ }
+],
+"edgelabels": [
+ {
+ "id": 1,
+ "name": "roommate",
+ "source_label": "person",
+ "target_label": "person",
+ "properties": [
+ "date"
+ ]
+ },
+ {
+ "id": 2,
+ "name": "link",
+ "source_label": "webpage",
+ "target_label": "person",
+ "properties": []
+ }
+]
+}
+"""
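
Two idioms coexist in this new file: ConfigData is a @dataclass whose fields all carry defaults, so Config(ConfigData) inherits the whole settings surface and stays constructible with no arguments, while PromptData is a plain class holding large string class attributes. A trimmed sketch of the distinction (stand-in names, not the real classes):

from dataclasses import dataclass, fields
from typing import Optional

@dataclass
class MiniConfigData:  # stand-in for ConfigData
    llm_type: str = "openai"
    graph_port: Optional[str] = "8080"

class MiniPromptData:  # stand-in for PromptData
    question = "Tell me about Sarah."

cfg = MiniConfigData()                # no arguments needed: every field has a default
print([f.name for f in fields(cfg)])  # dataclass fields are introspectable
print(MiniPromptData.question)        # plain class attribute, no instance required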
diff --git a/hugegraph-llm/src/hugegraph_llm/config/generate.py b/hugegraph-llm/src/hugegraph_llm/config/generate.py
index 7016cbb..fb8943b 100644
--- a/hugegraph-llm/src/hugegraph_llm/config/generate.py
+++ b/hugegraph-llm/src/hugegraph_llm/config/generate.py
@@ -20,8 +20,8 @@ import argparse
from hugegraph_llm.config import settings
-if __name__ == '__main__':
- parser = argparse.ArgumentParser(description='Generate hugegraph-llm config file')
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Generate hugegraph-llm config file")
parser.add_argument("-U", "--update", action="store_true", help="Update the config file")
args = parser.parse_args()
if args.update:
diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index e0232cf..aaaf0a4 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -32,13 +32,14 @@ from gradio.utils import NamedString
from requests.auth import HTTPBasicAuth
from hugegraph_llm.api.rag_api import rag_http_api
-from hugegraph_llm.config import settings, resource_path
+from hugegraph_llm.config import settings, resource_path, prompt
from hugegraph_llm.enums.build_mode import BuildMode
from hugegraph_llm.models.embeddings.init_embedding import Embeddings
from hugegraph_llm.models.llms.init_llm import LLMs
from hugegraph_llm.operators.graph_rag_task import RAGPipeline
from hugegraph_llm.operators.kg_construction_task import KgBuilder
from hugegraph_llm.operators.llm_op.property_graph_extract import SCHEMA_EXAMPLE_PROMPT
+from hugegraph_llm.operators.llm_op.answer_synthesize import DEFAULT_ANSWER_TEMPLATE
from hugegraph_llm.utils.hugegraph_utils import get_hg_client
from hugegraph_llm.utils.hugegraph_utils import init_hg_test_data, run_gremlin_query, clean_hg_data
from hugegraph_llm.utils.log import log
@@ -71,6 +72,13 @@ def rag_answer(
custom_related_information: str,
answer_prompt: str,
) -> Tuple:
+
+ if prompt.question != text or prompt.custom_related_information != custom_related_information or prompt.default_answer_template != answer_prompt:
+ prompt.custom_related_information = custom_related_information
+ prompt.question = text
+ prompt.default_answer_template = answer_prompt
+ prompt.update_yaml_file()
+
vector_search = vector_only_answer or graph_vector_answer
graph_search = graph_only_answer or graph_vector_answer
@@ -117,6 +125,13 @@ def build_kg( # pylint: disable=too-many-branches
example_prompt: str,
build_mode: str,
) -> str:
+
+ # sync the user's prompt edits (schema & example prompt) to the YAML file
+ if prompt.rag_schema != schema or prompt.schema_example_prompt != example_prompt:
+ prompt.rag_schema = schema
+ prompt.schema_example_prompt = example_prompt
+ prompt.update_yaml_file()
+
if isinstance(files, NamedString):
files = [files]
texts = []
@@ -482,49 +497,16 @@ def init_rag_ui() -> gr.Interface:
"""
)
- schema = """{
- "vertexlabels": [
- {
- "id":1,
- "name": "person",
- "id_strategy": "PRIMARY_KEY",
- "primary_keys":["name"],
- "properties": ["name","age","occupation"]
- },
- {
- "id":2,
- "name": "webpage",
- "id_strategy":"PRIMARY_KEY",
- "primary_keys":["name"],
- "properties": ["name","url"]
- }
- ],
- "edgelabels": [
- {
- "id": 1,
- "name": "roommate",
- "source_label": "person",
- "target_label": "person",
- "properties": ["date"]
- },
- {
- "id": 2,
- "name": "link",
- "source_label": "webpage",
- "target_label": "person",
- "properties": []
- }
- ]
-}"""
-
+ schema = prompt.rag_schema
+
with gr.Row():
input_file = gr.File(
value=[os.path.join(resource_path, "demo", "test.txt")],
label="Docs (multi-files can be selected together)",
file_count="multiple",
)
- input_schema = gr.Textbox(value=schema, label="Schema")
- info_extract_template = gr.Textbox(value=SCHEMA_EXAMPLE_PROMPT, label="Info extract head")
+ input_schema = gr.Textbox(value=schema, label="Schema", lines=2)
+ info_extract_template = gr.Textbox(value=SCHEMA_EXAMPLE_PROMPT, label="Info extract head", lines=2)
with gr.Column():
mode = gr.Radio(
choices=["Test Mode", "Import Mode", "Clear and Import", "Rebuild Vector"],
@@ -543,7 +525,7 @@ def init_rag_ui() -> gr.Interface:
gr.Markdown("""## 2. RAG with HugeGraph 📖""")
with gr.Row():
with gr.Column(scale=2):
- inp = gr.Textbox(value="Tell me about Sarah.", label="Question", show_copy_button=True)
+ inp = gr.Textbox(value=prompt.question, label="Question", show_copy_button=True, lines=2)
raw_out = gr.Textbox(label="Basic LLM Answer", show_copy_button=True)
vector_only_out = gr.Textbox(label="Vector-only Answer", show_copy_button=True)
graph_only_out = gr.Textbox(label="Graph-only Answer", show_copy_button=True)
@@ -551,7 +533,7 @@ def init_rag_ui() -> gr.Interface:
from hugegraph_llm.operators.llm_op.answer_synthesize import DEFAULT_ANSWER_TEMPLATE
answer_prompt_input = gr.Textbox(
- value=DEFAULT_ANSWER_TEMPLATE, label="Custom Prompt", show_copy_button=True
+ value=DEFAULT_ANSWER_TEMPLATE, label="Custom Prompt", show_copy_button=True, lines=2
)
with gr.Column(scale=1):
with gr.Row():
@@ -581,7 +563,7 @@ def init_rag_ui() -> gr.Interface:
info="One-depth neighbors > two-depth neighbors",
)
custom_related_information = gr.Text(
- "",
+ prompt.custom_related_information,
label="Custom related information(Optional)",
)
btn = gr.Button("Answer Question", variant="primary")
@@ -747,7 +729,7 @@ if __name__ == "__main__":
app.include_router(app_auth)
auth_enabled = os.getenv("ENABLE_LOGIN", "False").lower() == "true"
- log.info("Authentication is %s.", "enabled" if auth_enabled else "disabled")
+ log.info("(Status) Authentication is %s now.", "enabled" if auth_enabled else "disabled")
# TODO: support multi-user login when need
app = gr.mount_gradio_app(app, hugegraph_llm, path="/", auth=("rag", os.getenv("TOKEN")) if auth_enabled else None)
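
Both Gradio callbacks above (rag_answer and build_kg) now share the same compare-then-persist pattern: the YAML file is rewritten only when the user actually edited a prompt field. A condensed sketch of that pattern; the helper name is illustrative, not from the diff:

def sync_prompt_edits(prompt, text, related_info, answer_template):
    """Illustrative only: persist prompt edits iff something changed."""
    changed = (
        prompt.question != text
        or prompt.custom_related_information != related_info
        or prompt.default_answer_template != answer_template
    )
    if changed:
        prompt.question = text
        prompt.custom_related_information = related_info
        prompt.default_answer_template = answer_template
        prompt.update_yaml_file()  # one disk write, skipped on no-op requests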
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py
index 2d05160..129b77b 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py
@@ -21,22 +21,10 @@ from typing import Any, Dict, Optional
from hugegraph_llm.models.llms.base import BaseLLM
from hugegraph_llm.models.llms.init_llm import LLMs
+from hugegraph_llm.config import prompt
-# TODO: we need enhance the template to answer the question (put it in a separate file)
-DEFAULT_ANSWER_TEMPLATE = f"""
-You are an expert in knowledge graphs and natural language processing.
-Your task is to provide a precise and accurate answer based on the given context.
-
-Context information is below.
----------------------
-{{context_str}}
----------------------
-
-Given the context information and without using fictive knowledge,
-answer the following query in a concise and professional manner.
-Query: {{query_str}}
-Answer:
-"""
+
+DEFAULT_ANSWER_TEMPLATE = prompt.default_answer_template
class AnswerSynthesize:
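
One subtlety in the template above: it is built with an f-string whose braces are doubled ({{context_str}}), so the stored text keeps literal {context_str} and {query_str} placeholders. Presumably AnswerSynthesize fills them in later, e.g. via str.format; that call is outside this diff, so the sketch below rests on that assumption:

# Doubled braces in an f-string survive as single literal braces,
# leaving placeholders for a later str.format() call (assumed here).
template = f"""Context information is below.
---------------------
{{context_str}}
---------------------
Query: {{query_str}}
Answer:
"""

print(template.format(context_str="(retrieved chunks)",
                      query_str="Tell me about Sarah."))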
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py
index 5f802af..d080d56 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py
@@ -20,52 +20,15 @@ import json
import re
from typing import List, Any, Dict
+from hugegraph_llm.config import prompt
from hugegraph_llm.document.chunk_split import ChunkSplitter
from hugegraph_llm.models.llms.base import BaseLLM
from hugegraph_llm.utils.log import log
-# TODO: put in a separate file for users to customize the content
-SCHEMA_EXAMPLE_PROMPT = """## Main Task
-Given the following graph schema and a piece of text, your task is to analyze the text and extract information that fits into the schema's structure, formatting the information into vertices and edges as specified.
-
-## Basic Rules
-### Schema Format
-Graph Schema:
-- Vertices: [List of vertex labels and their properties]
-- Edges: [List of edge labels, their source and target vertex labels, and
properties]
-
-### Content Rule
-Please read the provided text carefully and identify any information that corresponds to the vertices and edges defined in the schema. For each piece of information that matches a vertex or edge, format it according to the following JSON structures:
-#### Vertex Format:
-{"id":"vertexLabelID:entityName","label":"vertexLabel","type":"vertex","properties":{"propertyName":"propertyValue",
-...}}
-
-#### Edge Format:
-{"label":"edgeLabel","type":"edge","outV":"sourceVertexId","outVLabel":"sourceVertexLabel","inV":"targetVertexId","inVLabel":"targetVertexLabel","properties":{"propertyName":"propertyValue",...}}
-
-Also follow the rules:
-1. Don't extract property fields that do not exist in the given schema
-2. Ensure the extract property is in the same type as the schema (like 'age' should be a number)
-3. If there are multiple primarykeys provided, then the generating strategy of VID is: vertexlabelID:pk1!pk2!pk3 (pk means primary key, and '!' is the separator, no extra space between them)
-4. Your output should be a list of such JSON objects, each representing either a vertex or an edge, extracted and formatted based on the text and the provided schema.
-5. Translate the given schema filed into Chinese if the given text is Chinese but the schema is in English (Optional)
-
-
-## Example
-### Input example:
-#### text
-Meet Sarah, a 30-year-old attorney, and her roommate, James, whom she's shared a home with since 2010. James, in his professional life, works as a journalist.
-#### graph schema
-{"vertices":[{"vertex_label":"person","properties":["name","age","occupation"]}], "edges":[{"edge_label":"roommate", "source_vertex_label":"person","target_vertex_label":"person","properties":["date"]}]}
-
-### Output example:
-[{"id":"1:Sarah","label":"person","type":"vertex","properties":{"name":"Sarah","age":30,"occupation":"attorney"}},{"id":"1:James","label":"person","type":"vertex","properties":{"name":"James","occupation":"journalist"}},{"label":"roommate","type":"edge","outV":"1:Sarah","outVLabel":"person","inV":"1:James","inVLabel":"person","properties":{"date":"2010"}}]
-"""
-
+SCHEMA_EXAMPLE_PROMPT = prompt.schema_example_prompt
def generate_extract_property_graph_prompt(text, schema=None) -> str:
return f"""---
-
Following the full instructions above, try to extract the following text from the given schema, output the JSON result:
# Input
## Text: