This is an automated email from the ASF dual-hosted git repository.
jin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph-ai.git
The following commit(s) were added to refs/heads/main by this push:
new d254d9e refact(llm): separate user prompt configs (#77)
d254d9e is described below
commit d254d9e6f5dff5d19192e4605dedcd2f7032e21e
Author: Hongjun Li <[email protected]>
AuthorDate: Wed Sep 11 16:18:14 2024 +0800
refact(llm): separate user prompt configs (#77)
TODO: we need to separate more LLM settings out of .env
---------
Co-authored-by: imbajin <[email protected]>
---
.gitignore | 3 +
hugegraph-llm/src/hugegraph_llm/config/__init__.py | 10 +-
hugegraph-llm/src/hugegraph_llm/config/config.py | 111 +++++++------
.../src/hugegraph_llm/config/config_data.py | 172 +++++++++++++++++++++
hugegraph-llm/src/hugegraph_llm/config/generate.py | 4 +-
.../src/hugegraph_llm/demo/rag_web_demo.py | 66 +++-----
.../operators/llm_op/answer_synthesize.py | 18 +--
.../operators/llm_op/property_graph_extract.py | 41 +----
8 files changed, 272 insertions(+), 153 deletions(-)
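
At a high level, this change moves the user-editable prompt text out of the .env-backed Config and into a YAML-backed PromptConfig persisted as config_prompt.yaml (now git-ignored, see below). A minimal usage sketch of the new import surface, based only on names visible in this diff and not on a tested API:

# Sketch only: assumes hugegraph-llm is installed and importable.
from hugegraph_llm.config import settings, prompt

print(settings.llm_type)   # .env-backed LLM/graph settings (Config)
print(prompt.question)     # YAML-backed prompt text (PromptConfig)

prompt.question = "Tell me about James."
prompt.update_yaml_file()  # writes the edit back to config_prompt.yaml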
diff --git a/.gitignore b/.gitignore
index f39daf9..77c4168 100644
--- a/.gitignore
+++ b/.gitignore
@@ -125,6 +125,9 @@ celerybeat.pid
# SageMath parsed files
*.sage.py
+# prompt config
+config_prompt.yaml
+
# Environments
.env
.venv
diff --git a/hugegraph-llm/src/hugegraph_llm/config/__init__.py b/hugegraph-llm/src/hugegraph_llm/config/__init__.py
index 3e6c9e9..e077fee 100644
--- a/hugegraph-llm/src/hugegraph_llm/config/__init__.py
+++ b/hugegraph-llm/src/hugegraph_llm/config/__init__.py
@@ -16,17 +16,15 @@
# under the License.
-__all__ = [
- "settings",
- "resource_path"
-]
+__all__ = ["settings", "resource_path"]
import os
-
-from .config import Config
+from .config import Config, PromptConfig
settings = Config()
settings.from_env()
+prompt = PromptConfig()
+
package_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
resource_path = os.path.join(package_path, "resources")
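
A note on the design above: settings and prompt are both instantiated once at package import time, so every importer shares the same mutable instances. A hedged sketch of the effect (module names are hypothetical):

# somewhere_a.py (hypothetical): mutate the shared prompt instance
from hugegraph_llm.config import prompt
prompt.question = "Tell me about James."

# somewhere_b.py (hypothetical): the import resolves to the same object,
# so the edit made in somewhere_a is visible here
from hugegraph_llm.config import prompt
assert prompt.question == "Tell me about James."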
diff --git a/hugegraph-llm/src/hugegraph_llm/config/config.py b/hugegraph-llm/src/hugegraph_llm/config/config.py
index 2a73b62..e70fbde 100644
--- a/hugegraph-llm/src/hugegraph_llm/config/config.py
+++ b/hugegraph-llm/src/hugegraph_llm/config/config.py
@@ -17,64 +17,24 @@
import os
-
from dataclasses import dataclass
-from typing import Literal, Optional
+from typing import Optional
+
+import yaml
from dotenv import dotenv_values, set_key
+from hugegraph_llm.config.config_data import ConfigData, PromptData
from hugegraph_llm.utils.log import log
-dirname = os.path.dirname
-package_path = dirname(dirname(dirname(dirname(os.path.abspath(__file__)))))
+dir_name = os.path.dirname
+package_path = dir_name(dir_name(dir_name(dir_name(os.path.abspath(__file__)))))
env_path = os.path.join(package_path, ".env")
+f_name = "config_prompt.yaml"
+yaml_file_path = os.path.join(package_path, f"src/hugegraph_llm/resources/demo/{f_name}")
@dataclass
-class Config:
- """LLM settings"""
- # env_path: Optional[str] = ".env"
- llm_type: Literal["openai", "ollama", "qianfan_wenxin", "zhipu"] = "openai"
- embedding_type: Optional[Literal["openai", "ollama", "qianfan_wenxin", "zhipu"]] = "openai"
- reranker_type: Optional[Literal["cohere", "siliconflow"]] = None
- # 1. OpenAI settings
- openai_api_base: Optional[str] = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")
- openai_api_key: Optional[str] = os.environ.get("OPENAI_API_KEY")
- openai_language_model: Optional[str] = "gpt-4o-mini"
- openai_embedding_model: Optional[str] = "text-embedding-3-small"
- openai_max_tokens: int = 4096
- # 2. Rerank settings
- cohere_base_url: Optional[str] = os.environ.get("CO_API_URL", "https://api.cohere.com/v1/rerank")
- reranker_api_key: Optional[str] = None
- reranker_model: Optional[str] = None
- # 3. Ollama settings
- ollama_host: Optional[str] = "127.0.0.1"
- ollama_port: Optional[int] = 11434
- ollama_language_model: Optional[str] = None
- ollama_embedding_model: Optional[str] = None
- # 4. QianFan/WenXin settings
- qianfan_api_key: Optional[str] = None
- qianfan_secret_key: Optional[str] = None
- qianfan_access_token: Optional[str] = None
- # 4.1 URL settings
- qianfan_url_prefix: Optional[str] = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop"
- qianfan_chat_url: Optional[str] = qianfan_url_prefix + "/chat/"
- qianfan_language_model: Optional[str] = "ERNIE-4.0-Turbo-8K"
- qianfan_embed_url: Optional[str] = qianfan_url_prefix + "/embeddings/"
- # refer https://cloud.baidu.com/doc/WENXINWORKSHOP/s/alj562vvu to get more details
- qianfan_embedding_model: Optional[str] = "embedding-v1"
- # 5. ZhiPu(GLM) settings
- zhipu_api_key: Optional[str] = None
- zhipu_language_model: Optional[str] = "glm-4"
- zhipu_embedding_model: Optional[str] = "embedding-2"
-
- """HugeGraph settings"""
- graph_ip: Optional[str] = "127.0.0.1"
- graph_port: Optional[str] = "8080"
- graph_name: Optional[str] = "hugegraph"
- graph_user: Optional[str] = "admin"
- graph_pwd: Optional[str] = "xxx"
- graph_space: Optional[str] = None
-
+class Config(ConfigData):
def from_env(self):
if os.path.exists(env_path):
env_config = read_dotenv()
@@ -125,3 +85,56 @@ def read_dotenv() -> dict[str, Optional[str]]:
if key not in os.environ:
os.environ[key] = value or ""
return env_config
+
+
+class PromptConfig(PromptData):
+
+ def __init__(self):
+ self.ensure_yaml_file_exists()
+
+ def ensure_yaml_file_exists(self):
+ if os.path.exists(yaml_file_path):
+ log.info(f"Loading prompt file '{f_name}' successfully.")
+ with open(yaml_file_path, "r") as file:
+ data = yaml.safe_load(file)
+ # Load existing values from the YAML file into the class attributes
+ for key, value in data.items():
+ setattr(self, key, value)
+ else:
+ self.save_to_yaml()
+ log.info(f"Prompt file '{yaml_file_path}' doesn't exist, creating it.")
+
+
+ def save_to_yaml(self):
+ indented_schema = "\n".join([f" {line}" for line in self.rag_schema.splitlines()])
+ indented_example_prompt = "\n".join([f" {line}" for line in self.schema_example_prompt.splitlines()])
+ indented_question = "\n".join([f" {line}" for line in self.question.splitlines()])
+ indented_custom_related_information = (
+ "\n".join([f" {line}" for line in self.custom_related_information.splitlines()])
+ )
+ indented_default_answer_template = "\n".join([f" {line}" for line in self.default_answer_template.splitlines()])
+
+ # This can be extended with more fields as additional data needs to be stored
+ yaml_content = f"""rag_schema: |
+{indented_schema}
+
+schema_example_prompt: |
+{indented_example_prompt}
+
+question: |
+{indented_question}
+
+custom_related_information: |
+{indented_custom_related_information}
+
+default_answer_template: |
+{indented_default_answer_template}
+
+"""
+ with open(yaml_file_path, "w") as file:
+ file.write(yaml_content)
+
+
+ def update_yaml_file(self):
+ self.save_to_yaml()
+ log.info(f"Prompt file '{f_name}' updated successfully.")
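
The YAML round trip above is intentionally simple: save_to_yaml writes every prompt field as a literal block scalar (the "|" style, with two-space indentation), and ensure_yaml_file_exists reads the file back with yaml.safe_load and copies each key onto the instance via setattr. A self-contained sketch of that round trip, using PyYAML semantics and a shortened field value (not the project's real default):

import yaml

# What save_to_yaml emits for one field: a literal block scalar.
written = """question: |
  Tell me about Sarah.
"""

data = yaml.safe_load(written)

class Holder:  # stand-in for PromptConfig, illustration only
    pass

h = Holder()
for key, value in data.items():  # mirrors ensure_yaml_file_exists()
    setattr(h, key, value)

assert h.question == "Tell me about Sarah.\n"  # block scalars keep the trailing newline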
diff --git a/hugegraph-llm/src/hugegraph_llm/config/config_data.py b/hugegraph-llm/src/hugegraph_llm/config/config_data.py
new file mode 100644
index 0000000..b2a4fd1
--- /dev/null
+++ b/hugegraph-llm/src/hugegraph_llm/config/config_data.py
@@ -0,0 +1,172 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import os
+from dataclasses import dataclass
+from typing import Literal, Optional
+
+
+@dataclass
+class ConfigData:
+ """LLM settings"""
+
+ # env_path: Optional[str] = ".env"
+ llm_type: Literal["openai", "ollama", "qianfan_wenxin", "zhipu"] = "openai"
+ embedding_type: Optional[Literal["openai", "ollama", "qianfan_wenxin", "zhipu"]] = "openai"
+ reranker_type: Optional[Literal["cohere", "siliconflow"]] = None
+ # 1. OpenAI settings
+ openai_api_base: Optional[str] = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")
+ openai_api_key: Optional[str] = os.environ.get("OPENAI_API_KEY")
+ openai_language_model: Optional[str] = "gpt-4o-mini"
+ openai_embedding_model: Optional[str] = "text-embedding-3-small"
+ openai_max_tokens: int = 4096
+ # 2. Rerank settings
+ cohere_base_url: Optional[str] = os.environ.get("CO_API_URL", "https://api.cohere.com/v1/rerank")
+ reranker_api_key: Optional[str] = None
+ reranker_model: Optional[str] = None
+ # 3. Ollama settings
+ ollama_host: Optional[str] = "127.0.0.1"
+ ollama_port: Optional[int] = 11434
+ ollama_language_model: Optional[str] = None
+ ollama_embedding_model: Optional[str] = None
+ # 4. QianFan/WenXin settings
+ qianfan_api_key: Optional[str] = None
+ qianfan_secret_key: Optional[str] = None
+ qianfan_access_token: Optional[str] = None
+ # 4.1 URL settings
+ qianfan_url_prefix: Optional[str] = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop"
+ qianfan_chat_url: Optional[str] = qianfan_url_prefix + "/chat/"
+ qianfan_language_model: Optional[str] = "ERNIE-4.0-Turbo-8K"
+ qianfan_embed_url: Optional[str] = qianfan_url_prefix + "/embeddings/"
+ # refer https://cloud.baidu.com/doc/WENXINWORKSHOP/s/alj562vvu to get more details
+ qianfan_embedding_model: Optional[str] = "embedding-v1"
+ # TODO: confirm whether this needs to be configurable
+ # 5. ZhiPu(GLM) settings
+ zhipu_api_key: Optional[str] = None
+ zhipu_language_model: Optional[str] = "glm-4"
+ zhipu_embedding_model: Optional[str] = "embedding-2"
+
+ """HugeGraph settings"""
+ graph_ip: Optional[str] = "127.0.0.1"
+ graph_port: Optional[str] = "8080"
+ graph_name: Optional[str] = "hugegraph"
+ graph_user: Optional[str] = "admin"
+ graph_pwd: Optional[str] = "xxx"
+ graph_space: Optional[str] = None
+
+
+# Static prompt data consumed by PromptConfig
+class PromptData:
+
+ # Data moved out of hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py
+ default_answer_template = f"""You are an expert in knowledge graphs and natural language processing.
+Your task is to provide a precise and accurate answer based on the given context.
+
+Context information is below.
+---------------------
+{{context_str}}
+---------------------
+
+Given the context information and without using fictive knowledge,
+answer the following query in a concise and professional manner.
+Query: {{query_str}}
+Answer:
+"""
+
+ custom_related_information = """"""
+
+ question = """Tell me about Sarah."""
+
+ # Data moved out of hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py
+ schema_example_prompt = """## Main Task
+Given the following graph schema and a piece of text, your task is to analyze the text and extract information that fits into the schema's structure, formatting the information into vertices and edges as specified.
+## Basic Rules
+### Schema Format
+Graph Schema:
+- Vertices: [List of vertex labels and their properties]
+- Edges: [List of edge labels, their source and target vertex labels, and properties]
+### Content Rule
+Please read the provided text carefully and identify any information that corresponds to the vertices and edges defined in the schema. For each piece of information that matches a vertex or edge, format it according to the following JSON structures:
+#### Vertex Format:
+{"id":"vertexLabelID:entityName","label":"vertexLabel","type":"vertex","properties":{"propertyName":"propertyValue", ...}}
+#### Edge Format:
+{"label":"edgeLabel","type":"edge","outV":"sourceVertexId","outVLabel":"sourceVertexLabel","inV":"targetVertexId","inVLabel":"targetVertexLabel","properties":{"propertyName":"propertyValue",...}}
+Also follow the rules:
+1. Don't extract property fields that do not exist in the given schema
+2. Ensure the extracted property is in the same type as the schema (like 'age' should be a number)
+3. If there are multiple primary keys, the strategy for generating VID is: vertexlabelID:pk1!pk2!pk3 (pk means primary key, and '!' is the separator)
+4. Output should be a list of JSON objects, each representing a vertex or an edge, extracted and formatted based on the text and schema.
+5. Translate the schema fields into Chinese if the given text is Chinese but the schema is in English (Optional)
+## Example
+### Input example:
+#### text
+Meet Sarah, a 30-year-old attorney, and her roommate, James, whom she's shared a home with since 2010. James, in his professional life, works as a journalist.
+#### graph schema
+{"vertices":[{"vertex_label":"person","properties":["name","age","occupation"]}], "edges":[{"edge_label":"roommate", "source_vertex_label":"person","target_vertex_label":"person","properties":["date"]}]}
+### Output example:
+[{"id":"1:Sarah","label":"person","type":"vertex","properties":{"name":"Sarah","age":30,"occupation":"attorney"}},{"id":"1:James","label":"person","type":"vertex","properties":{"name":"James","occupation":"journalist"}},{"label":"roommate","type":"edge","outV":"1:Sarah","outVLabel":"person","inV":"1:James","inVLabel":"person","properties":{"date":"2010"}}]
+"""
+
+ rag_schema = """{
+"vertexlabels": [
+ {
+ "id": 1,
+ "name": "person",
+ "id_strategy": "PRIMARY_KEY",
+ "primary_keys": [
+ "name"
+ ],
+ "properties": [
+ "name",
+ "age",
+ "occupation"
+ ]
+ },
+ {
+ "id": 2,
+ "name": "webpage",
+ "id_strategy": "PRIMARY_KEY",
+ "primary_keys": [
+ "name"
+ ],
+ "properties": [
+ "name",
+ "url"
+ ]
+ }
+],
+"edgelabels": [
+ {
+ "id": 1,
+ "name": "roommate",
+ "source_label": "person",
+ "target_label": "person",
+ "properties": [
+ "date"
+ ]
+ },
+ {
+ "id": 2,
+ "name": "link",
+ "source_label": "webpage",
+ "target_label": "person",
+ "properties": []
+ }
+]
+}
+"""
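
Two idioms coexist in this new file: ConfigData is a @dataclass whose fields all carry defaults, so Config(ConfigData) inherits the whole settings surface and stays constructible with no arguments, while PromptData is a plain class holding large string class attributes. A trimmed sketch of the distinction (stand-in names, not the real classes):

from dataclasses import dataclass, fields
from typing import Optional

@dataclass
class MiniConfigData:  # stand-in for ConfigData
    llm_type: str = "openai"
    graph_port: Optional[str] = "8080"

class MiniPromptData:  # stand-in for PromptData
    question = "Tell me about Sarah."

cfg = MiniConfigData()                # no arguments needed: every field has a default
print([f.name for f in fields(cfg)])  # dataclass fields are introspectable
print(MiniPromptData.question)        # plain class attribute, no instance required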
diff --git a/hugegraph-llm/src/hugegraph_llm/config/generate.py b/hugegraph-llm/src/hugegraph_llm/config/generate.py
index 7016cbb..fb8943b 100644
--- a/hugegraph-llm/src/hugegraph_llm/config/generate.py
+++ b/hugegraph-llm/src/hugegraph_llm/config/generate.py
@@ -20,8 +20,8 @@ import argparse
from hugegraph_llm.config import settings
-if __name__ == '__main__':
- parser = argparse.ArgumentParser(description='Generate hugegraph-llm config file')
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Generate hugegraph-llm config file")
parser.add_argument("-U", "--update", action="store_true", help="Update the config file")
args = parser.parse_args()
if args.update:
diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
index e0232cf..aaaf0a4 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_web_demo.py
@@ -32,13 +32,14 @@ from gradio.utils import NamedString
from requests.auth import HTTPBasicAuth
from hugegraph_llm.api.rag_api import rag_http_api
-from hugegraph_llm.config import settings, resource_path
+from hugegraph_llm.config import settings, resource_path, prompt
from hugegraph_llm.enums.build_mode import BuildMode
from hugegraph_llm.models.embeddings.init_embedding import Embeddings
from hugegraph_llm.models.llms.init_llm import LLMs
from hugegraph_llm.operators.graph_rag_task import RAGPipeline
from hugegraph_llm.operators.kg_construction_task import KgBuilder
from hugegraph_llm.operators.llm_op.property_graph_extract import SCHEMA_EXAMPLE_PROMPT
+from hugegraph_llm.operators.llm_op.answer_synthesize import DEFAULT_ANSWER_TEMPLATE
from hugegraph_llm.utils.hugegraph_utils import get_hg_client
from hugegraph_llm.utils.hugegraph_utils import init_hg_test_data, run_gremlin_query, clean_hg_data
from hugegraph_llm.utils.log import log
@@ -71,6 +72,13 @@ def rag_answer(
custom_related_information: str,
answer_prompt: str,
) -> Tuple:
+
+ if prompt.question != text or prompt.custom_related_information != custom_related_information or prompt.default_answer_template != answer_prompt:
+ prompt.custom_related_information = custom_related_information
+ prompt.question = text
+ prompt.default_answer_template = answer_prompt
+ prompt.update_yaml_file()
+
vector_search = vector_only_answer or graph_vector_answer
graph_search = graph_only_answer or graph_vector_answer
@@ -117,6 +125,13 @@ def build_kg( # pylint: disable=too-many-branches
example_prompt: str,
build_mode: str,
) -> str:
+
+ # sync the user's prompt edits (schema & example prompt) to the YAML file
+ if prompt.rag_schema != schema or prompt.schema_example_prompt != example_prompt:
+ prompt.rag_schema = schema
+ prompt.schema_example_prompt = example_prompt
+ prompt.update_yaml_file()
+
if isinstance(files, NamedString):
files = [files]
texts = []
@@ -482,49 +497,16 @@ def init_rag_ui() -> gr.Interface:
"""
)
- schema = """{
- "vertexlabels": [
- {
- "id":1,
- "name": "person",
- "id_strategy": "PRIMARY_KEY",
- "primary_keys":["name"],
- "properties": ["name","age","occupation"]
- },
- {
- "id":2,
- "name": "webpage",
- "id_strategy":"PRIMARY_KEY",
- "primary_keys":["name"],
- "properties": ["name","url"]
- }
- ],
- "edgelabels": [
- {
- "id": 1,
- "name": "roommate",
- "source_label": "person",
- "target_label": "person",
- "properties": ["date"]
- },
- {
- "id": 2,
- "name": "link",
- "source_label": "webpage",
- "target_label": "person",
- "properties": []
- }
- ]
-}"""
-
+ schema = prompt.rag_schema
+
with gr.Row():
input_file = gr.File(
value=[os.path.join(resource_path, "demo", "test.txt")],
label="Docs (multi-files can be selected together)",
file_count="multiple",
)
- input_schema = gr.Textbox(value=schema, label="Schema")
- info_extract_template = gr.Textbox(value=SCHEMA_EXAMPLE_PROMPT, label="Info extract head")
+ input_schema = gr.Textbox(value=schema, label="Schema", lines=2)
+ info_extract_template = gr.Textbox(value=SCHEMA_EXAMPLE_PROMPT, label="Info extract head", lines=2)
with gr.Column():
mode = gr.Radio(
choices=["Test Mode", "Import Mode", "Clear and Import", "Rebuild Vector"],
@@ -543,7 +525,7 @@ def init_rag_ui() -> gr.Interface:
gr.Markdown("""## 2. RAG with HugeGraph 📖""")
with gr.Row():
with gr.Column(scale=2):
- inp = gr.Textbox(value="Tell me about Sarah.", label="Question", show_copy_button=True)
+ inp = gr.Textbox(value=prompt.question, label="Question", show_copy_button=True, lines=2)
raw_out = gr.Textbox(label="Basic LLM Answer", show_copy_button=True)
vector_only_out = gr.Textbox(label="Vector-only Answer", show_copy_button=True)
graph_only_out = gr.Textbox(label="Graph-only Answer", show_copy_button=True)
@@ -551,7 +533,7 @@ def init_rag_ui() -> gr.Interface:
from hugegraph_llm.operators.llm_op.answer_synthesize import DEFAULT_ANSWER_TEMPLATE
answer_prompt_input = gr.Textbox(
- value=DEFAULT_ANSWER_TEMPLATE, label="Custom Prompt", show_copy_button=True
+ value=DEFAULT_ANSWER_TEMPLATE, label="Custom Prompt", show_copy_button=True, lines=2
)
with gr.Column(scale=1):
with gr.Row():
@@ -581,7 +563,7 @@ def init_rag_ui() -> gr.Interface:
info="One-depth neighbors > two-depth neighbors",
)
custom_related_information = gr.Text(
- "",
+ prompt.custom_related_information,
label="Custom related information(Optional)",
)
btn = gr.Button("Answer Question", variant="primary")
@@ -747,7 +729,7 @@ if __name__ == "__main__":
app.include_router(app_auth)
auth_enabled = os.getenv("ENABLE_LOGIN", "False").lower() == "true"
- log.info("Authentication is %s.", "enabled" if auth_enabled else "disabled")
+ log.info("(Status) Authentication is %s now.", "enabled" if auth_enabled else "disabled")
# TODO: support multi-user login when need
app = gr.mount_gradio_app(app, hugegraph_llm, path="/", auth=("rag", os.getenv("TOKEN")) if auth_enabled else None)
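
Both Gradio callbacks above (rag_answer and build_kg) now share the same compare-then-persist pattern: the YAML file is rewritten only when the user actually edited a prompt field. A condensed sketch of that pattern; the helper name is illustrative, not from the diff:

def sync_prompt_edits(prompt, text, related_info, answer_template):
    """Illustrative only: persist prompt edits iff something changed."""
    changed = (
        prompt.question != text
        or prompt.custom_related_information != related_info
        or prompt.default_answer_template != answer_template
    )
    if changed:
        prompt.question = text
        prompt.custom_related_information = related_info
        prompt.default_answer_template = answer_template
        prompt.update_yaml_file()  # one disk write, skipped on no-op requests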
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py
index 2d05160..129b77b 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/answer_synthesize.py
@@ -21,22 +21,10 @@ from typing import Any, Dict, Optional
from hugegraph_llm.models.llms.base import BaseLLM
from hugegraph_llm.models.llms.init_llm import LLMs
+from hugegraph_llm.config import prompt
-# TODO: we need enhance the template to answer the question (put it in a separate file)
-DEFAULT_ANSWER_TEMPLATE = f"""
-You are an expert in knowledge graphs and natural language processing.
-Your task is to provide a precise and accurate answer based on the given context.
-
-Context information is below.
----------------------
-{{context_str}}
----------------------
-
-Given the context information and without using fictive knowledge,
-answer the following query in a concise and professional manner.
-Query: {{query_str}}
-Answer:
-"""
+
+DEFAULT_ANSWER_TEMPLATE = prompt.default_answer_template
class AnswerSynthesize:
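
One subtlety in the template above: it is built with an f-string whose braces are doubled ({{context_str}}), so the stored text keeps literal {context_str} and {query_str} placeholders. Presumably AnswerSynthesize fills them in later, e.g. via str.format; that call is outside this diff, so the sketch below rests on that assumption:

# Doubled braces in an f-string survive as single literal braces,
# leaving placeholders for a later str.format() call (assumed here).
template = f"""Context information is below.
---------------------
{{context_str}}
---------------------
Query: {{query_str}}
Answer:
"""

print(template.format(context_str="(retrieved chunks)",
                      query_str="Tell me about Sarah."))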
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py
index 5f802af..d080d56 100644
--- a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py
+++ b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py
@@ -20,52 +20,15 @@ import json
import re
from typing import List, Any, Dict
+from hugegraph_llm.config import prompt
from hugegraph_llm.document.chunk_split import ChunkSplitter
from hugegraph_llm.models.llms.base import BaseLLM
from hugegraph_llm.utils.log import log
-# TODO: put in a separate file for users to customize the content
-SCHEMA_EXAMPLE_PROMPT = """## Main Task
-Given the following graph schema and a piece of text, your task is to analyze the text and extract information that fits into the schema's structure, formatting the information into vertices and edges as specified.
-
-## Basic Rules
-### Schema Format
-Graph Schema:
-- Vertices: [List of vertex labels and their properties]
-- Edges: [List of edge labels, their source and target vertex labels, and
properties]
-
-### Content Rule
-Please read the provided text carefully and identify any information that corresponds to the vertices and edges defined in the schema. For each piece of information that matches a vertex or edge, format it according to the following JSON structures:
-#### Vertex Format:
-{"id":"vertexLabelID:entityName","label":"vertexLabel","type":"vertex","properties":{"propertyName":"propertyValue",
-...}}
-
-#### Edge Format:
-{"label":"edgeLabel","type":"edge","outV":"sourceVertexId","outVLabel":"sourceVertexLabel","inV":"targetVertexId","inVLabel":"targetVertexLabel","properties":{"propertyName":"propertyValue",...}}
-
-Also follow the rules:
-1. Don't extract property fields that do not exist in the given schema
-2. Ensure the extract property is in the same type as the schema (like 'age' should be a number)
-3. If there are multiple primarykeys provided, then the generating strategy of VID is: vertexlabelID:pk1!pk2!pk3 (pk means primary key, and '!' is the separator, no extra space between them)
-4. Your output should be a list of such JSON objects, each representing either a vertex or an edge, extracted and formatted based on the text and the provided schema.
-5. Translate the given schema filed into Chinese if the given text is Chinese but the schema is in English (Optional)
-
-
-## Example
-### Input example:
-#### text
-Meet Sarah, a 30-year-old attorney, and her roommate, James, whom she's shared a home with since 2010. James, in his professional life, works as a journalist.
-#### graph schema
-{"vertices":[{"vertex_label":"person","properties":["name","age","occupation"]}], "edges":[{"edge_label":"roommate", "source_vertex_label":"person","target_vertex_label":"person","properties":["date"]}]}
-
-### Output example:
-[{"id":"1:Sarah","label":"person","type":"vertex","properties":{"name":"Sarah","age":30,"occupation":"attorney"}},{"id":"1:James","label":"person","type":"vertex","properties":{"name":"James","occupation":"journalist"}},{"label":"roommate","type":"edge","outV":"1:Sarah","outVLabel":"person","inV":"1:James","inVLabel":"person","properties":{"date":"2010"}}]
-"""
-
+SCHEMA_EXAMPLE_PROMPT = prompt.schema_example_prompt
def generate_extract_property_graph_prompt(text, schema=None) -> str:
return f"""---
-
Following the full instructions above, try to extract the following text from the given schema, output the JSON result:
# Input
## Text: