This is an automated email from the ASF dual-hosted git repository.
jin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-hugegraph-ai.git
The following commit(s) were added to refs/heads/main by this push:
     new a0cc3f9  refactor(llm): return schema.groovy first when backup graph data (#161)
a0cc3f9 is described below
commit a0cc3f93a6a739c7433a982d4864e172296259d0
Author: SoJGooo <[email protected]>
AuthorDate: Fri Feb 7 15:44:41 2025 +0800
refactor(llm): return schema.groovy first when backup graph data (#161)
    Note: for non-groovy mode, the schema is returned in JSON format
---------
Co-authored-by: imbajin <[email protected]>
---
.../src/hugegraph_llm/utils/hugegraph_utils.py | 30 +++++++++++++++++++---
1 file changed, 26 insertions(+), 4 deletions(-)
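
For reference, the two schema shapes this commit distinguishes. The field
contents below are illustrative, not taken from a real graph; "vertexlabels"
is the one key the new code actually reads, the other key names follow the
HugeGraph /schema REST response:

    # getSchema(_format="groovy") wraps the schema DSL text in a dict:
    {"schema": "schema.propertyKey('name').asText().ifNotExist().create() ..."}

    # getSchema() returns the raw JSON schema:
    {"propertykeys": [...], "vertexlabels": [...], "edgelabels": [...], "indexlabels": [...]}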
diff --git a/hugegraph-llm/src/hugegraph_llm/utils/hugegraph_utils.py b/hugegraph-llm/src/hugegraph_llm/utils/hugegraph_utils.py
index dc94900..4b90943 100644
--- a/hugegraph-llm/src/hugegraph_llm/utils/hugegraph_utils.py
+++ b/hugegraph-llm/src/hugegraph_llm/utils/hugegraph_utils.py
@@ -111,13 +111,14 @@ def backup_data():
             "vertices.json": f"g.V().limit({MAX_VERTICES})"
             f".aggregate('vertices').count().as('count').select('count','vertices')",
             "edges.json":
             f"g.E().limit({MAX_EDGES}).aggregate('edges').count().as('count').select('count','edges')",
-            "schema.json": client.schema().getSchema()
+            "schema.json": client.schema().getSchema(_format="groovy")
         }
+        vertexlabels = client.schema().getSchema()["vertexlabels"]
+        all_pk_flag = all(data.get('id_strategy') == 'PRIMARY_KEY' for data in vertexlabels)
+
         for filename, query in files.items():
-            with open(os.path.join(backup_subdir, filename), "w", encoding="utf-8") as f:
-                data = client.gremlin().exec(query)["data"] if "schema" not in filename else query
-                json.dump(data, f, ensure_ascii=False)
+            write_backup_file(client, backup_subdir, filename, query, all_pk_flag)
         log.info("Backup successfully in %s.", backup_subdir)
         relative_backup_subdir = os.path.relpath(backup_subdir, start=resource_path)
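
For context, a minimal sketch of how the new pre-computation fits together.
The getSchema calls and key names are taken from the diff above; the
PyHugeClient construction is an assumption and may differ across pyhugegraph
versions:

    from pyhugegraph.client import PyHugeClient

    # Client setup is illustrative only (host/port/credentials assumed)
    client = PyHugeClient(ip="127.0.0.1", port="8080", graph="hugegraph",
                          user="admin", pwd="admin")

    # Groovy form: a dict like {"schema": "<groovy DSL text>"}, which is
    # what the "schema" check in write_backup_file below relies on
    schema_groovy = client.schema().getSchema(_format="groovy")

    # Plain JSON form, used only to inspect each vertex label's id strategy
    vertexlabels = client.schema().getSchema()["vertexlabels"]
    all_pk_flag = all(v.get("id_strategy") == "PRIMARY_KEY" for v in vertexlabels)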
@@ -128,6 +129,27 @@ def backup_data():
         raise Exception("Failed to execute backup") from e
 
 
+def write_backup_file(client, backup_subdir, filename, query, all_pk_flag):
+    with open(os.path.join(backup_subdir, filename), "w", encoding="utf-8") as f:
+        if filename == "edges.json":
+            data = client.gremlin().exec(query)["data"][0]["edges"]
+            json.dump(data, f, ensure_ascii=False)
+        elif filename == "vertices.json":
+            data_full = client.gremlin().exec(query)["data"][0]["vertices"]
+            data = [{key: value for key, value in vertex.items() if key != "id"}
+                    for vertex in data_full] if all_pk_flag else data_full
+            json.dump(data, f, ensure_ascii=False)
+        elif filename == "schema.json":
+            data_full = query
+            if isinstance(data_full, dict) and "schema" in data_full:
+                groovy_filename = filename.replace(".json", ".groovy")
+                with open(os.path.join(backup_subdir, groovy_filename), "w", encoding="utf-8") as groovy_file:
+                    groovy_file.write(str(data_full["schema"]))
+            else:
+                data = data_full
+                json.dump(data, f, ensure_ascii=False)
+
+
 def manage_backup_retention():
     try:
         backup_dirs = [
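
Two notes on the helper, as far as the diff shows: dropping the "id" field
from vertices only when every vertex label uses the PRIMARY_KEY id strategy is
presumably safe because such ids are derived from the primary-key properties
and regenerated on restore; and in groovy mode the helper writes the DSL text
to schema.groovy while the outer open() still creates an empty schema.json. A
hypothetical standalone call (paths and values are illustrative):

    import os

    backup_subdir = "/tmp/hugegraph-backup"  # assumed scratch directory
    os.makedirs(backup_subdir, exist_ok=True)

    # For "schema.json" the query argument is the pre-fetched schema dict,
    # not a Gremlin string
    schema = client.schema().getSchema(_format="groovy")  # {"schema": "..."}
    write_backup_file(client, backup_subdir, "schema.json", schema, all_pk_flag=True)
    # -> writes the groovy text to schema.groovy (schema.json is created empty)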