Copilot commented on code in PR #265:
URL:
https://github.com/apache/incubator-hugegraph-ai/pull/265#discussion_r2247728738
##########
hugegraph-llm/src/hugegraph_llm/operators/index_op/vector_index_query.py:
##########
@@ -19,18 +19,21 @@
import os
from typing import Dict, Any
-from hugegraph_llm.config import resource_path, huge_settings
+from hugegraph_llm.config import resource_path, huge_settings, llm_settings
from hugegraph_llm.indices.vector_index import VectorIndex
from hugegraph_llm.models.embeddings.base import BaseEmbedding
+from hugegraph_llm.utils.embedding_utils import get_filename_prefix,
get_index_folder_name
from hugegraph_llm.utils.log import log
class VectorIndexQuery:
def __init__(self, embedding: BaseEmbedding, topk: int = 3):
self.embedding = embedding
self.topk = topk
- self.index_dir = str(os.path.join(resource_path,
huge_settings.graph_name, "chunks"))
- self.vector_index = VectorIndex.from_index_file(self.index_dir)
+ self.folder_name = get_index_folder_name(huge_settings.graph_name,
huge_settings.graph_space)
+ self.index_dir = str(os.path.join(resource_path, self.folder_name,
"chunks"))
+ self.filename_prefix =
get_filename_prefix(llm_settings.embedding_type, getattr(embedding,
"model_name", None))
+ self.vector_index =
VectorIndex.from_index_file(self.index_dir,self.filename_prefix)
Review Comment:
Missing space after comma. Should be
`VectorIndex.from_index_file(self.index_dir, self.filename_prefix)` for
consistent formatting.
```suggestion
self.vector_index = VectorIndex.from_index_file(self.index_dir,
self.filename_prefix)
```
##########
hugegraph-llm/src/hugegraph_llm/utils/embedding_utils.py:
##########
@@ -71,3 +71,21 @@ async def get_embeddings_parallel(embedding: BaseEmbedding,
vids: list[str]) ->
embeddings.extend(batch_embeddings)
return embeddings
+
+
+def get_filename_prefix(embedding_type: str = None, model_name: str = None) ->
str:
+ """Generate filename based on model name."""
+ if not model_name or model_name.strip() == "" or not embedding_type or
embedding_type.strip() == "":
Review Comment:
[nitpick] The condition can be simplified using truthiness checks. Consider:
`if not (model_name and model_name.strip() and embedding_type and
embedding_type.strip()):`
```suggestion
if not (model_name and model_name.strip() and embedding_type and
embedding_type.strip()):
```
##########
hugegraph-llm/src/hugegraph_llm/utils/embedding_utils.py:
##########
@@ -71,3 +71,21 @@ async def get_embeddings_parallel(embedding: BaseEmbedding,
vids: list[str]) ->
embeddings.extend(batch_embeddings)
return embeddings
+
+
+def get_filename_prefix(embedding_type: str = None, model_name: str = None) ->
str:
+ """Generate filename based on model name."""
+ if not model_name or model_name.strip() == "" or not embedding_type or
embedding_type.strip() == "":
+ return ""
+ # Sanitize model_name to prevent path traversal or invalid filename chars
+ safe_embedding_type = embedding_type.replace("/", "_").replace("\\",
"_").strip()
+ safe_model_name = model_name.replace("/", "_").replace("\\", "_").strip()
+ return f"{safe_embedding_type}_{safe_model_name}"
+
+
+def get_index_folder_name(graph_name: str, space_name: str = None) -> str:
+ if not space_name or space_name.strip() == "":
Review Comment:
[nitpick] Inconsistent empty string checking pattern. Consider using the
same pattern as line 78 or simplify to `if not (space_name and
space_name.strip()):`
```suggestion
if not (space_name and space_name.strip()):
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]