utkarsharma2 commented on code in PR #35919: URL: https://github.com/apache/airflow/pull/35919#discussion_r1420221714
########## airflow/providers/weaviate/hooks/weaviate.py: ########## @@ -133,20 +140,201 @@ def create_class(self, class_json: dict[str, Any]) -> None: client = self.conn client.schema.create_class(class_json) - def create_schema(self, schema_json: dict[str, Any]) -> None: + @retry(reraise=True, stop=stop_after_attempt(3), retry=retry_if_exception_type(requests.ConnectionError)) + def create_schema(self, schema_json: dict[str, Any] | str) -> None: """ Create a new Schema. Instead of adding classes one by one , you can upload a full schema in JSON format at once. - :param schema_json: The schema to create + :param schema_json: Schema as a Python dict or the path to a JSON file, or the URL of a JSON file. """ client = self.conn client.schema.create(schema_json) + @retry(reraise=True, stop=stop_after_attempt(3), retry=retry_if_exception_type(requests.ConnectionError)) + def get_schema(self, class_name: str | None = None): + """Get the schema from Weaviate. + + :param class_name: The class for which to return the schema. If NOT provided the whole schema is + returned, otherwise only the schema of this class is returned. By default None. + """ + client = self.get_client() + return client.schema.get(class_name) + + def delete_classes(self, class_names: list[str] | str, if_error: str = "stop") -> list[str] | None: + """Deletes all or specific classes if class_names are provided. + + :param class_names: list of class names to be deleted. + :param if_error: define the actions to be taken if there is an error while deleting a class, possible + options are `stop` and `continue` + :return: if `if_error=continue` return list of classes which we failed to delete. + if `if_error=stop` returns None. 
+ """ + client = self.get_client() + class_names = [class_names] if class_names and isinstance(class_names, str) else class_names + + failed_class_list = [] + for class_name in class_names: + try: + for attempt in Retrying( + stop=stop_after_attempt(3), + retry=retry_if_exception(lambda exc: self.check_http_error_is_retryable(exc)), + ): + with attempt: + client.schema.delete_class(class_name) + except Exception as e: + if if_error == "continue": + self.log.error(e) + failed_class_list.append(class_name) + elif if_error == "stop": + raise e + + if if_error == "continue": + return failed_class_list + return None + + def delete_all_schema(self): + """Remove the entire schema from the Weaviate instance and all data associated with it.""" + client = self.get_client() + return client.schema.delete_all() + + def update_config(self, class_name: str, config: dict): + """Update a schema configuration for a specific class.""" + client = self.get_client() + client.schema.update_config(class_name=class_name, config=config) + + def upsert_classes(self, schema_json: dict[str, Any] | str, existing: ExitingSchemaOptions = "ignore"): + """ + Create or update the classes in schema of Weaviate database. + + :param schema_json: Json containing the schema. Format {"class_name": "class_dict"} + .. seealso:: `example of class_dict <https://weaviate-python-client.readthedocs.io/en/v3.25.2/weaviate.schema.html#weaviate.schema.Schema.create>`_. + :param existing: Options to handle the case when the classes exist, possible options + 'replace', 'fail', 'ignore'. 
+ """ + existing_schema_options = ["replace", "fail", "ignore"] + if existing not in existing_schema_options: + raise ValueError(f"Param 'existing' should be one of the {existing_schema_options} values.") + if isinstance(schema_json, str): + schema_json = cast(dict, json.load(open(schema_json))) + set__exiting_classes = {class_object["class"] for class_object in self.get_schema()["classes"]} + set__to_be_added_classes = {key for key, _ in schema_json.items()} + intersection_classes = set__exiting_classes.intersection(set__to_be_added_classes) + classes_to_create = set() + if existing == "fail" and intersection_classes: + raise ValueError( + f"Trying to create class {intersection_classes}" f" but this class already exists." + ) + elif existing == "ignore": + classes_to_create = set__to_be_added_classes - set__exiting_classes + elif existing == "replace": + error_list = self.delete_classes(class_names=list(intersection_classes)) Review Comment: Yes, this operation is not atomic, since its steps take place in different transactions. However, the whole operation is idempotent, so a user can retry the task. As we have observed, this is a very common operation when building LLM workflows, and it will be useful to users while developing an LLM pipeline. To make the risk explicit, we could add a proper disclaimer about this in the docstring. WDYT? Also, I'll change the name of the method to `create_or_replace_classes`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org