SameerMesiah97 commented on code in PR #66549: URL: https://github.com/apache/airflow/pull/66549#discussion_r3204333241
########## providers/amazon/src/airflow/providers/amazon/aws/operators/opensearch_serverless.py: ########## @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Amazon OpenSearch Serverless operators.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Literal + +from botocore.exceptions import ClientError + +from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook +from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator +from airflow.utils.helpers import prune_dict + +if TYPE_CHECKING: + from airflow.sdk import Context + + +class OpenSearchServerlessCreateCollectionOperator(AwsBaseOperator[AwsBaseHook]): + """ + Create an Amazon OpenSearch Serverless collection. + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:OpenSearchServerlessCreateCollectionOperator` + + :param collection_name: The name of the collection. (templated) + :param collection_type: The type of collection (SEARCH, TIMESERIES, VECTORSEARCH). (templated) + :param description: Optional description. (templated) + :param standby_replicas: Whether to use standby replicas (ENABLED or DISABLED). 
+ :param tags: Optional list of tag dicts. + :param if_exists: Behavior when the collection already exists. + ``"fail"`` raises an error, ``"skip"`` logs and returns. + """ + + aws_hook_class = AwsBaseHook + template_fields: tuple[str, ...] = ( + *AwsBaseOperator.template_fields, + "collection_name", + "collection_type", + "description", + ) + + def __init__( + self, + *, + collection_name: str, + collection_type: str = "SEARCH", + description: str | None = None, + standby_replicas: str | None = None, + tags: list[dict[str, str]] | None = None, + if_exists: Literal["fail", "skip"] = "skip", + **kwargs, + ) -> None: + super().__init__(**kwargs) + self.collection_name = collection_name + self.collection_type = collection_type + self.description = description + self.standby_replicas = standby_replicas + self.tags = tags + self.if_exists = if_exists + + @property + def _hook_parameters(self) -> dict[str, Any]: + return {**super()._hook_parameters, "client_type": "opensearchserverless"} Review Comment: Why re-implement this here when `OpenSearchServerlessHook` is effectively doing the same thing? ########## providers/amazon/docs/operators/opensearchserverless.rst: ########## @@ -52,6 +52,18 @@ To wait on the state of an Amazon Bedrock customize model job until it reaches a :start-after: [START howto_sensor_opensearch_collection_active] :end-before: [END howto_sensor_opensearch_collection_active] + +Operators +--------- + +.. _howto/operator:OpenSearchServerlessCreateCollectionOperator: + +Create a Collection +=================== + +To create an Amazon OpenSearch Serverless collection, use +:class:`~airflow.providers.amazon.aws.operators.opensearch_serverless.OpenSearchServerlessCreateCollectionOperator`. + Review Comment: Same issue that I pointed out in the other PRs. The example block is missing. 
########## providers/amazon/src/airflow/providers/amazon/aws/operators/opensearch_serverless.py: ########## @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Amazon OpenSearch Serverless operators.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Literal + +from botocore.exceptions import ClientError + +from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook +from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator +from airflow.utils.helpers import prune_dict + +if TYPE_CHECKING: + from airflow.sdk import Context + + +class OpenSearchServerlessCreateCollectionOperator(AwsBaseOperator[AwsBaseHook]): + """ + Create an Amazon OpenSearch Serverless collection. + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:OpenSearchServerlessCreateCollectionOperator` + + :param collection_name: The name of the collection. (templated) + :param collection_type: The type of collection (SEARCH, TIMESERIES, VECTORSEARCH). (templated) + :param description: Optional description. (templated) + :param standby_replicas: Whether to use standby replicas (ENABLED or DISABLED). 
+ :param tags: Optional list of tag dicts. + :param if_exists: Behavior when the collection already exists. + ``"fail"`` raises an error, ``"skip"`` logs and returns. + """ + + aws_hook_class = AwsBaseHook Review Comment: Why use `AwsBaseHook` when `OpenSearchServerlessHook` exists? ########## providers/amazon/tests/unit/amazon/aws/operators/test_opensearch_serverless.py: ########## @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +from unittest import mock +from unittest.mock import MagicMock + +import pytest +from botocore.exceptions import ClientError + +from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook +from airflow.providers.amazon.aws.operators.opensearch_serverless import ( + OpenSearchServerlessCreateCollectionOperator, +) + +from unit.amazon.aws.utils.test_template_fields import validate_template_fields + +COLLECTION_NAME = "test-collection" +COLLECTION_ID = "abc123def456" + + +class TestOpenSearchServerlessCreateCollectionOperator: + def setup_method(self): + self.operator = OpenSearchServerlessCreateCollectionOperator( + task_id="create_collection", + collection_name=COLLECTION_NAME, + collection_type="VECTORSEARCH", + ) + + @mock.patch.object(AwsBaseHook, "conn", new_callable=mock.PropertyMock) + def test_execute(self, mock_conn): + mock_client = MagicMock() + mock_client.create_collection.return_value = { + "createCollectionDetail": {"id": COLLECTION_ID, "name": COLLECTION_NAME} + } + mock_conn.return_value = mock_client + + result = self.operator.execute({}) + + mock_client.create_collection.assert_called_once_with(name=COLLECTION_NAME, type="VECTORSEARCH") + assert result == COLLECTION_ID + + @mock.patch.object(AwsBaseHook, "conn", new_callable=mock.PropertyMock) + def test_execute_skip_existing(self, mock_conn): + mock_client = MagicMock() + mock_client.create_collection.side_effect = ClientError( + {"Error": {"Code": "ConflictException", "Message": "exists"}}, "CreateCollection" + ) + mock_client.batch_get_collection.return_value = { + "collectionDetails": [{"id": COLLECTION_ID, "name": COLLECTION_NAME}] + } + mock_conn.return_value = mock_client + + result = self.operator.execute({}) Review Comment: I would add these 2 asserts here: ``` mock_client.create_collection.assert_called_once_with( name=COLLECTION_NAME, type="VECTORSEARCH", ) mock_client.batch_get_collection.assert_called_once_with( names=[COLLECTION_NAME] ) 
``` I have not tested them so feel free to correct them where necessary to get CI to pass. ########## providers/amazon/tests/system/amazon/aws/example_opensearch_serverless.py: ########## @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from datetime import datetime + +from airflow.providers.amazon.aws.operators.opensearch_serverless import ( + OpenSearchServerlessCreateCollectionOperator, +) +from airflow.providers.common.compat.sdk import DAG, chain + +from system.amazon.aws.utils import ENV_ID_KEY, SystemTestContextBuilder +from tests_common.test_utils.version_compat import AIRFLOW_V_3_0_PLUS + +if AIRFLOW_V_3_0_PLUS: + from airflow.sdk import TriggerRule, task +else: + from airflow.decorators import task # type: ignore[attr-defined,no-redef] + from airflow.utils.trigger_rule import TriggerRule # type: ignore[no-redef,attr-defined] + +DAG_ID = "example_opensearch_serverless" + +sys_test_context_task = SystemTestContextBuilder().build() + + +@task(trigger_rule=TriggerRule.ALL_DONE) +def delete_collection(collection_name: str): + import boto3 + + boto3.client("opensearchserverless").delete_collection(id=collection_name) Review Comment: nit: perhaps `collection_id` would be more 
accurate than `collection_name`? ########## providers/amazon/src/airflow/providers/amazon/aws/operators/opensearch_serverless.py: ########## @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Amazon OpenSearch Serverless operators.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Literal + +from botocore.exceptions import ClientError + +from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook +from airflow.providers.amazon.aws.operators.base_aws import AwsBaseOperator +from airflow.utils.helpers import prune_dict + +if TYPE_CHECKING: + from airflow.sdk import Context + + +class OpenSearchServerlessCreateCollectionOperator(AwsBaseOperator[AwsBaseHook]): + """ + Create an Amazon OpenSearch Serverless collection. + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:OpenSearchServerlessCreateCollectionOperator` + + :param collection_name: The name of the collection. (templated) + :param collection_type: The type of collection (SEARCH, TIMESERIES, VECTORSEARCH). (templated) + :param description: Optional description. (templated) + :param standby_replicas: Whether to use standby replicas (ENABLED or DISABLED). 
+ :param tags: Optional list of tag dicts. + :param if_exists: Behavior when the collection already exists. + ``"fail"`` raises an error, ``"skip"`` logs and returns. + """ + + aws_hook_class = AwsBaseHook + template_fields: tuple[str, ...] = ( + *AwsBaseOperator.template_fields, + "collection_name", + "collection_type", + "description", + ) + + def __init__( + self, + *, + collection_name: str, + collection_type: str = "SEARCH", + description: str | None = None, + standby_replicas: str | None = None, + tags: list[dict[str, str]] | None = None, + if_exists: Literal["fail", "skip"] = "skip", + **kwargs, + ) -> None: + super().__init__(**kwargs) + self.collection_name = collection_name + self.collection_type = collection_type + self.description = description + self.standby_replicas = standby_replicas + self.tags = tags + self.if_exists = if_exists + + @property + def _hook_parameters(self) -> dict[str, Any]: + return {**super()._hook_parameters, "client_type": "opensearchserverless"} + + def execute(self, context: Context) -> str: + self.log.info("Creating OpenSearch Serverless collection %s", self.collection_name) + kwargs: dict[str, Any] = prune_dict( + { + "name": self.collection_name, + "type": self.collection_type, + "description": self.description, + "standbyReplicas": self.standby_replicas, + "tags": self.tags, + } + ) + try: + response = self.hook.conn.create_collection(**kwargs) + collection_id = response["createCollectionDetail"]["id"] + except ClientError as e: + if e.response["Error"]["Code"] == "ConflictException" and self.if_exists == "skip": + self.log.info("Collection %s already exists, skipping.", self.collection_name) + collections = self.hook.conn.batch_get_collection(names=[self.collection_name]) Review Comment: I would add the below just to be extra-defensive in case we get an empty list for "collectionDetails": ``` details = collections.get("collectionDetails", []) if not details: raise RuntimeError( f"Collection {self.collection_name} exists 
but could not be retrieved." ) ``` This is of course non-blocking. ########## providers/amazon/tests/unit/amazon/aws/operators/test_opensearch_serverless.py: ########## @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from unittest import mock +from unittest.mock import MagicMock + +import pytest +from botocore.exceptions import ClientError + +from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook +from airflow.providers.amazon.aws.operators.opensearch_serverless import ( + OpenSearchServerlessCreateCollectionOperator, +) + +from unit.amazon.aws.utils.test_template_fields import validate_template_fields + +COLLECTION_NAME = "test-collection" +COLLECTION_ID = "abc123def456" + + +class TestOpenSearchServerlessCreateCollectionOperator: + def setup_method(self): + self.operator = OpenSearchServerlessCreateCollectionOperator( + task_id="create_collection", + collection_name=COLLECTION_NAME, + collection_type="VECTORSEARCH", + ) + + @mock.patch.object(AwsBaseHook, "conn", new_callable=mock.PropertyMock) + def test_execute(self, mock_conn): + mock_client = MagicMock() + mock_client.create_collection.return_value = { + "createCollectionDetail": {"id": COLLECTION_ID, "name": 
COLLECTION_NAME} + } + mock_conn.return_value = mock_client + + result = self.operator.execute({}) + + mock_client.create_collection.assert_called_once_with(name=COLLECTION_NAME, type="VECTORSEARCH") + assert result == COLLECTION_ID + + @mock.patch.object(AwsBaseHook, "conn", new_callable=mock.PropertyMock) + def test_execute_skip_existing(self, mock_conn): + mock_client = MagicMock() + mock_client.create_collection.side_effect = ClientError( + {"Error": {"Code": "ConflictException", "Message": "exists"}}, "CreateCollection" + ) + mock_client.batch_get_collection.return_value = { + "collectionDetails": [{"id": COLLECTION_ID, "name": COLLECTION_NAME}] + } + mock_conn.return_value = mock_client + + result = self.operator.execute({}) + assert result == COLLECTION_ID + + @mock.patch.object(AwsBaseHook, "conn", new_callable=mock.PropertyMock) + def test_execute_fail_on_conflict(self, mock_conn): + op = OpenSearchServerlessCreateCollectionOperator( + task_id="create_collection", + collection_name=COLLECTION_NAME, + if_exists="fail", + ) + mock_client = MagicMock() + mock_client.create_collection.side_effect = ClientError( + {"Error": {"Code": "ConflictException", "Message": "exists"}}, "CreateCollection" + ) + mock_conn.return_value = mock_client + + with pytest.raises(ClientError): + op.execute({}) Review Comment: Right now, this test asserts `ClientError` of all types when we are specifically interested in "ConflictException". I think you could strengthen this test by replacing lines 84-85 with the below: ``` with pytest.raises(ClientError) as exc_info: op.execute({}) assert exc_info.value.response["Error"]["Code"] == "ConflictException" ``` ########## providers/amazon/tests/unit/amazon/aws/operators/test_opensearch_serverless.py: ########## @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from unittest import mock +from unittest.mock import MagicMock + +import pytest +from botocore.exceptions import ClientError + +from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook +from airflow.providers.amazon.aws.operators.opensearch_serverless import ( + OpenSearchServerlessCreateCollectionOperator, +) + +from unit.amazon.aws.utils.test_template_fields import validate_template_fields + +COLLECTION_NAME = "test-collection" +COLLECTION_ID = "abc123def456" + + +class TestOpenSearchServerlessCreateCollectionOperator: + def setup_method(self): + self.operator = OpenSearchServerlessCreateCollectionOperator( + task_id="create_collection", + collection_name=COLLECTION_NAME, + collection_type="VECTORSEARCH", + ) + + @mock.patch.object(AwsBaseHook, "conn", new_callable=mock.PropertyMock) + def test_execute(self, mock_conn): + mock_client = MagicMock() + mock_client.create_collection.return_value = { + "createCollectionDetail": {"id": COLLECTION_ID, "name": COLLECTION_NAME} + } + mock_conn.return_value = mock_client + + result = self.operator.execute({}) + + mock_client.create_collection.assert_called_once_with(name=COLLECTION_NAME, type="VECTORSEARCH") + assert result == COLLECTION_ID + + @mock.patch.object(AwsBaseHook, "conn", 
new_callable=mock.PropertyMock) + def test_execute_skip_existing(self, mock_conn): + mock_client = MagicMock() + mock_client.create_collection.side_effect = ClientError( + {"Error": {"Code": "ConflictException", "Message": "exists"}}, "CreateCollection" + ) + mock_client.batch_get_collection.return_value = { + "collectionDetails": [{"id": COLLECTION_ID, "name": COLLECTION_NAME}] + } + mock_conn.return_value = mock_client + + result = self.operator.execute({}) + assert result == COLLECTION_ID + + @mock.patch.object(AwsBaseHook, "conn", new_callable=mock.PropertyMock) + def test_execute_fail_on_conflict(self, mock_conn): + op = OpenSearchServerlessCreateCollectionOperator( + task_id="create_collection", + collection_name=COLLECTION_NAME, + if_exists="fail", + ) + mock_client = MagicMock() + mock_client.create_collection.side_effect = ClientError( + {"Error": {"Code": "ConflictException", "Message": "exists"}}, "CreateCollection" + ) + mock_conn.return_value = mock_client + Review Comment: I would add this assert too (the operator in this test uses the default `collection_type`, so `type="SEARCH"` is passed as well): ``` mock_client.create_collection.assert_called_once_with( name=COLLECTION_NAME, type="SEARCH", ) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
