o-nikolas commented on code in PR #35163:
URL: https://github.com/apache/airflow/pull/35163#discussion_r1370676442


##########
tests/providers/amazon/aws/hooks/test_hooks_signature.py:
##########


Review Comment:
   I think keeping the constructor args clean could be nice, but I'm not sure 
we want to commit to this level of inflexibility and complication. I'm a bit 
worried Hooks are becoming a bit too Enterprise Edition. But I could go either 
way on this one. 



##########
tests/providers/amazon/aws/hooks/test_hooks_signature.py:
##########
@@ -0,0 +1,190 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import inspect
+from importlib import import_module
+from pathlib import Path
+
+import pytest
+
+from airflow.exceptions import AirflowOptionalProviderFeatureException
+from airflow.providers.amazon.aws.hooks.base_aws import AwsGenericHook
+
+BASE_AWS_HOOKS = ["AwsGenericHook", "AwsBaseHook"]
+ALLOWED_THICK_HOOKS_PARAMETERS: dict[str, set[str]] = {
+    # There are should be good reasons for extend this list
+    "AthenaHook": {"sleep_time", "log_query"},
+    "BatchClientHook": {"status_retries", "max_retries"},
+    "BatchWaitersHook": {"waiter_config"},
+    "DataSyncHook": {"wait_interval_seconds"},
+    "DynamoDBHook": {"table_name", "table_keys"},
+    "EC2Hook": {"api_type"},
+    "ElastiCacheReplicationGroupHook": {
+        "exponential_back_off_factor",
+        "max_retries",
+        "initial_poke_interval",
+    },
+    "EmrHook": {"emr_conn_id"},
+    "EmrContainerHook": {"virtual_cluster_id"},
+    "FirehoseHook": {"delivery_stream"},
+    "GlacierHook": {"virtual_cluster_id"},
+    "GlueJobHook": {
+        "job_name",
+        "concurrent_run_limit",
+        "job_poll_interval",
+        "create_job_kwargs",
+        "desc",
+        "iam_role_arn",
+        "s3_bucket",
+        "iam_role_name",
+        "update_config",
+        "retry_limit",
+        "num_of_dpus",
+        "script_location",
+    },
+    "S3Hook": {"transfer_config_args", "aws_conn_id", "extra_args"},
+}
+
+
+def get_aws_hooks_modules():
+    """Parse Amazon Provider metadata and find all hooks based on 
`AwsGenericHook` and return it."""
+    hooks_dir = Path(__file__).absolute().parents[5] / "airflow" / "providers" 
/ "amazon" / "aws" / "hooks"
+    if not hooks_dir.exists():
+        msg = f"Amazon Provider hooks directory not found: 
{hooks_dir.__fspath__()!r}"
+        raise FileNotFoundError(msg)
+    elif not hooks_dir.is_dir():
+        raise NotADirectoryError(hooks_dir.__fspath__())
+
+    for module in hooks_dir.glob("*.py"):
+        name = module.stem
+        if name.startswith("_"):
+            continue
+        module_string = f"airflow.providers.amazon.aws.hooks.{name}"
+
+        yield pytest.param(module_string, id=name)
+
+
+def get_aws_hooks_from_module(hook_module: str):
+    try:
+        imported_module = import_module(hook_module)
+    except AirflowOptionalProviderFeatureException as ex:
+        pytest.skip(str(ex))
+    else:
+        hooks = []
+        for name, o in vars(imported_module).items():
+            if name in BASE_AWS_HOOKS:
+                continue
+
+            if isinstance(o, type) and o.__module__ != "builtins" and 
issubclass(o, AwsGenericHook):
+                hooks.append((o, name))
+        return hooks
+
+
+def validate_hook(hook: type[AwsGenericHook], hook_name: str, hook_module: 
str):
+    hook_parameters = inspect.signature(hook).parameters
+    hook_extra_parameters = set(pn for pn in hook_parameters if pn not in 
("self", "args", "kwargs"))
+
+    allowed_parameters = ALLOWED_THICK_HOOKS_PARAMETERS.get(hook_name, set())
+    if allowed_parameters:
+        # Remove historically allowed parameters for Thick Wrapped Hooks
+        hook_extra_parameters -= allowed_parameters
+
+    if not hook_extra_parameters:
+        # No additional arguments found
+        return True, None
+
+    if not allowed_parameters:
+        msg = (
+            f"'{hook_module}.{hook_name}' has additional attributes "
+            f"{', '.join(map(repr, hook_extra_parameters))}. "
+            "Expected that all `boto3` related hooks (based on 
`AwsGenericHook` or `AwsBaseHook`) "
+            "should not use additional attributes in class constructor, please 
move it in methods. "
+            f"Make sure that {hook_name!r} has signature `def __init__(self, 
*args, **kwargs):`"

Review Comment:
   ```suggestion
               f"'{hook_module}.{hook_name}' has additional attributes "
               f"{', '.join(map(repr, hook_extra_parameters))}. "
               "Expected that all `boto3` related hooks (based on 
`AwsGenericHook` or `AwsBaseHook`) "
               "should not use additional attributes in class constructor, 
please move them to method signatures. "
               f"Make sure that {hook_name!r} constructor has signature `def 
__init__(self, *args, **kwargs):`"
   ```



##########
tests/providers/amazon/aws/hooks/test_hooks_signature.py:
##########
@@ -0,0 +1,190 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import inspect
+from importlib import import_module
+from pathlib import Path
+
+import pytest
+
+from airflow.exceptions import AirflowOptionalProviderFeatureException
+from airflow.providers.amazon.aws.hooks.base_aws import AwsGenericHook
+
+BASE_AWS_HOOKS = ["AwsGenericHook", "AwsBaseHook"]
+ALLOWED_THICK_HOOKS_PARAMETERS: dict[str, set[str]] = {
+    # There are should be good reasons for extend this list
+    "AthenaHook": {"sleep_time", "log_query"},
+    "BatchClientHook": {"status_retries", "max_retries"},
+    "BatchWaitersHook": {"waiter_config"},
+    "DataSyncHook": {"wait_interval_seconds"},
+    "DynamoDBHook": {"table_name", "table_keys"},
+    "EC2Hook": {"api_type"},
+    "ElastiCacheReplicationGroupHook": {
+        "exponential_back_off_factor",
+        "max_retries",
+        "initial_poke_interval",
+    },
+    "EmrHook": {"emr_conn_id"},
+    "EmrContainerHook": {"virtual_cluster_id"},
+    "FirehoseHook": {"delivery_stream"},
+    "GlacierHook": {"virtual_cluster_id"},
+    "GlueJobHook": {
+        "job_name",
+        "concurrent_run_limit",
+        "job_poll_interval",
+        "create_job_kwargs",
+        "desc",
+        "iam_role_arn",
+        "s3_bucket",
+        "iam_role_name",
+        "update_config",
+        "retry_limit",
+        "num_of_dpus",
+        "script_location",
+    },
+    "S3Hook": {"transfer_config_args", "aws_conn_id", "extra_args"},
+}
+
+
+def get_aws_hooks_modules():
+    """Parse Amazon Provider metadata and find all hooks based on 
`AwsGenericHook` and return it."""
+    hooks_dir = Path(__file__).absolute().parents[5] / "airflow" / "providers" 
/ "amazon" / "aws" / "hooks"
+    if not hooks_dir.exists():
+        msg = f"Amazon Provider hooks directory not found: 
{hooks_dir.__fspath__()!r}"
+        raise FileNotFoundError(msg)
+    elif not hooks_dir.is_dir():
+        raise NotADirectoryError(hooks_dir.__fspath__())
+
+    for module in hooks_dir.glob("*.py"):
+        name = module.stem
+        if name.startswith("_"):
+            continue
+        module_string = f"airflow.providers.amazon.aws.hooks.{name}"
+
+        yield pytest.param(module_string, id=name)
+
+
+def get_aws_hooks_from_module(hook_module: str):
+    try:
+        imported_module = import_module(hook_module)
+    except AirflowOptionalProviderFeatureException as ex:
+        pytest.skip(str(ex))
+    else:
+        hooks = []
+        for name, o in vars(imported_module).items():
+            if name in BASE_AWS_HOOKS:
+                continue
+
+            if isinstance(o, type) and o.__module__ != "builtins" and 
issubclass(o, AwsGenericHook):
+                hooks.append((o, name))
+        return hooks
+
+
+def validate_hook(hook: type[AwsGenericHook], hook_name: str, hook_module: 
str):
+    hook_parameters = inspect.signature(hook).parameters
+    hook_extra_parameters = set(pn for pn in hook_parameters if pn not in 
("self", "args", "kwargs"))
+
+    allowed_parameters = ALLOWED_THICK_HOOKS_PARAMETERS.get(hook_name, set())
+    if allowed_parameters:
+        # Remove historically allowed parameters for Thick Wrapped Hooks
+        hook_extra_parameters -= allowed_parameters
+
+    if not hook_extra_parameters:
+        # No additional arguments found
+        return True, None
+
+    if not allowed_parameters:
+        msg = (
+            f"'{hook_module}.{hook_name}' has additional attributes "
+            f"{', '.join(map(repr, hook_extra_parameters))}. "
+            "Expected that all `boto3` related hooks (based on 
`AwsGenericHook` or `AwsBaseHook`) "
+            "should not use additional attributes in class constructor, please 
move it in methods. "
+            f"Make sure that {hook_name!r} has signature `def __init__(self, 
*args, **kwargs):`"
+        )
+    else:
+        msg = (
+            f"'{hook_module}.{hook_name}' allowed only "
+            f"{', '.join(map(repr, allowed_parameters))} additional 
attributes, "
+            f"but got extra parameters {', '.join(map(repr, 
hook_extra_parameters))}. "
+            "Please move additional attributes from class constructor into the 
methods. "

Review Comment:
   ```suggestion
               f"'{hook_module}.{hook_name}' allowed only "
               f"{', '.join(map(repr, allowed_parameters))} additional 
attributes, "
               f"but got extra parameters {', '.join(map(repr, 
hook_extra_parameters))}. "
               "Please move additional attributes from class constructor into 
method signatures. "
   ```



##########
tests/providers/amazon/aws/hooks/test_hooks_signature.py:
##########
@@ -0,0 +1,190 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import inspect
+from importlib import import_module
+from pathlib import Path
+
+import pytest
+
+from airflow.exceptions import AirflowOptionalProviderFeatureException
+from airflow.providers.amazon.aws.hooks.base_aws import AwsGenericHook
+
+BASE_AWS_HOOKS = ["AwsGenericHook", "AwsBaseHook"]
+ALLOWED_THICK_HOOKS_PARAMETERS: dict[str, set[str]] = {
+    # There are should be good reasons for extend this list

Review Comment:
   ```suggestion
       # This list should only be reduced, not extended with new parameters, 
unless there is an exceptional reason.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to