This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 3dc0d9e7a77 Add provider class registration checks to provider.yaml validation (#64127)
3dc0d9e7a77 is described below

commit 3dc0d9e7a772ba14e45aa6b857c435780314ed85
Author: Jarek Potiuk <[email protected]>
AuthorDate: Tue Mar 31 02:15:56 2026 +0200

    Add provider class registration checks to provider.yaml validation (#64127)
    
    * Add provider class registration checks to provider.yaml validation
    
    Add two new checks to run_provider_yaml_files_check.py:
    
    1. Check that hook classes defining conn_type are registered in
       connection-types — catches hooks with unique conn_type values that
       are not listed in provider.yaml (async variants sharing conn_type
       with a registered sync hook are allowed).
    
    2. Check that all provider Hook/Operator/Sensor/Trigger/Executor/Notifier
       classes are registered in provider.yaml — walks all provider source
       files, imports them, and verifies every concrete subclass of base
       types has its module (or class path for executors/notifications)
       registered in the appropriate section.
    
    Also register the missing LocalKubernetesExecutor in the cncf.kubernetes
    provider.yaml, which was found by the new check.
    
    * Fix mypy errors: use correct BaseHook import path
    
    * Add validation for secrets-backends, auth-managers, logging, and db-managers
    
    Extends the provider class registration checks to also cover
    BaseSecretsBackend, BaseAuthManager, FileTaskHandler (logging),
    BaseDagBundle, and BaseDBManager subclasses, addressing the gap
    noted in review.
    
    Adds both directions of validation:
    - yaml-to-code: classes listed in provider.yaml exist and are importable
    - code-to-yaml: classes in provider code are registered in provider.yaml
    
    * Fix provider.yaml logging registration for Redis and HDFS
    
    - Redis: Fix incorrect module path (missing .log. segment)
    - HDFS: Add missing logging section for HdfsTaskHandler
---
 providers/apache/hdfs/provider.yaml                |   3 +
 .../providers/apache/hdfs/get_provider_info.py     |   1 +
 providers/cncf/kubernetes/provider.yaml            |   1 +
 .../providers/cncf/kubernetes/get_provider_info.py |   5 +-
 providers/google/provider.yaml                     |   2 +
 .../airflow/providers/google/get_provider_info.py  |   4 +
 providers/redis/provider.yaml                      |   2 +-
 .../airflow/providers/redis/get_provider_info.py   |   2 +-
 .../in_container/run_provider_yaml_files_check.py  | 277 +++++++++++++++++++++
 9 files changed, 294 insertions(+), 3 deletions(-)

diff --git a/providers/apache/hdfs/provider.yaml b/providers/apache/hdfs/provider.yaml
index 3a8141ca20b..ee3c7192fcd 100644
--- a/providers/apache/hdfs/provider.yaml
+++ b/providers/apache/hdfs/provider.yaml
@@ -96,6 +96,9 @@ hooks:
     python-modules:
       - airflow.providers.apache.hdfs.hooks.webhdfs
 
+logging:
+  - airflow.providers.apache.hdfs.log.hdfs_task_handler.HdfsTaskHandler
+
 connection-types:
   - hook-class-name: airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook
     connection-type: webhdfs
diff --git a/providers/apache/hdfs/src/airflow/providers/apache/hdfs/get_provider_info.py b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/get_provider_info.py
index 9a9a7973cae..349b78a9b9e 100644
--- a/providers/apache/hdfs/src/airflow/providers/apache/hdfs/get_provider_info.py
+++ b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/get_provider_info.py
@@ -49,6 +49,7 @@ def get_provider_info():
         "hooks": [
             {"integration-name": "WebHDFS", "python-modules": 
["airflow.providers.apache.hdfs.hooks.webhdfs"]}
         ],
+        "logging": 
["airflow.providers.apache.hdfs.log.hdfs_task_handler.HdfsTaskHandler"],
         "connection-types": [
             {
                 "hook-class-name": 
"airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook",
diff --git a/providers/cncf/kubernetes/provider.yaml b/providers/cncf/kubernetes/provider.yaml
index ea06edb2b5b..2c1b88dc3a1 100644
--- a/providers/cncf/kubernetes/provider.yaml
+++ b/providers/cncf/kubernetes/provider.yaml
@@ -464,6 +464,7 @@ config:
 
 executors:
   - airflow.providers.cncf.kubernetes.executors.kubernetes_executor.KubernetesExecutor
+  - airflow.providers.cncf.kubernetes.executors.local_kubernetes_executor.LocalKubernetesExecutor
 
 cli:
   - airflow.providers.cncf.kubernetes.cli.definition.get_kubernetes_cli_commands
diff --git a/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/get_provider_info.py b/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/get_provider_info.py
index 9e4d433827e..d4b174bf218 100644
--- a/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/get_provider_info.py
+++ b/providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/get_provider_info.py
@@ -311,6 +311,9 @@ def get_provider_info():
                 },
             },
         },
-        "executors": 
["airflow.providers.cncf.kubernetes.executors.kubernetes_executor.KubernetesExecutor"],
+        "executors": [
+            
"airflow.providers.cncf.kubernetes.executors.kubernetes_executor.KubernetesExecutor",
+            
"airflow.providers.cncf.kubernetes.executors.local_kubernetes_executor.LocalKubernetesExecutor",
+        ],
         "cli": 
["airflow.providers.cncf.kubernetes.cli.definition.get_kubernetes_cli_commands"],
     }
diff --git a/providers/google/provider.yaml b/providers/google/provider.yaml
index 04541d5569d..3ef456692b6 100644
--- a/providers/google/provider.yaml
+++ b/providers/google/provider.yaml
@@ -1199,6 +1199,8 @@ connection-types:
         schema:
           type: ["boolean", "null"]
           default: false
+  - hook-class-name: airflow.providers.google.cloud.hooks.spanner.SpannerHook
+    connection-type: gcpspanner
   - hook-class-name: airflow.providers.google.cloud.hooks.dataprep.GoogleDataprepHook
     connection-type: dataprep
   - hook-class-name: airflow.providers.google.cloud.hooks.cloud_sql.CloudSQLHook
diff --git a/providers/google/src/airflow/providers/google/get_provider_info.py b/providers/google/src/airflow/providers/google/get_provider_info.py
index fec76eb9d5f..5a4d040788c 100644
--- a/providers/google/src/airflow/providers/google/get_provider_info.py
+++ b/providers/google/src/airflow/providers/google/get_provider_info.py
@@ -1436,6 +1436,10 @@ def get_provider_info():
                     },
                 },
             },
+            {
+                "hook-class-name": 
"airflow.providers.google.cloud.hooks.spanner.SpannerHook",
+                "connection-type": "gcpspanner",
+            },
             {
                 "hook-class-name": 
"airflow.providers.google.cloud.hooks.dataprep.GoogleDataprepHook",
                 "connection-type": "dataprep",
diff --git a/providers/redis/provider.yaml b/providers/redis/provider.yaml
index 03d9c8f03c6..e67c28a4da4 100644
--- a/providers/redis/provider.yaml
+++ b/providers/redis/provider.yaml
@@ -158,4 +158,4 @@ connection-types:
           default: false
 
 logging:
-  - airflow.providers.redis.redis_task_handler.RedisTaskHandler
+  - airflow.providers.redis.log.redis_task_handler.RedisTaskHandler
diff --git a/providers/redis/src/airflow/providers/redis/get_provider_info.py b/providers/redis/src/airflow/providers/redis/get_provider_info.py
index d93d7d7bea3..53c4a21b0c4 100644
--- a/providers/redis/src/airflow/providers/redis/get_provider_info.py
+++ b/providers/redis/src/airflow/providers/redis/get_provider_info.py
@@ -84,5 +84,5 @@ def get_provider_info():
                 },
             }
         ],
-        "logging": 
["airflow.providers.redis.redis_task_handler.RedisTaskHandler"],
+        "logging": 
["airflow.providers.redis.log.redis_task_handler.RedisTaskHandler"],
     }
diff --git a/scripts/in_container/run_provider_yaml_files_check.py b/scripts/in_container/run_provider_yaml_files_check.py
index 7dbb485ca48..ebea8260e80 100755
--- a/scripts/in_container/run_provider_yaml_files_check.py
+++ b/scripts/in_container/run_provider_yaml_files_check.py
@@ -473,6 +473,257 @@ def check_hook_class_name_entries_in_connection_types(yaml_files: dict[str, dict
     return num_connection_types, num_errors
 
 
+@run_check("Checking that hook classes defining conn_type are registered in 
connection-types")
+def check_hook_classes_with_conn_type_are_registered(yaml_files: dict[str, 
dict]) -> tuple[int, int]:
+    """Find Hook subclasses that define conn_type but are not listed in 
connection-types."""
+    from airflow.sdk.bases.hook import BaseHook
+
+    num_checks = 0
+    num_errors = 0
+    for yaml_file_path, provider_data in yaml_files.items():
+        connection_types = provider_data.get("connection-types", [])
+        registered_hook_classes = {ct["hook-class-name"] for ct in 
connection_types}
+        # Collect conn_type values that are already covered by a registered 
hook class
+        registered_conn_types = {ct["connection-type"] for ct in 
connection_types}
+        hook_modules = [
+            mod for entry in provider_data.get("hooks", []) for mod in 
entry.get("python-modules", [])
+        ]
+        for module_name in hook_modules:
+            try:
+                with warnings.catch_warnings(record=True):
+                    module = importlib.import_module(module_name)
+            except (ImportError, AirflowOptionalProviderFeatureException):
+                continue  # Import errors are caught by other checks
+
+            for attr_name in dir(module):
+                if attr_name.startswith("_"):
+                    continue
+                obj = getattr(module, attr_name, None)
+                if not (inspect.isclass(obj) and issubclass(obj, BaseHook) and 
obj is not BaseHook):
+                    continue
+                # Only check classes defined in this module, not re-exported 
ones
+                if obj.__module__ != module_name:
+                    continue
+                # Skip abstract classes — they are base classes, not concrete 
hooks
+                if inspect.isabstract(obj):
+                    continue
+                num_checks += 1
+                # Check conn_type defined directly on the class, not inherited
+                if "conn_type" not in obj.__dict__:
+                    continue
+                conn_type = obj.__dict__["conn_type"]
+                if not conn_type:
+                    continue
+                full_class_name = f"{module_name}.{attr_name}"
+                if full_class_name in registered_hook_classes:
+                    continue
+                # If another hook class already registered the same conn_type, 
this is fine
+                # (e.g. async variants sharing conn_type with sync hooks)
+                if conn_type in registered_conn_types:
+                    continue
+                errors.append(
+                    f"Hook class `{full_class_name}` defines 
conn_type='{conn_type}' "
+                    f"but no hook class is registered for this connection type 
"
+                    f"in 'connection-types' in {yaml_file_path}.\n"
+                    f"[yellow]How to fix it[/]: Add an entry with "
+                    f"hook-class-name: {full_class_name} to the 
connection-types "
+                    f"section of {yaml_file_path}."
+                )
+                num_errors += 1
+    return num_checks, num_errors
+
+
+@run_check(
+    "Checking that all provider Hook/Operator/Sensor/Trigger/Executor/Notifier"
+    " classes are registered in provider.yaml"
+)
+def check_all_provider_classes_are_registered(yaml_files: dict[str, dict]) -> 
tuple[int, int]:
+    """
+    Walk all provider source files, find 
Hook/Operator/Sensor/Trigger/Executor/Notifier/
+    SecretsBackend/AuthManager/LoggingHandler/DagBundle/DBManager subclasses, 
and verify
+    they are registered in the appropriate provider.yaml section.
+
+    This catches classes placed in non-standard directories or modules that 
were missed
+    when updating provider.yaml.
+    """
+    from airflow.api_fastapi.auth.managers.base_auth_manager import 
BaseAuthManager
+    from airflow.dag_processing.bundles.base import BaseDagBundle
+    from airflow.executors.base_executor import BaseExecutor
+    from airflow.models.baseoperator import BaseOperator
+    from airflow.sdk.bases.hook import BaseHook
+    from airflow.sdk.bases.notifier import BaseNotifier
+    from airflow.secrets.base_secrets import BaseSecretsBackend
+    from airflow.sensors.base import BaseSensorOperator
+    from airflow.triggers.base import BaseTrigger
+    from airflow.utils.db_manager import BaseDBManager
+    from airflow.utils.log.file_task_handler import FileTaskHandler
+
+    # Most specific first — BaseSensorOperator is a BaseOperator subclass
+    base_class_resource_map: list[tuple[type, str]] = [
+        (BaseSensorOperator, "sensors"),
+        (BaseHook, "hooks"),
+        (BaseTrigger, "triggers"),
+        (BaseNotifier, "notifications"),
+        (BaseExecutor, "executors"),
+        (BaseOperator, "operators"),
+        (BaseSecretsBackend, "secrets-backends"),
+        (BaseAuthManager, "auth-managers"),
+        (FileTaskHandler, "logging"),
+        (BaseDagBundle, "bundles"),
+        (BaseDBManager, "db-managers"),
+    ]
+
+    # Resource types where registration is by class path (not module)
+    class_level_resource_types = {
+        "executors",
+        "notifications",
+        "secrets-backends",
+        "auth-managers",
+        "logging",
+        "db-managers",
+    }
+
+    num_checks = 0
+    num_errors = 0
+
+    # Directories that are not expected to contain registered provider classes
+    skip_dirs = {"tests", "example_dags", "decorators"}
+
+    for yaml_file_path, provider_data in yaml_files.items():
+        provider_dir = pathlib.Path(yaml_file_path).parent
+        package_dir = AIRFLOW_ROOT_PATH.joinpath(provider_dir)
+
+        # Collect all modules registered in provider.yaml across all resource 
types
+        registered_modules: set[str] = set()
+        for resource_type in ("hooks", "operators", "sensors", "triggers", 
"bundles"):
+            for entry in provider_data.get(resource_type, []):
+                registered_modules.update(entry.get("python-modules", []))
+        for entry in provider_data.get("transfers", []):
+            python_module = entry.get("python-module")
+            if python_module:
+                registered_modules.add(python_module)
+
+        # Collect class paths for class-level registrations
+        registered_classes: set[str] = set()
+        for resource_type in (
+            "executors",
+            "notifications",
+            "secrets-backends",
+            "auth-managers",
+            "logging",
+            "db-managers",
+        ):
+            for class_path in provider_data.get(resource_type, []):
+                registered_classes.add(class_path)
+
+        # Find the src directory for the provider
+        src_dir = package_dir / "src"
+        if not src_dir.exists():
+            src_dir = package_dir
+
+        # Track unregistered modules and their classes
+        # module_name -> [(class_name, suggested_resource_type)]
+        unregistered: dict[str, list[tuple[str, str]]] = {}
+
+        for py_file in sorted(src_dir.rglob("*.py")):
+            if py_file.name == "__init__.py":
+                continue
+            if skip_dirs & set(py_file.parts):
+                continue
+
+            try:
+                module_name = _filepath_to_module(py_file)
+            except ValueError:
+                continue
+
+            if module_name in DEPRECATED_MODULES:
+                continue
+
+            is_registered = module_name in registered_modules
+
+            try:
+                with warnings.catch_warnings(record=True):
+                    module = importlib.import_module(module_name)
+            except (ImportError, AirflowOptionalProviderFeatureException):
+                continue
+            except Exception:
+                continue
+
+            # Track seen classes by identity to skip aliases
+            # (e.g. send_chime_notification = ChimeNotifier)
+            seen_classes: set[int] = set()
+
+            for attr_name in dir(module):
+                if attr_name.startswith("_"):
+                    continue
+                obj = getattr(module, attr_name, None)
+                if not inspect.isclass(obj):
+                    continue
+                # Only check classes defined in this module, not re-exported 
ones
+                if obj.__module__ != module_name:
+                    continue
+                # Skip if this is an alias for a class we already checked
+                if id(obj) in seen_classes:
+                    continue
+                seen_classes.add(id(obj))
+                # Skip abstract classes — they are base classes, not concrete 
implementations
+                if inspect.isabstract(obj):
+                    continue
+
+                for base_class, resource_type in base_class_resource_map:
+                    if issubclass(obj, base_class) and obj is not base_class:
+                        num_checks += 1
+                        full_class_name = f"{module_name}.{attr_name}"
+                        # Executors and notifications are registered by class 
path;
+                        # other types are registered by module path.
+                        if resource_type in class_level_resource_types:
+                            # Check both the full path and any registered path
+                            # that ends with the class name (handles 
__init__.py
+                            # re-exports like 
airflow.providers.edge3.executors.EdgeExecutor)
+                            is_ok = full_class_name in registered_classes or 
any(
+                                rc.endswith(f".{attr_name}") for rc in 
registered_classes
+                            )
+                        else:
+                            is_ok = is_registered
+                        if is_ok:
+                            console.print(
+                                f"  [green]OK[/] {full_class_name} 
({resource_type}, {base_class.__name__})"
+                            )
+                        else:
+                            unregistered.setdefault(module_name, 
[]).append((attr_name, resource_type))
+                            console.print(
+                                f"  [red]MISSING[/] {full_class_name} "
+                                f"({resource_type}, {base_class.__name__})"
+                            )
+                        break  # Most specific match wins, don't double-report
+
+        # Report one error per unregistered module
+        for module_name, class_info in unregistered.items():
+            class_names = ", ".join(f"`{name}`" for name, _ in class_info)
+            suggested_type = class_info[0][1]
+            if suggested_type in class_level_resource_types:
+                full_paths = ", ".join(f"`{module_name}.{name}`" for name, _ 
in class_info)
+                errors.append(
+                    f"Class(es) {full_paths} not registered in the "
+                    f"{suggested_type} section of {yaml_file_path}.\n"
+                    f"[yellow]How to fix it[/]: Add the class path(s) to the "
+                    f"{suggested_type} list in {yaml_file_path}."
+                )
+            else:
+                errors.append(
+                    f"Module `{module_name}` contains {suggested_type} "
+                    f"class(es) ({class_names}) but is not registered in any "
+                    f"resource section of {yaml_file_path}.\n"
+                    f"[yellow]How to fix it[/]: Add `{module_name}` to the "
+                    f"python-modules list in the {suggested_type} section "
+                    f"of {yaml_file_path}, or to the transfers section "
+                    f"if it is a transfer operator."
+                )
+            num_errors += 1
+
+    return num_checks, num_errors
+
+
 @run_check("Checking plugin classes belong to package are importable and 
belong to package")
 def check_plugin_classes(yaml_files: dict[str, dict]) -> tuple[int, int]:
     resource_type = "plugins"
@@ -527,6 +778,26 @@ def check_queue_classes(yaml_files: dict[str, dict]) -> tuple[int, int]:
     return _check_simple_class_list("queues", yaml_files)
 
 
+@run_check("Checking secrets-backends belong to package, exist and are 
classes")
+def check_secrets_backend_classes(yaml_files: dict[str, dict]) -> tuple[int, 
int]:
+    return _check_simple_class_list("secrets-backends", yaml_files)
+
+
+@run_check("Checking auth-managers belong to package, exist and are classes")
+def check_auth_manager_classes(yaml_files: dict[str, dict]) -> tuple[int, int]:
+    return _check_simple_class_list("auth-managers", yaml_files)
+
+
+@run_check("Checking logging handlers belong to package, exist and are 
classes")
+def check_logging_classes(yaml_files: dict[str, dict]) -> tuple[int, int]:
+    return _check_simple_class_list("logging", yaml_files)
+
+
+@run_check("Checking db-managers belong to package, exist and are classes")
+def check_db_manager_classes(yaml_files: dict[str, dict]) -> tuple[int, int]:
+    return _check_simple_class_list("db-managers", yaml_files)
+
+
 @run_check("Checking for duplicates in list of transfers")
 def check_duplicates_in_list_of_transfers(yaml_files: dict[str, dict]) -> 
tuple[int, int]:
     resource_type = "transfers"
@@ -775,11 +1046,17 @@ if __name__ == "__main__":
 
     check_completeness_of_list_of_transfers(all_parsed_yaml_files)
     check_hook_class_name_entries_in_connection_types(all_parsed_yaml_files)
+    check_hook_classes_with_conn_type_are_registered(all_parsed_yaml_files)
     check_executor_classes(all_parsed_yaml_files)
     check_queue_classes(all_parsed_yaml_files)
     check_plugin_classes(all_parsed_yaml_files)
     check_extra_link_classes(all_parsed_yaml_files)
+    check_secrets_backend_classes(all_parsed_yaml_files)
+    check_auth_manager_classes(all_parsed_yaml_files)
+    check_logging_classes(all_parsed_yaml_files)
+    check_db_manager_classes(all_parsed_yaml_files)
     
check_correctness_of_list_of_sensors_operators_hook_trigger_modules(all_parsed_yaml_files)
+    check_all_provider_classes_are_registered(all_parsed_yaml_files)
     check_notification_classes(all_parsed_yaml_files)
     check_unique_provider_name(all_parsed_yaml_files)
     check_providers_have_all_documentation_files(all_parsed_yaml_files)

Reply via email to