Copilot commented on code in PR #64209:
URL: https://github.com/apache/airflow/pull/64209#discussion_r2993281653
##########
shared/configuration/src/airflow_shared/configuration/parser.py:
##########
@@ -279,24 +279,77 @@ def _lookup_sequence(self) -> list[Callable]:
self._get_option_from_provider_metadata_config_fallbacks,
]
- def _get_config_sources_for_as_dict(self) -> list[tuple[str,
ConfigParser]]:
+ @functools.cached_property
+ def configuration_description(self) -> dict[str, dict[str, Any]]:
+ """
+ Return configuration description from multiple sources.
+
+ Respects the ``_use_providers_configuration`` flag to decide whether
to include
+ provider configuration.
+
+ The priority order is as follows (later sources override earlier ones):
+
+ 1. The base configuration description provided in ``__init__``,
usually loaded
+ from ``config.yml`` in core.
+ 2. ``_provider_cfg_config_fallback_default_values``, loaded from
+ ``provider_config_fallback_defaults.cfg``.
+ 3. ``_provider_metadata_config_fallback_default_values``, loaded from
provider
+ packages' ``get_provider_info`` method (via ProvidersManager /
+ RuntimeProvidersManager's ``.provider_configs`` property).
+
+ We use ``cached_property`` to cache the merged result; clear this
cache (via
+ ``invalidate_cache``) when toggling ``_use_providers_configuration``.
+ """
+ if not self._use_providers_configuration:
+ return self._configuration_description
+
+ merged_description: dict[str, dict[str, Any]] =
deepcopy(self._configuration_description)
+
+ # Merge full provider config descriptions (with metadata like
sensitive, description, etc.)
+ # from provider packages' get_provider_info method, reusing the cached
raw dict.
+ for section, section_content in
self._provider_metadata_configuration_description.items():
+ if section not in merged_description:
+ merged_description[section] = deepcopy(section_content)
+ else:
+ existing_options =
merged_description[section].setdefault("options", {})
+ for option, option_content in section_content.get("options",
{}).items():
+ if option not in existing_options:
+ existing_options[option] = deepcopy(option_content)
Review Comment:
`configuration_description` now pulls in provider metadata via
`self._provider_manager_type().provider_configs`. Since `get()` calls
`_resolve_deprecated_lookup()` which reads `self.configuration_description`
(parser.py:1213), the *first* `conf.get()` will trigger provider
discovery/initialization even for unrelated core options. This can
significantly increase startup cost and may undermine the existing “load
providers only when needed” pattern. Consider keeping
`configuration_description` as the core config.yml view for deprecation
lookups, and only consulting provider metadata/cfg fallbacks in the actual
value-lookup chain (or otherwise avoid provider discovery on every first
`get()`).
```suggestion
#
# IMPORTANT: Accessing
``_provider_metadata_configuration_description`` directly may
# trigger provider discovery/initialization (via
ProvidersManager.provider_configs).
# To avoid doing this on the first ``conf.get()`` (e.g. during
deprecation lookup),
# only merge provider metadata if the underlying attribute has
already been
# initialized on the instance.
provider_metadata =
self.__dict__.get("_provider_metadata_configuration_description")
if provider_metadata:
for section, section_content in provider_metadata.items():
if section not in merged_description:
merged_description[section] = deepcopy(section_content)
else:
existing_options =
merged_description[section].setdefault("options", {})
for option, option_content in
section_content.get("options", {}).items():
if option not in existing_options:
existing_options[option] =
deepcopy(option_content)
```
##########
shared/configuration/src/airflow_shared/configuration/parser.py:
##########
@@ -279,24 +279,77 @@ def _lookup_sequence(self) -> list[Callable]:
self._get_option_from_provider_metadata_config_fallbacks,
]
- def _get_config_sources_for_as_dict(self) -> list[tuple[str,
ConfigParser]]:
+ @functools.cached_property
+ def configuration_description(self) -> dict[str, dict[str, Any]]:
+ """
+ Return configuration description from multiple sources.
+
+ Respects the ``_use_providers_configuration`` flag to decide whether
to include
+ provider configuration.
+
+ The priority order is as follows (later sources override earlier ones):
+
+ 1. The base configuration description provided in ``__init__``,
usually loaded
+ from ``config.yml`` in core.
+ 2. ``_provider_cfg_config_fallback_default_values``, loaded from
+ ``provider_config_fallback_defaults.cfg``.
+ 3. ``_provider_metadata_config_fallback_default_values``, loaded from
provider
+ packages' ``get_provider_info`` method (via ProvidersManager /
+ RuntimeProvidersManager's ``.provider_configs`` property).
+
+ We use ``cached_property`` to cache the merged result; clear this
cache (via
+ ``invalidate_cache``) when toggling ``_use_providers_configuration``.
+ """
+ if not self._use_providers_configuration:
+ return self._configuration_description
+
+ merged_description: dict[str, dict[str, Any]] =
deepcopy(self._configuration_description)
+
+ # Merge full provider config descriptions (with metadata like
sensitive, description, etc.)
+ # from provider packages' get_provider_info method, reusing the cached
raw dict.
+ for section, section_content in
self._provider_metadata_configuration_description.items():
+ if section not in merged_description:
+ merged_description[section] = deepcopy(section_content)
+ else:
+ existing_options =
merged_description[section].setdefault("options", {})
+ for option, option_content in section_content.get("options",
{}).items():
+ if option not in existing_options:
+ existing_options[option] = deepcopy(option_content)
+
+ # Merge default values from cfg-based fallbacks (key=value only, no
metadata).
+ # Uses setdefault so provider metadata values above take priority.
+ cfg = self._provider_cfg_config_fallback_default_values
+ for section in cfg.sections():
+ section_options = merged_description.setdefault(section,
{"options": {}}).setdefault(
+ "options", {}
+ )
+ for option in cfg.options(section):
+ opt_dict = section_options.setdefault(option, {})
+ opt_dict.setdefault("default", cfg.get(section, option))
+ # For cfg-only options with no provider metadata, infer
sensitivity from name.
+ if "sensitive" not in opt_dict and
option.endswith(("password", "secret")):
+ opt_dict["sensitive"] = True
+
+ return merged_description
+
+ @property
+ def _config_sources_for_as_dict(self) -> list[tuple[str, ConfigParser]]:
"""Override the base method to add provider fallbacks when providers
are loaded."""
sources: list[tuple[str, ConfigParser]] = [
("default", self._default_values),
("airflow.cfg", self),
]
- if self._providers_configuration_loaded:
- sources.insert(
- 0,
- (
- "provider-metadata-fallback-defaults",
- self._provider_metadata_config_fallback_default_values,
- ),
- )
- sources.insert(
- 0,
- ("provider-cfg-fallback-defaults",
self._provider_cfg_config_fallback_default_values),
- )
+ if not self._use_providers_configuration:
+ return sources
+
+ # Provider fallback defaults are added as the last source, so they
have the lowest priority.
+ sources += [
+ ("provider-cfg-fallback-defaults",
self._provider_cfg_config_fallback_default_values),
+ (
+ "provider-metadata-fallback-defaults",
+ self._provider_metadata_config_fallback_default_values,
+ ),
+ ]
Review Comment:
`as_dict()` resolves values by iterating sources in order and letting the
*last* source win (see comment at parser.py:1703-1704 and
`_replace_config_with_display_sources`). Here, provider fallback sources are
appended last, which makes provider fallbacks override `airflow.cfg` and core
defaults in `as_dict()` (and any consumer of it), contradicting the comment
that they have the lowest priority. Provider fallbacks should be placed
*before* core defaults (or otherwise ordered so they have the lowest
precedence).
##########
shared/configuration/src/airflow_shared/configuration/parser.py:
##########
@@ -279,24 +279,77 @@ def _lookup_sequence(self) -> list[Callable]:
self._get_option_from_provider_metadata_config_fallbacks,
]
- def _get_config_sources_for_as_dict(self) -> list[tuple[str,
ConfigParser]]:
+ @functools.cached_property
+ def configuration_description(self) -> dict[str, dict[str, Any]]:
+ """
+ Return configuration description from multiple sources.
+
+ Respects the ``_use_providers_configuration`` flag to decide whether
to include
+ provider configuration.
+
+ The priority order is as follows (later sources override earlier ones):
+
+ 1. The base configuration description provided in ``__init__``,
usually loaded
+ from ``config.yml`` in core.
+ 2. ``_provider_cfg_config_fallback_default_values``, loaded from
+ ``provider_config_fallback_defaults.cfg``.
+ 3. ``_provider_metadata_config_fallback_default_values``, loaded from
provider
+ packages' ``get_provider_info`` method (via ProvidersManager /
+ RuntimeProvidersManager's ``.provider_configs`` property).
+
+ We use ``cached_property`` to cache the merged result; clear this
cache (via
+ ``invalidate_cache``) when toggling ``_use_providers_configuration``.
Review Comment:
The docstring for `configuration_description` describes a priority order
where later sources override earlier ones, but the implementation only fills
missing sections/options (it never overrides existing entries) and it merges
provider metadata before the cfg fallback. Consider updating the docstring to
match the actual merge behavior/precedence to avoid confusion for future
maintainers.
```suggestion
The merged description is built as follows:
1. Start from the base configuration description provided in
``__init__``, usually
loaded from ``config.yml`` in core. Values defined here are never
overridden.
2. Merge full provider configuration descriptions from
``_provider_metadata_configuration_description``, loaded from
provider packages'
``get_provider_info`` method (via ProvidersManager /
RuntimeProvidersManager's ``.provider_configs`` property). This
step only adds
missing sections/options and does not overwrite any existing
entries from the
base configuration.
3. Merge default values from
``_provider_cfg_config_fallback_default_values``,
loaded from ``provider_config_fallback_defaults.cfg``. This step
only sets the
``"default"`` (and, heuristically, ``"sensitive"``) keys for
options that do not
already define them, and it does not overwrite values coming from
either the base
configuration or provider metadata.
In effect, base configuration values take precedence, then provider
metadata fills
in any missing descriptions/options, and finally cfg-based fallbacks
provide default
values only where none are defined. We use ``cached_property`` to
cache the merged
result; clear this cache (via ``invalidate_cache``) when toggling
``_use_providers_configuration``.
```
##########
scripts/in_container/run_check_default_configuration.py:
##########
@@ -36,23 +36,37 @@
if __name__ == "__main__":
with tempfile.TemporaryDirectory() as tmp_dir:
+ # We need to explicitly set the logging level to ERROR to avoid debug
logs from "airflow config lint" command that can spoil the output and make the
test fail.
+ # This is needed in case the default config has logging level set to
DEBUG, but it does not hurt to set it explicitly in any case to avoid any
unexpected debug logs from the command.
+ env = os.environ.copy()
+ env["AIRFLOW__LOGGING__LOGGING_LEVEL"] = "ERROR"
+
# Write default config cmd output to a temporary file
default_config_file = os.path.join(tmp_dir, "airflow.cfg")
with open(default_config_file, "w") as f:
- result = subprocess.run(list_default_config_cmd, check=False,
stdout=f)
+ result = subprocess.run(list_default_config_cmd, check=False,
stdout=f, env=env)
if result.returncode != 0:
print(f"\033[0;31mERROR: when running `{'
'.join(list_default_config_cmd)}`\033[0m\n")
+ print(result.stdout.decode())
Review Comment:
When `list_default_config_cmd` fails, `result.stdout` will be `None` because
stdout is redirected to a file (`stdout=f`) and not captured. Calling
`result.stdout.decode()` will raise an exception and hide the original failure.
Either capture output (e.g. `capture_output=True` / `stderr=PIPE`) or print a
different diagnostic (like `result.stderr` if captured, or the path to the
generated file).
```suggestion
result = subprocess.run(list_default_config_cmd, check=False,
stdout=f, stderr=subprocess.PIPE, env=env)
if result.returncode != 0:
print(f"\033[0;31mERROR: when running `{'
'.join(list_default_config_cmd)}`\033[0m\n")
if result.stderr:
print(result.stderr.decode())
print(f"Default config (if any) was written to:
{default_config_file}")
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]