This is an automated email from the ASF dual-hosted git repository. potiuk pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push: new 3b8db612ff Add information about Qubole removal and make it possible to release it (#35492) 3b8db612ff is described below commit 3b8db612ff39abbf9e965081c859e9e439ed832d Author: Jarek Potiuk <ja...@potiuk.com> AuthorDate: Tue Nov 7 18:16:34 2023 +0100 Add information about Qubole removal and make it possible to release it (#35492) The Qubole provider has been agreed to be removed. We need to release one more release for it with the message in the documentation and readme that it has been removed. This change adds necessary changes to release management process that release manager can specify such suspended provider to be released - both as a package and as documentation being built - by manually specifying such provider when: * preparing documentation * preparing releases * building documentation * publishing documentation Generally it works in the way that: * suspended providers are not visible as options or autocomplete in the corresponding commands and are not included in case "all" providers are chosen by any of the commands. * but when you specifically type the suspended provider as argument or filter in any of the relevant commands, the action will be performed for that provider. 
--- PROVIDERS.rst | 8 +- airflow/provider.yaml.schema.json | 4 + ...VIDERS.rst => MANAGING_PROVIDERS_LIFECYCLE.rst} | 173 ++++++++++++++++++++- .../SUSPENDING_AND_RESUMING_PROVIDERS.rst | 150 ------------------ airflow/providers/apache/hdfs/CHANGELOG.rst | 2 +- airflow/providers/apache/pig/CHANGELOG.rst | 2 +- airflow/providers/apache/spark/CHANGELOG.rst | 2 +- airflow/providers/qubole/CHANGELOG.rst | 25 ++- airflow/providers/qubole/provider.yaml | 7 +- dev/README_RELEASE_PROVIDER_PACKAGES.md | 84 ++++++---- .../commands/release_management_commands.py | 6 +- .../src/airflow_breeze/utils/common_options.py | 4 +- .../airflow_breeze/utils/publish_docs_builder.py | 2 +- .../airflow_breeze/utils/publish_docs_helpers.py | 21 +-- .../airflow_breeze/utils/suspended_providers.py | 22 ++- .../PROVIDER_INDEX_TEMPLATE.rst.jinja2 | 12 ++ .../PROVIDER_README_TEMPLATE.rst.jinja2 | 11 ++ dev/provider_packages/prepare_provider_packages.py | 54 ++++++- docs/apache-airflow-providers-qubole/index.rst | 2 +- docs/apache-airflow-providers/index.rst | 3 +- docs/build_docs.py | 2 - docs/conf.py | 2 +- docs/exts/docs_build/code_utils.py | 1 + docs/exts/docs_build/dev_index_generator.py | 4 +- docs/exts/docs_build/docs_builder.py | 12 +- docs/exts/docs_build/package_filter.py | 28 +++- docs/exts/provider_yaml_utils.py | 4 +- images/breeze/output-commands-hash.txt | 12 +- .../in_container/run_provider_yaml_files_check.py | 21 +++ 29 files changed, 430 insertions(+), 250 deletions(-) diff --git a/PROVIDERS.rst b/PROVIDERS.rst index f76593c859..93bdc719be 100644 --- a/PROVIDERS.rst +++ b/PROVIDERS.rst @@ -81,6 +81,12 @@ Community providers lifecycle This document describes the complete life-cycle of community providers - from inception and approval to Airflow main branch to being decommissioned and removed from the main branch in Airflow repository. +.. 
note:: + + Technical details on how to manage lifecycle of providers are described in the document: + + `Managing provider's lifecycle <https://github.com/apache/airflow/blob/main/airflow/providers/MANGING_PROVIDERS_LIFECYCLE.rst>`_ + Accepting new community providers --------------------------------- @@ -281,7 +287,7 @@ more updates to the providers done by the community - except maybe potentially s might be various reasons for the providers to be removed: * the service they connect to is no longer available -* the dependencies for the provider are not maintained any more and there is no viable alternative +* the dependencies for the provider are not maintained anymore and there is no viable alternative * there is another, more popular provider that supersedes community provider * etc. etc. diff --git a/airflow/provider.yaml.schema.json b/airflow/provider.yaml.schema.json index 567a44db73..da32cd1c40 100644 --- a/airflow/provider.yaml.schema.json +++ b/airflow/provider.yaml.schema.json @@ -25,6 +25,10 @@ "description": "If set to true, the provider is suspended and it's not a candidate for release nor contributes dependencies to constraint calculations/CI image. Tests are excluded.", "type:": "boolean" }, + "removed": { + "description": "If set to true, the provider is also removed and will be soon removed from the code", + "type:": "boolean" + }, "dependencies": { "description": "Dependencies that should be added to the provider", "type": "array", diff --git a/airflow/providers/CREATING_COMMUNITY_PROVIDERS.rst b/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst similarity index 55% rename from airflow/providers/CREATING_COMMUNITY_PROVIDERS.rst rename to airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst index d4e14e138c..0fd2e16215 100644 --- a/airflow/providers/CREATING_COMMUNITY_PROVIDERS.rst +++ b/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst @@ -1,4 +1,3 @@ - .. 
Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information @@ -16,11 +15,11 @@ specific language governing permissions and limitations under the License. -Community Providers -=================== -How-to creating a new community provider ----------------------------------------- +.. contents:: :local: + +Creating a new community provider +================================= This document gathers the necessary steps to create a new community provider and also guidelines for updating the existing ones. You should be aware that providers may have distinctions that may not be covered in @@ -47,7 +46,7 @@ When you see this placeholder you must change for your provider name. Initial Code and Unit Tests -^^^^^^^^^^^^^^^^^^^^^^^^^^^ +--------------------------- Most likely you have developed a version of the provider using some local customization and now you need to transfer this code to the Airflow project. Below is described all the initial code structure that @@ -111,13 +110,13 @@ breeze and I'll run unit tests for my Hook. root@fafd8d630e46:/opt/airflow# python -m pytest tests/providers/<NEW_PROVIDER>/hook/<NEW_PROVIDER>.py Integration tests -^^^^^^^^^^^^^^^^^ +----------------- See `Airflow Integration Tests <https://github.com/apache/airflow/blob/main/TESTING.rst#airflow-integration-tests>`_ Documentation -^^^^^^^^^^^^^ +------------- An important part of building a new provider is the documentation. 
Some steps for documentation occurs automatically by ``pre-commit`` see `Installing pre-commit guide <https://github.com/apache/airflow/blob/main/CONTRIBUTORS_QUICK_START.rst#pre-commit>`_ @@ -285,3 +284,161 @@ main Airflow documentation that involves some steps with the providers is also w breeze build-docs --package-filter apache-airflow-providers-<NEW_PROVIDER> breeze build-docs --package-filter apache-airflow + + +Suspending providers +==================== + +As of April 2023, we have the possibility to suspend individual providers, so that they are not holding +back dependencies for Airflow and other providers. The process of suspending providers is described +in `description of the process <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#suspending-releases-for-providers>`_ + +Technically, suspending a provider is done by setting ``suspended : true``, in the provider.yaml of the +provider. This should be followed by committing the change and either automatically or manually running +pre-commit checks that will either update derived configuration files or ask you to update them manually. +Note that you might need to run pre-commit several times until all the static checks pass, +because modification from one pre-commit might impact other pre-commits. + +If you have pre-commit installed, pre-commit will be run automatically on commit. If you want to run it +manually after commit, you can run it via ``breeze static-checks --last-commit`` some of the tests might fail +because suspension of the provider might cause changes in the dependencies, so if you see errors about +missing dependencies imports, non-usable classes etc., you will need to build the CI image locally +via ``breeze build-image --python 3.8 --upgrade-to-newer-dependencies`` after the first pre-commit run +and then run the static checks again. 
+ +If you want to be absolutely sure to run all static checks you can always do this via +``pre-commit run --all-files`` or ``breeze static-checks --all-files``. + +Some of the manual modifications you will have to do (in both cases ``pre-commit`` will guide you on what +to do. + +* You will have to run ``breeze setup regenerate-command-images`` to regenerate breeze help files +* you will need to update ``extra-packages-ref.rst`` and in some cases - when mentioned there explicitly - + ``setup.py`` to remove the provider from list of dependencies. + +What happens under-the-hood as a result, is that ``generated/providers.json`` file is updated with +the information about available providers and their dependencies and it is used by our tooling to +exclude suspended providers from all relevant parts of the build and CI system (such as building CI image +with dependencies, building documentation, running tests, etc.) + + +Additional changes needed for cross-dependent providers +======================================================= + +Those steps above are usually enough for most providers that are "standalone" and not imported or used by +other providers (in most cases we will not suspend such providers). However some extra steps might be needed +for providers that are used by other providers, or that are part of the default PROD Dockerfile: + +* Most of the tests for the suspended provider, will be automatically excluded by pytest collection. However, + in case a provider is dependent on by another provider, the relevant tests might fail to be collected or + run by ``pytest``. In such cases you should skip the whole test module failing to be collected by + adding ``pytest.importorskip`` at the top of the test module. + For example if your tests fail because they need to import ``apache.airflow.providers.google`` + and you have suspended it, you should add this line at the top of the test module that fails. 
+ +Example failing collection after ``google`` provider has been suspended: + + .. code-block:: txt + + _____ ERROR collecting tests/providers/apache/beam/operators/test_beam.py ______ + ImportError while importing test module '/opt/airflow/tests/providers/apache/beam/operators/test_beam.py'. + Hint: make sure your test modules/packages have valid Python names. + Traceback: + /usr/local/lib/python3.8/importlib/__init__.py:127: in import_module + return _bootstrap._gcd_import(name[level:], package, level) + tests/providers/apache/beam/operators/test_beam.py:25: in <module> + from airflow.providers.apache.beam.operators.beam import ( + airflow/providers/apache/beam/operators/beam.py:35: in <module> + from airflow.providers.google.cloud.hooks.dataflow import ( + airflow/providers/google/cloud/hooks/dataflow.py:32: in <module> + from google.cloud.dataflow_v1beta3 import GetJobRequest, Job, JobState, JobsV1Beta3AsyncClient, JobView + E ModuleNotFoundError: No module named 'google.cloud.dataflow_v1beta3' + _ ERROR collecting tests/providers/microsoft/azure/transfers/test_azure_blob_to_gcs.py _ + + +The fix is to add this line at the top of the ``tests/providers/apache/beam/operators/test_beam.py`` module: + + .. code-block:: python + + pytest.importorskip("apache.airflow.providers.google") + + +* Some of the other providers might also just import unconditionally the suspended provider and they will + fail during the provider verification step in CI. In this case you should turn the provider imports + into conditional imports. For example when import fails after ``amazon`` provider has been suspended: + + .. 
code-block:: txt + + Traceback (most recent call last): + File "/opt/airflow/scripts/in_container/verify_providers.py", line 266, in import_all_classes + _module = importlib.import_module(modinfo.name) + File "/usr/local/lib/python3.8/importlib/__init__.py", line 127, in import_module + return _bootstrap._gcd_import(name, package, level) + File "<frozen importlib._bootstrap>", line 1006, in _gcd_import + File "<frozen importlib._bootstrap>", line 983, in _find_and_load + File "<frozen importlib._bootstrap>", line 967, in _find_and_load_unlocked + File "<frozen importlib._bootstrap>", line 677, in _load_unlocked + File "<frozen importlib._bootstrap_external>", line 728, in exec_module + File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed + File "/usr/local/lib/python3.8/site-packages/airflow/providers/mysql/transfers/s3_to_mysql.py", line 23, in <module> + from airflow.providers.amazon.aws.hooks.s3 import S3Hook + ModuleNotFoundError: No module named 'airflow.providers.amazon' + +or: + + .. code-block:: txt + + Error: The ``airflow.providers.microsoft.azure.transfers.azure_blob_to_gcs`` object in transfers list in + airflow/providers/microsoft/azure/provider.yaml does not exist or is not a module: + No module named 'gcloud.aio.storage' + +The fix for that is to turn the feature into an optional provider feature (in the place where the excluded +``airflow.providers`` import happens: + + .. code-block:: python + + try: + from airflow.providers.amazon.aws.hooks.s3 import S3Hook + except ImportError as e: + from airflow.exceptions import AirflowOptionalProviderFeatureException + + raise AirflowOptionalProviderFeatureException(e) + + +* In case we suspend an important provider, which is part of the default Dockerfile you might want to + update the tests for PROD docker image in ``docker_tests/test_prod_image.py``. + +* Some of the suspended providers might also fail ``breeze`` unit tests that expect a fixed set of providers. 
+ Those tests should be adjusted (but this is not very likely to happen, because the tests are using only + the most common providers that we will not be likely to suspend). + + +Resuming providers +================== + +Resuming providers is done by reverting the original change that suspended it. In case there are changes +needed to fix problems in the reverted provider, our CI will detect them and you will have to fix them +as part of the PR reverting the suspension. + + +Removing providers +================== + +When removing providers from Airflow code, we need to make one last release where we mark the provider as +removed - in documentation and in description of the PyPI package. In order to that release manager has to +add "removed: true" flag in the provider yaml file and include the provider in the next wave of the +providers (and then remove all the code and documentation related to the provider). + +The "removed: true" flag will cause the provider to be available for the following commands (note that such +provider has to be explicitly added as selected to the package - such provider will not be included in +the available list of providers): + +* ``breeze build-docs`` +* ``breeze release-management prepare-provider-documentation`` +* ``breeze release-management prepare-provider-packages`` +* ``breeze release-management publish-docs`` + +For all those commands, release manager needs to specify such to-be-removed provider explicitly as extra +command during the release process. Except the changelog that needs to be maintained manually, all other +documentation (main page of the provider documentation, PyPI README), will be automatically updated +to include removal notice. diff --git a/airflow/providers/SUSPENDING_AND_RESUMING_PROVIDERS.rst b/airflow/providers/SUSPENDING_AND_RESUMING_PROVIDERS.rst deleted file mode 100644 index 111b550633..0000000000 --- a/airflow/providers/SUSPENDING_AND_RESUMING_PROVIDERS.rst +++ /dev/null @@ -1,150 +0,0 @@ - .. 
Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Suspending providers -==================== - -As of April 2023, we have the possibility to suspend individual providers, so that they are not holding -back dependencies for Airflow and other providers. The process of suspending providers is described -in `description of the process <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#suspending-releases-for-providers>`_ - -Technically, suspending a provider is done by setting ``suspended : true``, in the provider.yaml of the -provider. This should be followed by committing the change and either automatically or manually running -pre-commit checks that will either update derived configuration files or ask you to update them manually. -Note that you might need to run pre-commit several times until all the static checks pass, -because modification from one pre-commit might impact other pre-commits. - -If you have pre-commit installed, pre-commit will be run automatically on commit. 
If you want to run it -manually after commit, you can run it via ``breeze static-checks --last-commit`` some of the tests might fail -because suspension of the provider might cause changes in the dependencies, so if you see errors about -missing dependencies imports, non-usable classes etc., you will need to build the CI image locally -via ``breeze build-image --python 3.8 --upgrade-to-newer-dependencies`` after the first pre-commit run -and then run the static checks again. - -If you want to be absolutely sure to run all static checks you can always do this via -``pre-commit run --all-files`` or ``breeze static-checks --all-files``. - -Some of the manual modifications you will have to do (in both cases ``pre-commit`` will guide you on what -to do. - -* You will have to run ``breeze setup regenerate-command-images`` to regenerate breeze help files -* you will need to update ``extra-packages-ref.rst`` and in some cases - when mentioned there explicitly - - ``setup.py`` to remove the provider from list of dependencies. - -What happens under-the-hood as the result, is that ``generated/providers.json`` file is updated with -the information about available providers and their dependencies and it is used by our tooling to -exclude suspended providers from all relevant parts of the build and CI system (such as building CI image -with dependencies, building documentation, running tests, etc.) - - -Additional changes needed for cross-dependent providers -======================================================= - -Those steps above are usually enough for most providers that are "standalone" and not imported or used by -other providers (in most cases we will not suspend such providers). However some extra steps might be needed -for providers that are used by other providers, or that are part of the default PROD Dockerfile: - -* Most of the tests for the suspended provider, will be automatically excluded by pytest collection. 
However, - in case a provider is dependent on by another provider, the relevant tests might fail to be collected or - run by ``pytest``. In such cases you should skip the whole test module failing to be collected by - adding ``pytest.importorskip`` at the top of the test module. - For example if your tests fail because they need to import ``apache.airflow.providers.google`` - and you have suspended it, you should add this line at the top of the test module that fails. - -Example failing collection after ``google`` provider has been suspended: - - .. code-block:: txt - - _____ ERROR collecting tests/providers/apache/beam/operators/test_beam.py ______ - ImportError while importing test module '/opt/airflow/tests/providers/apache/beam/operators/test_beam.py'. - Hint: make sure your test modules/packages have valid Python names. - Traceback: - /usr/local/lib/python3.8/importlib/__init__.py:127: in import_module - return _bootstrap._gcd_import(name[level:], package, level) - tests/providers/apache/beam/operators/test_beam.py:25: in <module> - from airflow.providers.apache.beam.operators.beam import ( - airflow/providers/apache/beam/operators/beam.py:35: in <module> - from airflow.providers.google.cloud.hooks.dataflow import ( - airflow/providers/google/cloud/hooks/dataflow.py:32: in <module> - from google.cloud.dataflow_v1beta3 import GetJobRequest, Job, JobState, JobsV1Beta3AsyncClient, JobView - E ModuleNotFoundError: No module named 'google.cloud.dataflow_v1beta3' - _ ERROR collecting tests/providers/microsoft/azure/transfers/test_azure_blob_to_gcs.py _ - - -The fix is to add this line at the top of the ``tests/providers/apache/beam/operators/test_beam.py`` module: - - .. code-block:: python - - pytest.importorskip("apache.airflow.providers.google") - - -* Some of the other providers might also just import unconditionally the suspended provider and they will - fail during provider verification step in CI. 
In this case you should turn the provider imports - into conditional imports. For example when import fails after ``amazon`` provider has been suspended: - - .. code-block:: txt - - Traceback (most recent call last): - File "/opt/airflow/scripts/in_container/verify_providers.py", line 266, in import_all_classes - _module = importlib.import_module(modinfo.name) - File "/usr/local/lib/python3.8/importlib/__init__.py", line 127, in import_module - return _bootstrap._gcd_import(name, package, level) - File "<frozen importlib._bootstrap>", line 1006, in _gcd_import - File "<frozen importlib._bootstrap>", line 983, in _find_and_load - File "<frozen importlib._bootstrap>", line 967, in _find_and_load_unlocked - File "<frozen importlib._bootstrap>", line 677, in _load_unlocked - File "<frozen importlib._bootstrap_external>", line 728, in exec_module - File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed - File "/usr/local/lib/python3.8/site-packages/airflow/providers/mysql/transfers/s3_to_mysql.py", line 23, in <module> - from airflow.providers.amazon.aws.hooks.s3 import S3Hook - ModuleNotFoundError: No module named 'airflow.providers.amazon' - -or: - - .. code-block:: txt - - Error: The ``airflow.providers.microsoft.azure.transfers.azure_blob_to_gcs`` object in transfers list in - airflow/providers/microsoft/azure/provider.yaml does not exist or is not a module: - No module named 'gcloud.aio.storage' - -The fix for that is to turn the feature into an optional provider feature (in the place where the excluded -``airflow.providers`` import happens: - - .. 
code-block:: python - - try: - from airflow.providers.amazon.aws.hooks.s3 import S3Hook - except ImportError as e: - from airflow.exceptions import AirflowOptionalProviderFeatureException - - raise AirflowOptionalProviderFeatureException(e) - - -* In case we suspend an important provider, which is part of the default Dockerfile you might want to - update the tests for PROD docker image in ``docker_tests/test_prod_image.py``. - -* Some of the suspended providers might also fail ``breeze`` unit tests that expect a fixed set of providers. - Those tests should be adjusted (but this is not very likely to happen, because the tests are using only - the most common providers that we will not be likely to suspend). - - -Resuming providers -================== - -Resuming providers is done by reverting the original change that suspended it. In case there are changes -needed to fix problems in the reverted provider, our CI will detect them and you will have to fix them -as part of the PR reverting the suspension. diff --git a/airflow/providers/apache/hdfs/CHANGELOG.rst b/airflow/providers/apache/hdfs/CHANGELOG.rst index 9ef49f6c23..15fd23033e 100644 --- a/airflow/providers/apache/hdfs/CHANGELOG.rst +++ b/airflow/providers/apache/hdfs/CHANGELOG.rst @@ -97,7 +97,7 @@ The 3.* version of the provider is still available and can be used if you need t sensors. The ``HDFSHook``, ``HDFSSensor``, ``HdfsRegexSensor``, ``HdfsRegexSensor`` that have been removed from -this provider and they are not available any more. If you want to continue using them, +this provider and they are not available anymore. If you want to continue using them, you can use 3.* version of the provider, but the recommendation is to switch to the new ``WebHDFSHook`` and ``WebHDFSSensor`` that use the ``WebHDFS`` API. 
diff --git a/airflow/providers/apache/pig/CHANGELOG.rst b/airflow/providers/apache/pig/CHANGELOG.rst index a6c81be6a5..642c0631a0 100644 --- a/airflow/providers/apache/pig/CHANGELOG.rst +++ b/airflow/providers/apache/pig/CHANGELOG.rst @@ -108,7 +108,7 @@ Misc Breaking changes ~~~~~~~~~~~~~~~~ -You cannot use ``pig_properties`` any more as connection extras. If you want to add extra parameters +You cannot use ``pig_properties`` anymore as connection extras. If you want to add extra parameters to ``pig`` command, you need to do it via ``pig_properties`` (string list) of the PigCliHook (new parameter) or via ``pig_opts`` (string with options separated by spaces) or ``pig_properties`` (string list) in the PigOperator . Any use of ``pig_properties`` extras in connection will raise an exception, diff --git a/airflow/providers/apache/spark/CHANGELOG.rst b/airflow/providers/apache/spark/CHANGELOG.rst index cd23992d98..e1115cb425 100644 --- a/airflow/providers/apache/spark/CHANGELOG.rst +++ b/airflow/providers/apache/spark/CHANGELOG.rst @@ -171,7 +171,7 @@ Breaking changes The ``spark-binary`` connection extra could be set to any binary, but with 4.0.0 version only two values are allowed for it ``spark-submit`` and ``spark2-submit``. -The ``spark-home`` connection extra is not allowed any more - the binary should be available on the +The ``spark-home`` connection extra is not allowed anymore - the binary should be available on the PATH in order to use SparkSubmitHook and SparkSubmitOperator. * ``Remove custom spark home and custom binaries for spark (#27646)`` diff --git a/airflow/providers/qubole/CHANGELOG.rst b/airflow/providers/qubole/CHANGELOG.rst index 065e088ca1..f676e31531 100644 --- a/airflow/providers/qubole/CHANGELOG.rst +++ b/airflow/providers/qubole/CHANGELOG.rst @@ -27,13 +27,32 @@ Changelog --------- -.. 
note:: - This release of provider is only available for Airflow 2.5+ as explained in the - `Apache Airflow providers support policy <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#minimum-supported-version-of-airflow-for-community-managed-providers>`_. +3.4.3 +..... + +.. warning:: + + This is the last release of the provider. + + This provider is not maintained anymore by the community. It has been removed and is not going to be + updated anymore. The removal was done according to the process described in + `Removing community providers <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#removing-community-providers>`_ + + Feel free to contact Airflow Development Mailing List if you have any questions. + +Misc +~~~~ + +* ``The provider is removed and not maintained anymore by the Airflow Community`` + 3.4.2 ..... +.. note:: + This release of provider is only available for Airflow 2.5+ as explained in the + `Apache Airflow providers support policy <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#minimum-supported-version-of-airflow-for-community-managed-providers>`_. 
+ Misc ~~~~ diff --git a/airflow/providers/qubole/provider.yaml b/airflow/providers/qubole/provider.yaml index 7f6c80ba14..3ffcdbe518 100644 --- a/airflow/providers/qubole/provider.yaml +++ b/airflow/providers/qubole/provider.yaml @@ -21,12 +21,11 @@ name: Qubole description: | `Qubole <https://www.qubole.com/>`__ -# Qubole has been acquired and seems that maintainers have left the project -# https://github.com/qubole/qds-sdk-py#where-are-the-maintainers- -# the package has been unmaintained for a long time and it's likely no-one uses it -# until someone steps up to maintain it, we suspend it suspended: true +removed: true # TODO: The provider is marked for removal after it's last release is prepared + versions: + - 3.4.3 - 3.4.2 - 3.4.1 - 3.4.0 diff --git a/dev/README_RELEASE_PROVIDER_PACKAGES.md b/dev/README_RELEASE_PROVIDER_PACKAGES.md index ae0598cf8b..4eee0d1c35 100644 --- a/dev/README_RELEASE_PROVIDER_PACKAGES.md +++ b/dev/README_RELEASE_PROVIDER_PACKAGES.md @@ -133,6 +133,17 @@ Details about maintaining the SEMVER version are going to be discussed and imple breeze release-management prepare-provider-documentation [packages] ``` +NOTE! When you want to release a provider marked for removal (needed in order to prepare last release of the +provider), documentation for the provider will not be prepared when you prepare documentation for +all providers - you have to specifically use the provider name in a separate command. +For example to prepare documentation for `qubole` provider marked for removal you need to run +separately this command: + +```shell script +breeze release-management prepare-provider-documentation qubole +``` + + This command will not only prepare documentation but will also help the release manager to review changes implemented in all providers, and determine which of the providers should be released. 
For each provider details will be printed on what changes were implemented since the last release including @@ -205,6 +216,18 @@ if you only build few packages, run: breeze release-management prepare-provider-packages --package-format both PACKAGE PACKAGE .... ``` + +NOTE! When you want to release a provider marked for removal (needed in order to prepare last release of the +provider), package for the provider will not be prepared when you prepare documentation for +all providers - you have to specifically use the provider name in a separate command. +For example to prepare documentation for `qubole` provider marked for removal you need to run +separately this command: + +```shell script +breeze release-management prepare-provider-packages --package-format both qubole +``` + + * Sign all your packages ```shell script @@ -274,6 +297,17 @@ if you only build few packages, run: breeze release-management prepare-provider-packages --version-suffix-for-pypi rc1 --package-format both PACKAGE PACKAGE .... ``` +NOTE! When you want to release a provider marked for removal (needed in order to prepare last release of the +provider), package for the provider will not be prepared when you prepare documentation for +all providers - you have to specifically use the provider name in a separate command. 
+For example to prepare documentation for `qubole` provider marked for removal you need to run +separately this command: + +```shell script +breeze release-management prepare-provider-packages --package-format both qubole +``` + + * Verify the artifacts that would be uploaded: ```shell script @@ -353,39 +387,32 @@ git pull --rebase ```shell script cd "${AIRFLOW_REPO_ROOT}" -breeze build-docs --clean-build --package-filter apache-airflow-providers \ - --package-filter 'apache-airflow-providers-*' +breeze build-docs --clean-build providers-index --package-filter 'apache-airflow-providers-*' ``` Usually when we release packages we also build documentation for the "documentation-only" packages. This means that unless we release just few selected packages or if we need to deliberately skip some packages we should release documentation for all provider packages and the above command is the one to use. -If we want to just release some providers you can release them in this way: - -```shell script -cd "${AIRFLOW_REPO_ROOT}" -breeze build-docs --clean-build \ - --package-filter apache-airflow-providers \ - --package-filter 'apache-airflow-providers-PACKAGE1' \ - --package-filter 'apache-airflow-providers-PACKAGE2' \ - ... -``` - -You can also use shorthand names as arguments instead of using the full names -for airflow providers. Example: +If we want to just release some providers you can release them using package names: ```shell script cd "${AIRFLOW_REPO_ROOT}" breeze build-docs providers-index cncf.kubernetes sftp --clean-build ``` -If you have providers as list of provider ids because you just released them, you can build them with + +NOTE! When you want to release a provider marked for removal (needed in order to prepare last release of the +provider), doc for the provider will not be built when you prepare documentation for +all providers - you have to specifically use the provider name in a separate command. 
+For example, to build the documentation for the `qubole` provider marked for removal you need to run +this command separately: ```shell script -breeze build-docs --clean-build amazon apache.beam google .... +breeze build-docs qubole ``` + - Now you can preview the documentation. ```shell script @@ -401,9 +428,7 @@ way faster on multi-cpu machines when you are publishing multiple providers: ```shell script cd "${AIRFLOW_REPO_ROOT}" -breeze release-management publish-docs \ - --package-filter apache-airflow-providers \ - --package-filter 'apache-airflow-providers-*' \ +breeze release-management publish-docs providers-index --package-filter 'apache-airflow-providers-*' \ --override-versioned --run-in-parallel breeze release-management add-back-references all-providers @@ -420,22 +445,21 @@ If you have providers as list of provider ids because you just released them you ```shell script cd "${AIRFLOW_REPO_ROOT}" -breeze release-management publish-docs providers-index amazon cncf.kubernetes --override-versioned --run-in-parallel - -breeze release-management add-back-references amazon cncf.kubernetes +breeze release-management publish-docs amazon apache.beam google .... +breeze release-management add-back-references all-providers ``` -or with +NOTE! When you want to release a provider marked for removal (needed in order to prepare the last release of the +provider), docs for the provider will not be published when you publish documentation for +all providers - you have to specifically use the provider name in a separate command. +For example, to publish the documentation for the `qubole` provider marked for removal you need to run +this command separately: ```shell script -cd "${AIRFLOW_REPO_ROOT}" - -./dev/provider_packages/publish_provider_documentation.sh amazon apache.beam google ....
- -# No need to add back references as the script has this step as integral part +breeze release-management publish-docs qubole +breeze release-management add-back-references all-providers ``` - - If you publish a new package, you must add it to [the docs index](https://github.com/apache/airflow-site/blob/master/landing-pages/site/content/en/docs/_index.md): diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py index af81359fe5..414529e761 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py @@ -121,7 +121,7 @@ from airflow_breeze.utils.run_utils import ( run_compile_www_assets, ) from airflow_breeze.utils.shared_options import get_dry_run, get_forced_answer, get_verbose -from airflow_breeze.utils.suspended_providers import get_suspended_provider_ids +from airflow_breeze.utils.suspended_providers import get_removed_provider_ids option_debug_release_management = click.option( "--debug", @@ -311,13 +311,13 @@ def prepare_provider_packages( cleanup_python_generated_files() packages_list = list(packages) - suspended_provider_ids = get_suspended_provider_ids() + removed_provider_ids = get_removed_provider_ids() if package_list_file: packages_list.extend( [ package.strip() for package in package_list_file.readlines() - if package.strip() not in suspended_provider_ids + if package.strip() not in removed_provider_ids ] ) shell_params = ShellParams( diff --git a/dev/breeze/src/airflow_breeze/utils/common_options.py b/dev/breeze/src/airflow_breeze/utils/common_options.py index 7d8351bef4..3da2e19d8d 100644 --- a/dev/breeze/src/airflow_breeze/utils/common_options.py +++ b/dev/breeze/src/airflow_breeze/utils/common_options.py @@ -458,7 +458,7 @@ argument_packages = click.argument( "packages", nargs=-1, required=False, - 
type=BetterChoice(get_available_documentation_packages(short_version=True)), + type=NotVerifiedBetterChoice(get_available_documentation_packages(short_version=True)), ) argument_short_doc_packages = click.argument( "short_doc_packages", @@ -471,7 +471,7 @@ argument_short_doc_packages_with_providers_index = click.argument( "short_doc_packages", nargs=-1, required=False, - type=BetterChoice( + type=NotVerifiedBetterChoice( ["all-providers", PROVIDERS_INDEX_KEY, *get_available_documentation_packages(short_version=True)] ), ) diff --git a/dev/breeze/src/airflow_breeze/utils/publish_docs_builder.py b/dev/breeze/src/airflow_breeze/utils/publish_docs_builder.py index e5dcc1be22..b8e64d66f3 100644 --- a/dev/breeze/src/airflow_breeze/utils/publish_docs_builder.py +++ b/dev/breeze/src/airflow_breeze/utils/publish_docs_builder.py @@ -95,7 +95,7 @@ class PublishDocsBuilder: if self.package_name == "apache-airflow": return get_airflow_version() if self.package_name.startswith("apache-airflow-providers-"): - all_providers_yaml = load_package_data() + all_providers_yaml = load_package_data(include_suspended=True) provider = next(p for p in all_providers_yaml if p["package-name"] == self.package_name) return provider["versions"][0] if self.package_name == "helm-chart": diff --git a/dev/breeze/src/airflow_breeze/utils/publish_docs_helpers.py b/dev/breeze/src/airflow_breeze/utils/publish_docs_helpers.py index e43eca408d..d81bc23904 100644 --- a/dev/breeze/src/airflow_breeze/utils/publish_docs_helpers.py +++ b/dev/breeze/src/airflow_breeze/utils/publish_docs_helpers.py @@ -27,6 +27,7 @@ from typing import Any import yaml from airflow_breeze.utils.general_utils import get_docs_filter_name_from_short_hand +from airflow_breeze.utils.suspended_providers import get_removed_provider_ids CONSOLE_WIDTH = 180 @@ -59,7 +60,7 @@ def get_provider_yaml_paths(): return sorted(glob(f"{ROOT_DIR}/airflow/providers/**/provider.yaml", recursive=True)) -def load_package_data() -> list[dict[str, Any]]: 
+def load_package_data(include_suspended: bool = False) -> list[dict[str, Any]]: """ Load all data from providers files @@ -76,7 +77,7 @@ def load_package_data() -> list[dict[str, Any]]: jsonschema.validate(provider, schema=schema) except jsonschema.ValidationError: raise Exception(f"Unable to parse: {provider_yaml_path}.") - if provider["suspended"]: + if provider["suspended"] and not include_suspended: continue provider_yaml_dir = os.path.dirname(provider_yaml_path) provider["python-module"] = _filepath_to_module(provider_yaml_dir) @@ -86,12 +87,10 @@ def load_package_data() -> list[dict[str, Any]]: return result -def get_available_packages(): +def get_available_packages(include_suspended: bool = False): """Get list of all available packages to build.""" - all_providers_yaml = load_package_data() - provider_package_names = [ - provider["package-name"] for provider in all_providers_yaml if not provider.get("suspended") - ] + all_providers_yaml = load_package_data(include_suspended=include_suspended) + provider_package_names = [provider["package-name"] for provider in all_providers_yaml] return [ "apache-airflow", "docker-stack", @@ -113,15 +112,19 @@ def process_package_filters( package_filters = list(package_filters + get_docs_filter_name_from_short_hand(packages_short_form)) + removed_packages = [ + f"apache-airflow-providers-{provider.replace('.','-')}" for provider in get_removed_provider_ids() + ] + all_packages_including_removed = available_packages + removed_packages invalid_filters = [ - f for f in package_filters if not any(fnmatch.fnmatch(p, f) for p in available_packages) + f for f in package_filters if not any(fnmatch.fnmatch(p, f) for p in all_packages_including_removed) ] if invalid_filters: raise SystemExit( f"Some filters did not find any package: {invalid_filters}, Please check if they are correct." 
) - return [p for p in available_packages if any(fnmatch.fnmatch(p, f) for f in package_filters)] + return [p for p in all_packages_including_removed if any(fnmatch.fnmatch(p, f) for f in package_filters)] def pretty_format_path(path: str, start: str) -> str: diff --git a/dev/breeze/src/airflow_breeze/utils/suspended_providers.py b/dev/breeze/src/airflow_breeze/utils/suspended_providers.py index 490393e86f..be918e3565 100644 --- a/dev/breeze/src/airflow_breeze/utils/suspended_providers.py +++ b/dev/breeze/src/airflow_breeze/utils/suspended_providers.py @@ -16,6 +16,9 @@ # under the License. from __future__ import annotations +import sys + +from airflow_breeze.utils.console import get_console from airflow_breeze.utils.path_utils import AIRFLOW_PROVIDERS_ROOT, AIRFLOW_SOURCES_ROOT @@ -38,17 +41,22 @@ def get_suspended_providers_folders() -> list[str]: return suspended_providers -def get_suspended_provider_ids() -> list[str]: +def get_removed_provider_ids() -> list[str]: """ Yields the ids of suspended providers. """ import yaml - suspended_provider_ids = [] + removed_provider_ids = [] for provider_path in AIRFLOW_PROVIDERS_ROOT.rglob("provider.yaml"): provider_yaml = yaml.safe_load(provider_path.read_text()) - if provider_yaml.get("suspended"): - suspended_provider_ids.append( - provider_yaml["package-name"][len("apache-airflow-providers-") :].replace("-", ".") - ) - return suspended_provider_ids + package_name = provider_yaml.get("package-name") + if provider_yaml.get("removed", False): + if not provider_yaml.get("suspended"): + get_console().print( + f"[error]The provider {package_name} is marked for removal in provider.yaml, but " + f"not suspended. 
Please suspend the provider first before removing it.\n" + ) + sys.exit(1) + removed_provider_ids.append(package_name[len("apache-airflow-providers-") :].replace("-", ".")) + return removed_provider_ids diff --git a/dev/provider_packages/PROVIDER_INDEX_TEMPLATE.rst.jinja2 b/dev/provider_packages/PROVIDER_INDEX_TEMPLATE.rst.jinja2 index bd34551a48..4e7943ced2 100644 --- a/dev/provider_packages/PROVIDER_INDEX_TEMPLATE.rst.jinja2 +++ b/dev/provider_packages/PROVIDER_INDEX_TEMPLATE.rst.jinja2 @@ -45,6 +45,18 @@ Provider package This is a provider package for ``{{PROVIDER_PACKAGE_ID}}`` provider. All classes for this provider package are in ``{{FULL_PACKAGE_NAME}}`` python package. +{%- if PROVIDER_REMOVED %} + + .. warning:: + + This provider is not maintained anymore by the community. It has been removed and is not going to be + updated anymore. The removal was done according to the process described in + `Removing community providers <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#removing-community-providers>`_ + + Feel free to contact Airflow Development Mailing List if you have any questions. + +{%- endif %} + Installation ------------ diff --git a/dev/provider_packages/PROVIDER_README_TEMPLATE.rst.jinja2 b/dev/provider_packages/PROVIDER_README_TEMPLATE.rst.jinja2 index 83ad288e25..13e088aa94 100644 --- a/dev/provider_packages/PROVIDER_README_TEMPLATE.rst.jinja2 +++ b/dev/provider_packages/PROVIDER_README_TEMPLATE.rst.jinja2 @@ -57,6 +57,17 @@ are in ``{{FULL_PACKAGE_NAME}}`` python package. You can find package information and changelog for the provider in the `documentation <https://airflow.apache.org/docs/{{ PACKAGE_PIP_NAME }}/{{RELEASE}}/>`_. +{%- if PROVIDER_REMOVED %} + + .. warning:: + + This provider is not maintained anymore by the community. It has been removed and is not going to be + updated anymore. 
The removal was done according to the process described in + `Removing community providers <https://github.com/apache/airflow/blob/main/PROVIDERS.rst#removing-community-providers>`_ + + Feel free to contact Airflow Development Mailing List if you have any questions. + +{%- endif %} Installation ------------ diff --git a/dev/provider_packages/prepare_provider_packages.py b/dev/provider_packages/prepare_provider_packages.py index dc95c54950..05789e65e9 100755 --- a/dev/provider_packages/prepare_provider_packages.py +++ b/dev/provider_packages/prepare_provider_packages.py @@ -147,6 +147,7 @@ class ProviderPackageDetails(NamedTuple): versions: list[str] excluded_python_versions: list[str] plugins: list[PluginInfo] + removed: bool class EntityType(Enum): @@ -345,9 +346,12 @@ def get_install_requirements(provider_package_id: str, version_suffix: str) -> s return install_clause + ".dev0" return install_clause - install_requires = [ - apply_version_suffix(clause) for clause in ALL_DEPENDENCIES[provider_package_id][DEPS] - ] + if provider_package_id in get_removed_provider_ids(): + provider_info = get_provider_info_from_provider_yaml(provider_package_id) + dependencies = provider_info["dependencies"] + else: + dependencies = ALL_DEPENDENCIES[provider_package_id][DEPS] + install_requires = [apply_version_suffix(clause) for clause in dependencies] return "".join(f"\n {ir}" for ir in install_requires) @@ -370,6 +374,8 @@ def get_package_extras(provider_package_id: str) -> dict[str, list[str]]: """ if provider_package_id == "providers": return {} + if provider_package_id in get_removed_provider_ids(): + return {} extras_dict: dict[str, list[str]] = { module: [get_pip_package_name(module)] for module in ALL_DEPENDENCIES[provider_package_id][CROSS_PROVIDERS_DEPS] @@ -674,6 +680,8 @@ def get_cross_provider_dependent_packages(provider_package_id: str) -> list[str] :param provider_package_id: package id :return: list of cross-provider dependencies """ + if provider_package_id in 
get_removed_provider_ids(): + return [] return ALL_DEPENDENCIES[provider_package_id][CROSS_PROVIDERS_DEPS] @@ -1077,6 +1085,7 @@ def get_provider_details(provider_package_id: str) -> ProviderPackageDetails: versions=provider_info["versions"], excluded_python_versions=provider_info.get("excluded-python-versions") or [], plugins=plugins, + removed=provider_info.get("removed", False), ) @@ -1162,6 +1171,7 @@ def get_provider_jinja_context( "PLUGINS": provider_details.plugins, "MIN_AIRFLOW_VERSION": min_airflow_version, "PREINSTALLED_PROVIDER": provider_details.provider_package_id in PREINSTALLED_PROVIDERS, + "PROVIDER_REMOVED": provider_details.removed, } return context @@ -1670,16 +1680,46 @@ def get_all_providers() -> list[str]: return list(ALL_PROVIDERS) +def get_removed_provider_ids() -> list[str]: + """ + Yields the ids of suspended providers. + """ + import yaml + + removed_provider_ids = [] + for provider_path in PROVIDERS_PATH.rglob("provider.yaml"): + provider_yaml = yaml.safe_load(provider_path.read_text()) + package_name = provider_yaml.get("package-name") + if provider_yaml.get("removed", False): + if not provider_yaml.get("suspended"): + console.print( + f"[error]The provider {package_name} is marked for removal in provider.yaml, but " + f"not suspended. Please suspend the provider first before removing it.\n" + ) + sys.exit(1) + removed_provider_ids.append(package_name[len("apache-airflow-providers-") :].replace("-", ".")) + return removed_provider_ids + + def verify_provider_package(provider_package_id: str) -> None: """Verifies if the provider package is good. 
:param provider_package_id: package id to verify """ if provider_package_id not in get_all_providers(): - console.print(f"[red]Wrong package name: {provider_package_id}[/]") - console.print("Use one of:") - console.print(get_all_providers()) - raise Exception(f"The package {provider_package_id} is not a provider package.") + if provider_package_id in get_removed_provider_ids(): + console.print() + console.print( + f"[yellow]The package: {provider_package_id} is suspended, but " + f"since you asked for it, it will be built [/]" + ) + console.print() + else: + console.print(f"[red]Wrong package name: {provider_package_id}[/]") + console.print("Use one of:") + console.print(get_all_providers()) + console.print(f"[red]The package {provider_package_id} is not a provider package.") + sys.exit(1) def verify_changelog_exists(package: str) -> str: diff --git a/docs/apache-airflow-providers-qubole/index.rst b/docs/apache-airflow-providers-qubole/index.rst index 8b0c91bac2..f4d9db0353 100644 --- a/docs/apache-airflow-providers-qubole/index.rst +++ b/docs/apache-airflow-providers-qubole/index.rst @@ -59,6 +59,7 @@ PyPI Repository <https://pypi.org/project/apache-airflow-providers-qubole/> Installing from sources <installing-providers-from-sources> + .. THE REMAINDER OF THE FILE IS AUTOMATICALLY GENERATED. IT WILL BE OVERWRITTEN AT RELEASE TIME! 
@@ -75,7 +76,6 @@ Package apache-airflow-providers-qubole `Qubole <https://www.qubole.com/>`__ - Release: 3.4.2 Provider package diff --git a/docs/apache-airflow-providers/index.rst b/docs/apache-airflow-providers/index.rst index 8b8390e5e6..2d4cf4012e 100644 --- a/docs/apache-airflow-providers/index.rst +++ b/docs/apache-airflow-providers/index.rst @@ -184,7 +184,8 @@ provider packages are automatically documented in the release notes of every pro If you want to contribute to ``Apache Airflow``, you can see how to build and extend community -managed providers in ``https://github.com/apache/airflow/blob/main/airflow/providers/CREATING_COMMUNITY_PROVIDERS.rst``. +managed providers in +``https://github.com/apache/airflow/blob/main/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst``. .. toctree:: :hidden: diff --git a/docs/build_docs.py b/docs/build_docs.py index c5e16d46ce..aaba029b60 100755 --- a/docs/build_docs.py +++ b/docs/build_docs.py @@ -430,7 +430,6 @@ def main(): disable_provider_checks = args.disable_provider_checks disable_checks = args.disable_checks package_filters = args.package_filter - with with_group("Available packages"): for pkg in sorted(available_packages): console.print(f" - {pkg}") @@ -438,7 +437,6 @@ def main(): if package_filters: console.print("Current package filters: ", package_filters) current_packages = process_package_filters(available_packages, package_filters) - with with_group("Fetching inventories"): # Inventories that could not be retrieved should be built first. This may mean this is a # new package. 
diff --git a/docs/conf.py b/docs/conf.py index d70845cda4..3c7cfa26b0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -63,7 +63,7 @@ if PACKAGE_NAME == "apache-airflow": elif PACKAGE_NAME.startswith("apache-airflow-providers-"): from provider_yaml_utils import load_package_data - ALL_PROVIDER_YAMLS = load_package_data() + ALL_PROVIDER_YAMLS = load_package_data(include_suspended=True) try: CURRENT_PROVIDER = next( provider_yaml diff --git a/docs/exts/docs_build/code_utils.py b/docs/exts/docs_build/code_utils.py index c7eef91c7d..6aef1ab1f3 100644 --- a/docs/exts/docs_build/code_utils.py +++ b/docs/exts/docs_build/code_utils.py @@ -29,6 +29,7 @@ DOCS_DIR = os.path.join(ROOT_PROJECT_DIR, "docs") AIRFLOW_DIR = os.path.join(ROOT_PROJECT_DIR, "airflow") ALL_PROVIDER_YAMLS = load_package_data() +ALL_PROVIDER_YAMLS_WITH_SUSPENDED = load_package_data(include_suspended=True) AIRFLOW_SITE_DIR: str = os.environ.get("AIRFLOW_SITE_DIRECTORY") or "" PROCESS_TIMEOUT = 15 * 60 diff --git a/docs/exts/docs_build/dev_index_generator.py b/docs/exts/docs_build/dev_index_generator.py index 0b9e9072ab..0aed2ec70b 100644 --- a/docs/exts/docs_build/dev_index_generator.py +++ b/docs/exts/docs_build/dev_index_generator.py @@ -31,7 +31,7 @@ from docs.exts.provider_yaml_utils import load_package_data CURRENT_DIR = os.path.abspath(os.path.dirname(__file__)) DOCS_DIR = os.path.abspath(os.path.join(CURRENT_DIR, os.pardir, os.pardir)) BUILD_DIR = os.path.abspath(os.path.join(DOCS_DIR, "_build")) -ALL_PROVIDER_YAMLS = load_package_data() +ALL_PROVIDER_YAMLS_WITH_SUSPENDED = load_package_data(include_suspended=True) def _get_jinja_env(): @@ -46,7 +46,7 @@ def _render_template(template_name, **kwargs): def _render_content(): providers = [] - provider_yamls = {p["package-name"]: p for p in ALL_PROVIDER_YAMLS} + provider_yamls = {p["package-name"]: p for p in ALL_PROVIDER_YAMLS_WITH_SUSPENDED} for path in sorted(Path(BUILD_DIR).glob("docs/apache-airflow-providers-*/")): package_name = path.name try: 
diff --git a/docs/exts/docs_build/docs_builder.py b/docs/exts/docs_build/docs_builder.py index 375e9db8c5..d72c8941f6 100644 --- a/docs/exts/docs_build/docs_builder.py +++ b/docs/exts/docs_build/docs_builder.py @@ -28,6 +28,7 @@ from rich.console import Console from .code_utils import ( AIRFLOW_SITE_DIR, ALL_PROVIDER_YAMLS, + ALL_PROVIDER_YAMLS_WITH_SUSPENDED, CONSOLE_WIDTH, DOCS_DIR, PROCESS_TIMEOUT, @@ -291,14 +292,17 @@ class AirflowDocsBuilder: console.print() -def get_available_providers_packages(): +def get_available_providers_packages(include_suspended: bool = False): """Get list of all available providers packages to build.""" - return [provider["package-name"] for provider in ALL_PROVIDER_YAMLS if not provider.get("suspended")] + return [ + provider["package-name"] + for provider in (ALL_PROVIDER_YAMLS_WITH_SUSPENDED if include_suspended else ALL_PROVIDER_YAMLS) + ] -def get_available_packages(): +def get_available_packages(include_suspended: bool = False): """Get list of all available packages to build.""" - provider_package_names = get_available_providers_packages() + provider_package_names = get_available_providers_packages(include_suspended=include_suspended) return [ "apache-airflow", *provider_package_names, diff --git a/docs/exts/docs_build/package_filter.py b/docs/exts/docs_build/package_filter.py index 53316374f9..d1e497e15c 100644 --- a/docs/exts/docs_build/package_filter.py +++ b/docs/exts/docs_build/package_filter.py @@ -17,6 +17,25 @@ from __future__ import annotations import fnmatch +from pathlib import Path + +PROVIDERS_DIR = Path(__file__).parents[3].resolve() / "airflow" / "providers" + + +def get_removed_provider_ids() -> list[str]: + """ + Yields the ids of suspended providers. 
+ """ + import yaml + + removed_provider_ids = [] + for provider_path in PROVIDERS_DIR.rglob("provider.yaml"): + provider_yaml = yaml.safe_load(provider_path.read_text()) + if provider_yaml.get("removed"): + removed_provider_ids.append( + provider_yaml["package-name"][len("apache-airflow-providers-") :].replace("-", ".") + ) + return removed_provider_ids def process_package_filters(available_packages: list[str], package_filters: list[str] | None): @@ -27,12 +46,15 @@ def process_package_filters(available_packages: list[str], package_filters: list if not package_filters: return available_packages + suspended_packages = [ + f"apache-airflow-providers-{provider.replace('.','-')}" for provider in get_removed_provider_ids() + ] + all_packages_with_suspended = available_packages + suspended_packages invalid_filters = [ - f for f in package_filters if not any(fnmatch.fnmatch(p, f) for p in available_packages) + f for f in package_filters if not any(fnmatch.fnmatch(p, f) for p in all_packages_with_suspended) ] if invalid_filters: raise SystemExit( f"Some filters did not find any package: {invalid_filters}, Please check if they are correct." 
) - - return [p for p in available_packages if any(fnmatch.fnmatch(p, f) for f in package_filters)] + return [p for p in all_packages_with_suspended if any(fnmatch.fnmatch(p, f) for f in package_filters)] diff --git a/docs/exts/provider_yaml_utils.py b/docs/exts/provider_yaml_utils.py index accd5a4c72..2a49cf4652 100644 --- a/docs/exts/provider_yaml_utils.py +++ b/docs/exts/provider_yaml_utils.py @@ -54,7 +54,7 @@ def get_provider_yaml_paths(): return sorted(glob(f"{ROOT_DIR}/airflow/providers/**/provider.yaml", recursive=True)) -def load_package_data() -> list[dict[str, Any]]: +def load_package_data(include_suspended: bool = False) -> list[dict[str, Any]]: """ Load all data from providers files @@ -69,7 +69,7 @@ def load_package_data() -> list[dict[str, Any]]: jsonschema.validate(provider, schema=schema) except jsonschema.ValidationError: raise Exception(f"Unable to parse: {provider_yaml_path}.") - if provider["suspended"]: + if provider["suspended"] and not include_suspended: continue provider_yaml_dir = os.path.dirname(provider_yaml_path) provider["python-module"] = _filepath_to_module(provider_yaml_dir) diff --git a/images/breeze/output-commands-hash.txt b/images/breeze/output-commands-hash.txt index c287c24c4b..df6136ebe1 100644 --- a/images/breeze/output-commands-hash.txt +++ b/images/breeze/output-commands-hash.txt @@ -2,7 +2,7 @@ # Please do not solve it but run `breeze setup regenerate-command-images`. # This command should fix the conflict and regenerate help images that you have conflict with. 
main:96b4884054753db922cb8ca2cc555368 -build-docs:2ff0e1725d71f0b16244507483a43d52 +build-docs:9acb6b6cb8a0930f252813e78cd30848 ci:find-backtracking-candidates:17fe56b867a745e5032a08dfcd3f73ee ci:fix-ownership:3e5a73533cc96045e72cb258783cfc96 ci:free-space:49af17b032039c05c41a7a8283f365cc @@ -39,19 +39,19 @@ prod-image:6011405076eb0e1049d87e971e3adce1 release-management:add-back-references:824cb5a426c88b262c357d8b20b06bb9 release-management:create-minor-branch:a3834afc4aa5d1e98002c9e9e7a9931d release-management:generate-constraints:01aef235b11e59ed7f10c970a5cdaba7 -release-management:generate-issue-content-providers:54d7c46992d256b87227cea87ef73ab0 +release-management:generate-issue-content-providers:57e91dce9df385fe93e1b4c6b88c65ba release-management:generate-providers-metadata:d4e8e5cfaa024e3963af02d7a873048d release-management:install-provider-packages:34c38aca17d23dbb454fe7a6bfd8e630 release-management:prepare-airflow-package:85d01c57e5b5ee0fb9e5f9d9706ed3b5 -release-management:prepare-provider-documentation:519f830d76013ca5313e72eecea24421 -release-management:prepare-provider-packages:d3fb4f06f7b67f9824abdebc52ef561b -release-management:publish-docs:be9b9b57777c23a8bb5fe59860c8d18c +release-management:prepare-provider-documentation:2f310457cfa075508aa8a926258a5f28 +release-management:prepare-provider-packages:da7b617d250d0bbc1567d83ef8f70dc4 +release-management:publish-docs:d433c5844046cd1d6da48acc3c3ece90 release-management:release-prod-images:cfbfe8b19fee91fd90718f98ef2fd078 release-management:start-rc-process:b27bd524dd3c89f50a747b60a7e892c1 release-management:start-release:419f48f6a4ff4457cb9de7ff496aebbe release-management:update-constraints:02ec4b119150e3fdbac52026e94820ef release-management:verify-provider-packages:96dce5644aad6b37080acf77b3d8de3a -release-management:1b0ee6fcf999ed59408e1a294889eb86 +release-management:63701dcf965ebf8c2c75e703379db9d6 sbom:build-all-airflow-images:32f8acade299c2b112e986bae99846db 
sbom:generate-providers-requirements:344968a060e00ace19738cca47d70641 sbom:update-sbom-information:653be48be70b4b7ff5172d491aadc694 diff --git a/scripts/in_container/run_provider_yaml_files_check.py b/scripts/in_container/run_provider_yaml_files_check.py index 83cda02e4b..8f0e18009b 100755 --- a/scripts/in_container/run_provider_yaml_files_check.py +++ b/scripts/in_container/run_provider_yaml_files_check.py @@ -680,6 +680,26 @@ def check_providers_have_all_documentation_files(yaml_files: dict[str, dict]): return num_providers, num_errors +@run_check("Checking remove flag only set for suspended providers") +def check_removed_flag_only_set_for_suspended_providers(yaml_files: dict[str, dict]): + num_errors = 0 + num_providers = 0 + for package_info in yaml_files.values(): + num_providers += 1 + package_name = package_info["package-name"] + suspended = package_info["suspended"] + removed = package_info.get("removed", False) + if removed and not suspended: + errors.append( + f"The provider {package_name} has removed set to True in their provider.yaml file " + f"but suspended flag is set to false. You should only set removed flag in order to " + f"prepare last release for a provider that has been previously suspended. " + f"[yellow]How to fix it[/]: Please suspend the provider first before removing it." + ) + num_errors += 1 + return num_providers, num_errors + + if __name__ == "__main__": ProvidersManager().initialize_providers_configuration() architecture = Architecture.get_current() @@ -706,6 +726,7 @@ if __name__ == "__main__": check_notification_classes(all_parsed_yaml_files) check_unique_provider_name(all_parsed_yaml_files) check_providers_have_all_documentation_files(all_parsed_yaml_files) + check_removed_flag_only_set_for_suspended_providers(all_parsed_yaml_files) if all_files_loaded: # Only check those if all provider files are loaded