This is an automated email from the ASF dual-hosted git repository. potiuk pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push: new d45840cf0f2 Move Apache HDFS to new provider structure (#46140) d45840cf0f2 is described below commit d45840cf0f283aaaa70a54b614275442ec5e80e7 Author: LIU ZHE YOU <68415893+jason810...@users.noreply.github.com> AuthorDate: Fri Feb 7 23:15:22 2025 +0800 Move Apache HDFS to new provider structure (#46140) * Move Apache HDFS to new provider structure * Fix check-for-inclusive-language ignore * fixup! Fix check-for-inclusive-language ignore --------- Co-authored-by: Jarek Potiuk <ja...@potiuk.com> --- .github/boring-cyborg.yml | 4 +- .pre-commit-config.yaml | 3 +- .../airflow_breeze/commands/kubernetes_commands.py | 2 +- dev/moving_providers/move_providers.py | 4 + docs/.gitignore | 1 + .../changelog.rst | 25 --- .../index.rst => providers/apache/hdfs/README.rst | 77 +++----- .../hdfs/docs}/.latest-doc-only-change.txt | 0 .../hdfs/docs/changelog.rst} | 0 .../apache/hdfs/docs}/commits.rst | 0 .../apache/hdfs/docs}/connections.rst | 0 .../apache/hdfs/docs}/index.rst | 0 .../docs}/installing-providers-from-sources.rst | 0 .../apache/hdfs/docs/integration-logos}/hadoop.png | Bin .../hdfs/docs}/logging/hdfs-task-handler.rst | 0 .../apache/hdfs/docs}/logging/index.rst | 0 .../apache/hdfs/docs}/operators/index.rst | 0 .../apache/hdfs/docs}/operators/webhdfs.rst | 0 .../apache/hdfs/docs}/security.rst | 0 .../providers => }/apache/hdfs/provider.yaml | 10 +- providers/apache/hdfs/pyproject.toml | 80 ++++++++ .../hdfs/src/airflow/providers/apache/hdfs/LICENSE | 201 +++++++++++++++++++++ .../src/airflow/providers/apache/hdfs/__init__.py | 0 .../providers/apache/hdfs/get_provider_info.py | 97 ++++++++++ .../providers/apache/hdfs/hooks/__init__.py | 0 .../airflow/providers/apache/hdfs/hooks/hdfs.py | 0 .../airflow/providers/apache/hdfs/hooks/webhdfs.py | 2 +- .../airflow/providers/apache/hdfs/log/__init__.py | 0 .../providers/apache/hdfs/log/hdfs_task_handler.py | 0 .../providers/apache/hdfs/sensors/__init__.py 
| 0 .../airflow/providers/apache/hdfs/sensors/hdfs.py | 4 +- .../providers/apache/hdfs/sensors/web_hdfs.py | 3 +- .../__init__.py => apache/hdfs/tests/conftest.py} | 17 +- .../hdfs/tests/provider_tests}/__init__.py | 2 +- .../hdfs/tests/provider_tests/apache}/__init__.py | 2 +- .../tests/provider_tests}/apache/hdfs/__init__.py | 0 .../provider_tests}/apache/hdfs/hooks/__init__.py | 0 .../apache/hdfs/hooks/test_webhdfs.py | 0 .../apache/hdfs/sensors/__init__.py | 0 .../apache/hdfs/sensors/test_web_hdfs.py | 0 pyproject.toml | 3 + scripts/ci/docker-compose/remove-sources.yml | 1 + scripts/ci/docker-compose/tests-sources.yml | 1 + 43 files changed, 438 insertions(+), 101 deletions(-) diff --git a/.github/boring-cyborg.yml b/.github/boring-cyborg.yml index c9da2a47a6d..4d2ff366586 100644 --- a/.github/boring-cyborg.yml +++ b/.github/boring-cyborg.yml @@ -46,9 +46,7 @@ labelPRBasedOnFilePath: - providers/apache/flink/** provider:apache-hdfs: - - providers/src/airflow/providers/apache/hdfs/**/* - - docs/apache-airflow-providers-apache-hdfs/**/* - - providers/tests/apache/hdfs/**/* + - providers/apache/hdfs/** provider:apache-hive: - providers/apache/hive/** diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e58166f9d38..f567c13d06e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -659,8 +659,9 @@ repos: ^dev/| ^docs/README.rst$| ^docs/apache-airflow-providers-amazon/secrets-backends/aws-ssm-parameter-store.rst$| - ^docs/apache-airflow-providers-apache-hdfs/connections.rst$| ^providers/apache/kafka/docs/connections/kafka.rst$| + ^providers/apache/hdfs/docs/connections.rst$| + ^docs/apache-airflow-providers-apache-kafka/connections/kafka.rst$| ^providers/apache/spark/docs/decorators/pyspark.rst$| ^providers/fab/docs/auth-manager/webserver-authentication.rst$| ^providers/google/docs/operators/cloud/kubernetes_engine.rst$| diff --git a/dev/breeze/src/airflow_breeze/commands/kubernetes_commands.py 
b/dev/breeze/src/airflow_breeze/commands/kubernetes_commands.py index ac6d070a4d4..e69b4a91160 100644 --- a/dev/breeze/src/airflow_breeze/commands/kubernetes_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/kubernetes_commands.py @@ -1469,7 +1469,7 @@ def _run_tests( ) return 1, f"Tests {kubectl_cluster_name}" the_tests: list[str] = ["kubernetes_tests/test_kubernetes_executor.py::TestKubernetesExecutor"] - command_to_run = " ".join([quote(arg) for arg in ["uv", "run", "pytest", *the_tests, *test_args]]) + command_to_run = " ".join([quote(arg) for arg in ["python3", "-m", "pytest", *the_tests, *test_args]]) get_console(output).print(f"[info] Command to run:[/] {command_to_run}") result = run_command( [shell_binary, *extra_shell_args, "-c", command_to_run], diff --git a/dev/moving_providers/move_providers.py b/dev/moving_providers/move_providers.py index da1213e4ffb..0481235ea9e 100755 --- a/dev/moving_providers/move_providers.py +++ b/dev/moving_providers/move_providers.py @@ -366,6 +366,7 @@ def move_provider_yaml(provider_id: str) -> tuple[list[str], list[str], list[str dependencies = [] optional_dependencies = [] devel_dependencies = [] + already_moved_logos = set() for line in original_content: if line.startswith(" logo: "): logo_path = line[len(" logo: ") :] @@ -374,6 +375,9 @@ def move_provider_yaml(provider_id: str) -> tuple[list[str], list[str], list[str PROVIDERS_DIR_PATH / _get_provider_only_path(provider_id) / "docs" / "integration-logos" ) new_logo_path = new_logo_dir / logo_name + if logo_name in already_moved_logos: + continue + already_moved_logos.add(logo_name) _do_stuff( syntax="none", from_path=DOCS_DIR_PATH / Path(logo_path[1:]), diff --git a/docs/.gitignore b/docs/.gitignore index ed5439da700..9d8fc074d1b 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -7,6 +7,7 @@ apache-airflow-providers-apache-cassandra apache-airflow-providers-apache-drill apache-airflow-providers-apache-druid apache-airflow-providers-apache-flink 
+apache-airflow-providers-apache-hdfs apache-airflow-providers-apache-hive apache-airflow-providers-apache-iceberg apache-airflow-providers-apache-impala diff --git a/docs/apache-airflow-providers-apache-hdfs/changelog.rst b/docs/apache-airflow-providers-apache-hdfs/changelog.rst deleted file mode 100644 index 3c984d0e112..00000000000 --- a/docs/apache-airflow-providers-apache-hdfs/changelog.rst +++ /dev/null @@ -1,25 +0,0 @@ - - .. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - .. http://www.apache.org/licenses/LICENSE-2.0 - - .. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE - OVERWRITTEN WHEN PREPARING PACKAGES. - - .. IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE - `PROVIDER_CHANGELOG_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY - -.. include:: ../../providers/src/airflow/providers/apache/hdfs/CHANGELOG.rst diff --git a/docs/apache-airflow-providers-apache-hdfs/index.rst b/providers/apache/hdfs/README.rst similarity index 51% copy from docs/apache-airflow-providers-apache-hdfs/index.rst copy to providers/apache/hdfs/README.rst index b1921ebb2f7..9aeb12342de 100644 --- a/docs/apache-airflow-providers-apache-hdfs/index.rst +++ b/providers/apache/hdfs/README.rst @@ -1,3 +1,4 @@ + .. 
Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information @@ -15,82 +16,50 @@ specific language governing permissions and limitations under the License. -``apache-airflow-providers-apache-hdfs`` -======================================== - - - -.. toctree:: - :hidden: - :maxdepth: 1 - :caption: Basics - - Home <self> - Changelog <changelog> - Security <security> - -.. toctree:: - :hidden: - :maxdepth: 1 - :caption: Guides - - Connection types <connections> - Operators <operators/index> - Logging for Tasks <logging/index> - -.. toctree:: - :hidden: - :maxdepth: 1 - :caption: References + .. NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! - Python API <_api/airflow/providers/apache/hdfs/index> - PyPI Repository <https://pypi.org/project/apache-airflow-providers-apache-hdfs/> - Installing from sources <installing-providers-from-sources> + .. IF YOU WANT TO MODIFY TEMPLATE FOR THIS FILE, YOU SHOULD MODIFY THE TEMPLATE + `PROVIDER_README_TEMPLATE.rst.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY -.. THE REMAINDER OF THE FILE IS AUTOMATICALLY GENERATED. IT WILL BE OVERWRITTEN AT RELEASE TIME! +Package ``apache-airflow-providers-apache-hdfs`` -.. toctree:: - :hidden: - :maxdepth: 1 - :caption: Commits +Release: ``4.7.0`` - Detailed list of commits <commits> - - -apache-airflow-providers-apache-hdfs package ------------------------------------------------------- `Hadoop Distributed File System (HDFS) <https://hadoop.apache.org/docs/r1.2.1/hdfs_design.html>`__ and `WebHDFS <https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/WebHDFS.html>`__. -Release: 4.7.0 - Provider package ---------------- -This package is for the ``apache.hdfs`` provider. -All classes for this package are included in the ``airflow.providers.apache.hdfs`` python package. +This is a provider package for ``apache.hdfs`` provider. 
All classes for this provider package +are in ``airflow.providers.apache.hdfs`` python package. + +You can find package information and changelog for the provider +in the `documentation <https://airflow.apache.org/docs/apache-airflow-providers-apache-hdfs/4.7.0/>`_. Installation ------------ -You can install this package on top of an existing Airflow 2 installation via -``pip install apache-airflow-providers-apache-hdfs``. -For the minimum Airflow version supported, see ``Requirements`` below. +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-apache-hdfs`` + +The package supports the following python versions: 3.9,3.10,3.11,3.12 Requirements ------------ -The minimum Apache Airflow version supported by this provider package is ``2.9.0``. - -================================= ========================================= +================================= ===================================== PIP package Version required -================================= ========================================= +================================= ===================================== ``apache-airflow`` ``>=2.9.0`` ``hdfs[avro,dataframe,kerberos]`` ``>=2.5.4; python_version < "3.12"`` ``hdfs[avro,dataframe,kerberos]`` ``>=2.7.3; python_version >= "3.12"`` -``pandas`` ``>=2.1.2,<2.2; python_version >= "3.9"`` -``pandas`` ``>=1.5.3,<2.2; python_version < "3.9"`` -================================= ========================================= +``pandas`` ``>=2.1.2,<2.2`` +================================= ===================================== + +The changelog for the provider package can be found in the +`changelog <https://airflow.apache.org/docs/apache-airflow-providers-apache-hdfs/4.7.0/changelog.html>`_. 
diff --git a/providers/src/airflow/providers/apache/hdfs/.latest-doc-only-change.txt b/providers/apache/hdfs/docs/.latest-doc-only-change.txt similarity index 100% rename from providers/src/airflow/providers/apache/hdfs/.latest-doc-only-change.txt rename to providers/apache/hdfs/docs/.latest-doc-only-change.txt diff --git a/providers/src/airflow/providers/apache/hdfs/CHANGELOG.rst b/providers/apache/hdfs/docs/changelog.rst similarity index 100% rename from providers/src/airflow/providers/apache/hdfs/CHANGELOG.rst rename to providers/apache/hdfs/docs/changelog.rst diff --git a/docs/apache-airflow-providers-apache-hdfs/commits.rst b/providers/apache/hdfs/docs/commits.rst similarity index 100% rename from docs/apache-airflow-providers-apache-hdfs/commits.rst rename to providers/apache/hdfs/docs/commits.rst diff --git a/docs/apache-airflow-providers-apache-hdfs/connections.rst b/providers/apache/hdfs/docs/connections.rst similarity index 100% rename from docs/apache-airflow-providers-apache-hdfs/connections.rst rename to providers/apache/hdfs/docs/connections.rst diff --git a/docs/apache-airflow-providers-apache-hdfs/index.rst b/providers/apache/hdfs/docs/index.rst similarity index 100% rename from docs/apache-airflow-providers-apache-hdfs/index.rst rename to providers/apache/hdfs/docs/index.rst diff --git a/docs/apache-airflow-providers-apache-hdfs/installing-providers-from-sources.rst b/providers/apache/hdfs/docs/installing-providers-from-sources.rst similarity index 100% rename from docs/apache-airflow-providers-apache-hdfs/installing-providers-from-sources.rst rename to providers/apache/hdfs/docs/installing-providers-from-sources.rst diff --git a/docs/integration-logos/apache/hadoop.png b/providers/apache/hdfs/docs/integration-logos/hadoop.png similarity index 100% rename from docs/integration-logos/apache/hadoop.png rename to providers/apache/hdfs/docs/integration-logos/hadoop.png diff --git a/docs/apache-airflow-providers-apache-hdfs/logging/hdfs-task-handler.rst 
b/providers/apache/hdfs/docs/logging/hdfs-task-handler.rst similarity index 100% rename from docs/apache-airflow-providers-apache-hdfs/logging/hdfs-task-handler.rst rename to providers/apache/hdfs/docs/logging/hdfs-task-handler.rst diff --git a/docs/apache-airflow-providers-apache-hdfs/logging/index.rst b/providers/apache/hdfs/docs/logging/index.rst similarity index 100% rename from docs/apache-airflow-providers-apache-hdfs/logging/index.rst rename to providers/apache/hdfs/docs/logging/index.rst diff --git a/docs/apache-airflow-providers-apache-hdfs/operators/index.rst b/providers/apache/hdfs/docs/operators/index.rst similarity index 100% rename from docs/apache-airflow-providers-apache-hdfs/operators/index.rst rename to providers/apache/hdfs/docs/operators/index.rst diff --git a/docs/apache-airflow-providers-apache-hdfs/operators/webhdfs.rst b/providers/apache/hdfs/docs/operators/webhdfs.rst similarity index 100% rename from docs/apache-airflow-providers-apache-hdfs/operators/webhdfs.rst rename to providers/apache/hdfs/docs/operators/webhdfs.rst diff --git a/docs/apache-airflow-providers-apache-hdfs/security.rst b/providers/apache/hdfs/docs/security.rst similarity index 100% rename from docs/apache-airflow-providers-apache-hdfs/security.rst rename to providers/apache/hdfs/docs/security.rst diff --git a/providers/src/airflow/providers/apache/hdfs/provider.yaml b/providers/apache/hdfs/provider.yaml similarity index 88% rename from providers/src/airflow/providers/apache/hdfs/provider.yaml rename to providers/apache/hdfs/provider.yaml index acafe5525d7..46da73a1302 100644 --- a/providers/src/airflow/providers/apache/hdfs/provider.yaml +++ b/providers/apache/hdfs/provider.yaml @@ -56,23 +56,15 @@ versions: - 1.0.1 - 1.0.0 -dependencies: - - apache-airflow>=2.9.0 - - hdfs[avro,dataframe,kerberos]>=2.5.4;python_version<"3.12" - - hdfs[avro,dataframe,kerberos]>=2.7.3;python_version>="3.12" - - pandas>=2.1.2,<2.2 - - integrations: - integration-name: Hadoop Distributed 
File System (HDFS) external-doc-url: https://hadoop.apache.org/docs/r1.2.1/hdfs_design.html - logo: /integration-logos/apache/hadoop.png + logo: /docs/integration-logos/hadoop.png tags: [apache] - integration-name: WebHDFS external-doc-url: https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/WebHDFS.html how-to-guide: - /docs/apache-airflow-providers-apache-hdfs/operators/webhdfs.rst - logo: /integration-logos/apache/hadoop.png tags: [apache] sensors: diff --git a/providers/apache/hdfs/pyproject.toml b/providers/apache/hdfs/pyproject.toml new file mode 100644 index 00000000000..709e1641f2e --- /dev/null +++ b/providers/apache/hdfs/pyproject.toml @@ -0,0 +1,80 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! 
+ +# IF YOU WANT TO MODIFY THIS FILE EXCEPT DEPENDENCIES, YOU SHOULD MODIFY THE TEMPLATE +# `pyproject_TEMPLATE.toml.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY +[build-system] +requires = ["flit_core==3.10.1"] +build-backend = "flit_core.buildapi" + +[project] +name = "apache-airflow-providers-apache-hdfs" +version = "4.7.0" +description = "Provider package apache-airflow-providers-apache-hdfs for Apache Airflow" +readme = "README.rst" +authors = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +maintainers = [ + {name="Apache Software Foundation", email="dev@airflow.apache.org"}, +] +keywords = [ "airflow-provider", "apache.hdfs", "airflow", "integration" ] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Environment :: Web Environment", + "Intended Audience :: Developers", + "Intended Audience :: System Administrators", + "Framework :: Apache Airflow", + "Framework :: Apache Airflow :: Provider", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: System :: Monitoring", +] +requires-python = "~=3.9" + +# The dependencies should be modified in place in the generated file +# Any change in the dependencies is preserved when the file is regenerated +dependencies = [ + "apache-airflow>=2.9.0", + 'hdfs[avro,dataframe,kerberos]>=2.5.4;python_version<"3.12"', + 'hdfs[avro,dataframe,kerberos]>=2.7.3;python_version>="3.12"', + "pandas>=2.1.2,<2.2", +] + +[project.urls] +"Documentation" = "https://airflow.apache.org/docs/apache-airflow-providers-apache-hdfs/4.7.0" +"Changelog" = "https://airflow.apache.org/docs/apache-airflow-providers-apache-hdfs/4.7.0/changelog.html" +"Bug Tracker" = "https://github.com/apache/airflow/issues" +"Source Code" = "https://github.com/apache/airflow" +"Slack 
Chat" = "https://s.apache.org/airflow-slack" +"Twitter" = "https://x.com/ApacheAirflow" +"YouTube" = "https://www.youtube.com/channel/UCSXwxpWZQ7XZ1WL3wqevChA/" + +[project.entry-points."apache_airflow_provider"] +provider_info = "airflow.providers.apache.hdfs.get_provider_info:get_provider_info" + +[tool.flit.module] +name = "airflow.providers.apache.hdfs" + +[tool.pytest.ini_options] +ignore = "tests/system/" diff --git a/providers/apache/hdfs/src/airflow/providers/apache/hdfs/LICENSE b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/LICENSE new file mode 100644 index 00000000000..11069edd790 --- /dev/null +++ b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/providers/src/airflow/providers/apache/hdfs/__init__.py b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/hdfs/__init__.py rename to providers/apache/hdfs/src/airflow/providers/apache/hdfs/__init__.py diff --git a/providers/apache/hdfs/src/airflow/providers/apache/hdfs/get_provider_info.py b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/get_provider_info.py new file mode 100644 index 00000000000..67a72683f12 --- /dev/null +++ b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/get_provider_info.py @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE! THIS FILE IS AUTOMATICALLY GENERATED AND WILL BE OVERWRITTEN! +# +# IF YOU WANT TO MODIFY THIS FILE, YOU SHOULD MODIFY THE TEMPLATE +# `get_provider_info_TEMPLATE.py.jinja2` IN the `dev/breeze/src/airflow_breeze/templates` DIRECTORY + + +def get_provider_info(): + return { + "package-name": "apache-airflow-providers-apache-hdfs", + "name": "Apache HDFS", + "description": "`Hadoop Distributed File System (HDFS) <https://hadoop.apache.org/docs/r1.2.1/hdfs_design.html>`__\nand `WebHDFS <https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/WebHDFS.html>`__.\n", + "state": "ready", + "source-date-epoch": 1734527971, + "versions": [ + "4.7.0", + "4.6.0", + "4.5.1", + "4.5.0", + "4.4.2", + "4.4.1", + "4.4.0", + "4.3.3", + "4.3.2", + "4.3.1", + "4.3.0", + "4.2.0", + "4.1.1", + "4.1.0", + "4.0.0", + "3.2.1", + "3.2.0", + "3.1.0", + "3.0.1", + "3.0.0", + "2.2.3", + "2.2.2", + "2.2.1", + "2.2.0", + "2.1.1", + "2.1.0", + "2.0.0", + "1.0.1", + "1.0.0", + ], + "integrations": [ + { + "integration-name": "Hadoop Distributed File System (HDFS)", + "external-doc-url": "https://hadoop.apache.org/docs/r1.2.1/hdfs_design.html", + "logo": "/docs/integration-logos/hadoop.png", + "tags": ["apache"], + }, + { + "integration-name": "WebHDFS", + "external-doc-url": "https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/WebHDFS.html", + "how-to-guide": ["/docs/apache-airflow-providers-apache-hdfs/operators/webhdfs.rst"], + "tags": ["apache"], + }, + ], + "sensors": [ + { + "integration-name": "WebHDFS", + "python-modules": ["airflow.providers.apache.hdfs.sensors.web_hdfs"], + } + ], + "hooks": [ + {"integration-name": "WebHDFS", "python-modules": ["airflow.providers.apache.hdfs.hooks.webhdfs"]} + ], + "connection-types": [ + { + "hook-class-name": "airflow.providers.apache.hdfs.hooks.webhdfs.WebHDFSHook", + "connection-type": "webhdfs", + } + 
], + "dependencies": [ + "apache-airflow>=2.9.0", + 'hdfs[avro,dataframe,kerberos]>=2.5.4;python_version<"3.12"', + 'hdfs[avro,dataframe,kerberos]>=2.7.3;python_version>="3.12"', + "pandas>=2.1.2,<2.2", + ], + } diff --git a/providers/src/airflow/providers/apache/hdfs/hooks/__init__.py b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/hooks/__init__.py similarity index 100% copy from providers/src/airflow/providers/apache/hdfs/hooks/__init__.py copy to providers/apache/hdfs/src/airflow/providers/apache/hdfs/hooks/__init__.py diff --git a/providers/src/airflow/providers/apache/hdfs/hooks/hdfs.py b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/hooks/hdfs.py similarity index 100% rename from providers/src/airflow/providers/apache/hdfs/hooks/hdfs.py rename to providers/apache/hdfs/src/airflow/providers/apache/hdfs/hooks/hdfs.py diff --git a/providers/src/airflow/providers/apache/hdfs/hooks/webhdfs.py b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/hooks/webhdfs.py similarity index 100% rename from providers/src/airflow/providers/apache/hdfs/hooks/webhdfs.py rename to providers/apache/hdfs/src/airflow/providers/apache/hdfs/hooks/webhdfs.py index 47ab97db4e8..f1ff1e1655f 100644 --- a/providers/src/airflow/providers/apache/hdfs/hooks/webhdfs.py +++ b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/hooks/webhdfs.py @@ -24,11 +24,11 @@ import socket from typing import Any import requests -from hdfs import HdfsError, InsecureClient from airflow.configuration import conf from airflow.exceptions import AirflowException from airflow.hooks.base import BaseHook +from hdfs import HdfsError, InsecureClient log = logging.getLogger(__name__) diff --git a/providers/src/airflow/providers/apache/hdfs/log/__init__.py b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/log/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/hdfs/log/__init__.py rename to 
providers/apache/hdfs/src/airflow/providers/apache/hdfs/log/__init__.py diff --git a/providers/src/airflow/providers/apache/hdfs/log/hdfs_task_handler.py b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/log/hdfs_task_handler.py similarity index 100% rename from providers/src/airflow/providers/apache/hdfs/log/hdfs_task_handler.py rename to providers/apache/hdfs/src/airflow/providers/apache/hdfs/log/hdfs_task_handler.py diff --git a/providers/src/airflow/providers/apache/hdfs/sensors/__init__.py b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/sensors/__init__.py similarity index 100% rename from providers/src/airflow/providers/apache/hdfs/sensors/__init__.py rename to providers/apache/hdfs/src/airflow/providers/apache/hdfs/sensors/__init__.py diff --git a/providers/src/airflow/providers/apache/hdfs/sensors/hdfs.py b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/sensors/hdfs.py similarity index 93% rename from providers/src/airflow/providers/apache/hdfs/sensors/hdfs.py rename to providers/apache/hdfs/src/airflow/providers/apache/hdfs/sensors/hdfs.py index 53a20204a12..2d924821f99 100644 --- a/providers/src/airflow/providers/apache/hdfs/sensors/hdfs.py +++ b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/sensors/hdfs.py @@ -42,11 +42,11 @@ class HdfsSensor(BaseSensorOperator): raise RuntimeError(_EXCEPTION_MESSAGE) -class HdfsRegexSensor(HdfsSensor): # noqa: D101 ignore missing docstring +class HdfsRegexSensor(HdfsSensor): def __init__(self, *args, **kwargs): raise RuntimeError(_EXCEPTION_MESSAGE) -class HdfsFolderSensor(HdfsSensor): # noqa: D101 ignore missing docstring +class HdfsFolderSensor(HdfsSensor): def __init__(self, *args, **kwargs): raise RuntimeError(_EXCEPTION_MESSAGE) diff --git a/providers/src/airflow/providers/apache/hdfs/sensors/web_hdfs.py b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/sensors/web_hdfs.py similarity index 99% rename from providers/src/airflow/providers/apache/hdfs/sensors/web_hdfs.py 
rename to providers/apache/hdfs/src/airflow/providers/apache/hdfs/sensors/web_hdfs.py index 8b951bf3ec8..2af323bbf5f 100644 --- a/providers/src/airflow/providers/apache/hdfs/sensors/web_hdfs.py +++ b/providers/apache/hdfs/src/airflow/providers/apache/hdfs/sensors/web_hdfs.py @@ -23,11 +23,10 @@ from typing import TYPE_CHECKING, Any from airflow.sensors.base import BaseSensorOperator if TYPE_CHECKING: + from airflow.utils.context import Context from hdfs import InsecureClient from hdfs.ext.kerberos import KerberosClient - from airflow.utils.context import Context - class WebHdfsSensor(BaseSensorOperator): """Waits for a file or folder to land in HDFS.""" diff --git a/providers/src/airflow/providers/apache/hdfs/hooks/__init__.py b/providers/apache/hdfs/tests/conftest.py similarity index 58% rename from providers/src/airflow/providers/apache/hdfs/hooks/__init__.py rename to providers/apache/hdfs/tests/conftest.py index 217e5db9607..068fe6bbf5a 100644 --- a/providers/src/airflow/providers/apache/hdfs/hooks/__init__.py +++ b/providers/apache/hdfs/tests/conftest.py @@ -1,4 +1,3 @@ -# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -15,3 +14,19 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+from __future__ import annotations + +import pathlib + +import pytest + +pytest_plugins = "tests_common.pytest_plugin" + + +@pytest.hookimpl(tryfirst=True) +def pytest_configure(config: pytest.Config) -> None: + deprecations_ignore_path = pathlib.Path(__file__).parent.joinpath("deprecations_ignore.yml") + dep_path = [deprecations_ignore_path] if deprecations_ignore_path.exists() else [] + config.inicfg["airflow_deprecations_ignore"] = ( + config.inicfg.get("airflow_deprecations_ignore", []) + dep_path # type: ignore[assignment,operator] + ) diff --git a/providers/tests/apache/hdfs/__init__.py b/providers/apache/hdfs/tests/provider_tests/__init__.py similarity index 90% copy from providers/tests/apache/hdfs/__init__.py copy to providers/apache/hdfs/tests/provider_tests/__init__.py index 217e5db9607..e8fd2285643 100644 --- a/providers/tests/apache/hdfs/__init__.py +++ b/providers/apache/hdfs/tests/provider_tests/__init__.py @@ -1,4 +1,3 @@ -# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -15,3 +14,4 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/tests/apache/hdfs/__init__.py b/providers/apache/hdfs/tests/provider_tests/apache/__init__.py similarity index 90% copy from providers/tests/apache/hdfs/__init__.py copy to providers/apache/hdfs/tests/provider_tests/apache/__init__.py index 217e5db9607..e8fd2285643 100644 --- a/providers/tests/apache/hdfs/__init__.py +++ b/providers/apache/hdfs/tests/provider_tests/apache/__init__.py @@ -1,4 +1,3 @@ -# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. 
See the NOTICE file # distributed with this work for additional information @@ -15,3 +14,4 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +__path__ = __import__("pkgutil").extend_path(__path__, __name__) # type: ignore diff --git a/providers/tests/apache/hdfs/__init__.py b/providers/apache/hdfs/tests/provider_tests/apache/hdfs/__init__.py similarity index 100% rename from providers/tests/apache/hdfs/__init__.py rename to providers/apache/hdfs/tests/provider_tests/apache/hdfs/__init__.py diff --git a/providers/tests/apache/hdfs/hooks/__init__.py b/providers/apache/hdfs/tests/provider_tests/apache/hdfs/hooks/__init__.py similarity index 100% rename from providers/tests/apache/hdfs/hooks/__init__.py rename to providers/apache/hdfs/tests/provider_tests/apache/hdfs/hooks/__init__.py diff --git a/providers/tests/apache/hdfs/hooks/test_webhdfs.py b/providers/apache/hdfs/tests/provider_tests/apache/hdfs/hooks/test_webhdfs.py similarity index 100% rename from providers/tests/apache/hdfs/hooks/test_webhdfs.py rename to providers/apache/hdfs/tests/provider_tests/apache/hdfs/hooks/test_webhdfs.py diff --git a/providers/tests/apache/hdfs/sensors/__init__.py b/providers/apache/hdfs/tests/provider_tests/apache/hdfs/sensors/__init__.py similarity index 100% rename from providers/tests/apache/hdfs/sensors/__init__.py rename to providers/apache/hdfs/tests/provider_tests/apache/hdfs/sensors/__init__.py diff --git a/providers/tests/apache/hdfs/sensors/test_web_hdfs.py b/providers/apache/hdfs/tests/provider_tests/apache/hdfs/sensors/test_web_hdfs.py similarity index 100% rename from providers/tests/apache/hdfs/sensors/test_web_hdfs.py rename to providers/apache/hdfs/tests/provider_tests/apache/hdfs/sensors/test_web_hdfs.py diff --git a/pyproject.toml b/pyproject.toml index 6ba1099fdc7..ea6face2d29 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -645,6 +645,7 @@ dev = [ 
"apache-airflow-providers-apache-drill", "apache-airflow-providers-apache-druid", "apache-airflow-providers-apache-flink", + "apache-airflow-providers-apache-hdfs", "apache-airflow-providers-apache-hive", "apache-airflow-providers-apache-iceberg", "apache-airflow-providers-apache-impala", @@ -741,6 +742,7 @@ apache-airflow-providers-apache-cassandra = { workspace = true } apache-airflow-providers-apache-drill = { workspace = true } apache-airflow-providers-apache-druid = { workspace = true } apache-airflow-providers-apache-flink = { workspace = true } +apache-airflow-providers-apache-hdfs = { workspace = true } apache-airflow-providers-apache-hive = { workspace = true } apache-airflow-providers-apache-iceberg = {workspace = true} apache-airflow-providers-apache-impala = { workspace = true } @@ -835,6 +837,7 @@ members = [ "providers/apache/drill", "providers/apache/druid", "providers/apache/flink", + "providers/apache/hdfs", "providers/apache/hive", "providers/apache/iceberg", "providers/apache/impala", diff --git a/scripts/ci/docker-compose/remove-sources.yml b/scripts/ci/docker-compose/remove-sources.yml index 3f736ff5d0e..17e4e53627a 100644 --- a/scripts/ci/docker-compose/remove-sources.yml +++ b/scripts/ci/docker-compose/remove-sources.yml @@ -39,6 +39,7 @@ services: - ../../../empty:/opt/airflow/providers/apache/drill/src - ../../../empty:/opt/airflow/providers/apache/druid/src - ../../../empty:/opt/airflow/providers/apache/flink/src + - ../../../empty:/opt/airflow/providers/apache/hdfs/src - ../../../empty:/opt/airflow/providers/apache/hive/src - ../../../empty:/opt/airflow/providers/apache/iceberg/src - ../../../empty:/opt/airflow/providers/apache/impala/src diff --git a/scripts/ci/docker-compose/tests-sources.yml b/scripts/ci/docker-compose/tests-sources.yml index b84ddb29d5e..43ff5ad79e4 100644 --- a/scripts/ci/docker-compose/tests-sources.yml +++ b/scripts/ci/docker-compose/tests-sources.yml @@ -46,6 +46,7 @@ services: - 
../../../providers/apache/drill/tests:/opt/airflow/providers/apache/drill/tests - ../../../providers/apache/druid/tests:/opt/airflow/providers/apache/druid/tests - ../../../providers/apache/flink/tests:/opt/airflow/providers/apache/flink/tests + - ../../../providers/apache/hdfs/tests:/opt/airflow/providers/apache/hdfs/tests - ../../../providers/apache/hive/tests:/opt/airflow/providers/apache/hive/tests - ../../../providers/apache/iceberg/tests:/opt/airflow/providers/apache/iceberg/tests - ../../../providers/apache/impala/tests:/opt/airflow/providers/apache/impala/tests