This is an automated email from the ASF dual-hosted git repository. potiuk pushed a commit to branch v1-10-test in repository https://gitbox.apache.org/repos/asf/airflow.git
commit 43c72f2f4c7d0b72d9fcbbfb17135d9be59eb4c9 Author: SZN <szy...@nieradka.net> AuthorDate: Mon Nov 2 14:17:41 2020 +0100 Checks if all the libraries in setup.py are listed in installation.rst file (#12023) (cherry picked from commit 2354bd2be381bcfe6db132990af1ac34df52b9b4) --- .pre-commit-config.yaml | 6 + BREEZE.rst | 9 +- CONTRIBUTING.rst | 16 +- INSTALL | 16 +- STATIC_CODE_CHECKS.rst | 2 + breeze-complete | 1 + docs/installation.rst | 216 +++++++++++++++------ .../pre_commit_check_setup_installation.py | 108 +++++++++++ setup.py | 16 ++ 9 files changed, 314 insertions(+), 76 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6763164..cb5c753 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -213,6 +213,12 @@ repos: files: ^setup.py$ pass_filenames: false entry: ./scripts/ci/pre_commit/pre_commit_check_order_setup.py + - id: setup-installation + name: Checks if all the libraries in setup.py are listed in installation.rst file + language: python + files: ^setup.py$|^docs/installation.rst$ + pass_filenames: false + entry: ./scripts/ci/pre_commit/pre_commit_check_setup_installation.py - id: update-breeze-file name: Update output of breeze command in BREEZE.rst entry: "./scripts/ci/pre_commit/pre_commit_breeze_cmd_line.sh" diff --git a/BREEZE.rst b/BREEZE.rst index cf119ba..5e481f7 100644 --- a/BREEZE.rst +++ b/BREEZE.rst @@ -395,7 +395,7 @@ run ``airflow webserver``, ``airflow scheduler``, ``airflow worker`` in separate This can be achieved either via ``tmux`` or via exec-ing into the running container from the host. Tmux is installed inside the container and you can launch it with ``tmux`` command. Tmux provides you with the capability of creating multiple virtual terminals and multiplex between them. More about ``tmux`` can be -found at `tmux github wiki page <https://github.com/tmux/tmux/wiki>`_ . Tmux has several useful shortcuts +found at `tmux GitHub wiki page <https://github.com/tmux/tmux/wiki>`_ . 
Tmux has several useful shortcuts that allow you to split the terminals, open new tabs etc - it's pretty useful to learn it. .. raw:: html @@ -1001,7 +1001,7 @@ Managing Dependencies --------------------- If you need to change apt dependencies in the ``Dockerfile.ci``, add Python packages in ``setup.py`` or -add javascript dependencies in ``package.json``, you can either add dependencies temporarily for a single +add JavaScript dependencies in ``package.json``, you can either add dependencies temporarily for a single Breeze session or permanently in ``setup.py``, ``Dockerfile.ci``, or ``package.json`` files. Installing Dependencies for a Single Breeze Session @@ -1865,8 +1865,9 @@ This is the current syntax for `./breeze <./breeze>`_: flake8 forbid-tabs helm-lint incorrect-use-of-LoggingMixin insert-license language-matters lint-dockerfile lint-openapi mixed-line-ending mypy mypy-helm no-relative-imports pre-commit-descriptions pydevd python2-compile python2-fastcheck - python-no-log-warn rst-backticks setup-order shellcheck sort-in-the-wild - trailing-whitespace update-breeze-file update-extras update-local-yml-file yamllint + python-no-log-warn rst-backticks setup-order setup-installation shellcheck + sort-in-the-wild trailing-whitespace update-breeze-file update-extras + update-local-yml-file yamllint You can pass extra arguments including options to the pre-commit framework as <EXTRA_ARGS> passed after --. For example: diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 6d3aa91..6d34026 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -540,13 +540,15 @@ This is the full list of those extras: .. 
START EXTRAS HERE -all, all_dbs, async, atlas, aws, azure, azure_blob_storage, azure_container_instances, azure_cosmos, -azure_data_lake, azure_secrets, cassandra, celery, cgroups, cloudant, crypto, dask, databricks, -datadog, devel, devel_azure, devel_ci, devel_hadoop, doc, docker, druid, elasticsearch, emr, gcp, -gcp_api, github_enterprise, google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, -kubernetes, ldap, mongo, mssql, mysql, oracle, papermill, password, pinot, postgres, presto, qds, -rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, sentry, slack, snowflake, ssh, statsd, -vertica, virtualenv, webhdfs, winrm +all, all_dbs, amazon, apache.atlas, apache.cassandra, apache.druid, apache.hdfs, apache.hive, +apache.pinot, apache.presto, apache.webhdfs, async, atlas, aws, azure, azure_blob_storage, +azure_container_instances, azure_cosmos, azure_data_lake, azure_secrets, cassandra, celery, cgroups, +cloudant, cncf.kubernetes, crypto, dask, databricks, datadog, devel, devel_all, devel_azure, +devel_ci, devel_hadoop, doc, docker, druid, elasticsearch, emr, gcp, gcp_api, github_enterprise, +google, google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap, +microsoft.azure, microsoft.mssql, microsoft.winrm, mongo, mssql, mysql, oracle, papermill, password, +pinot, postgres, presto, qds, rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, sentry, +slack, snowflake, ssh, statsd, vertica, virtualenv, webhdfs, winrm .. END EXTRAS HERE diff --git a/INSTALL b/INSTALL index c0582f3..0e2f582 100644 --- a/INSTALL +++ b/INSTALL @@ -45,13 +45,15 @@ pip install . \ # You can also install Airflow with extras specified. 
The list of available extras: # START EXTRAS HERE -all, all_dbs, async, atlas, aws, azure, azure_blob_storage, azure_container_instances, azure_cosmos, -azure_data_lake, azure_secrets, cassandra, celery, cgroups, cloudant, crypto, dask, databricks, -datadog, devel, devel_azure, devel_ci, devel_hadoop, doc, docker, druid, elasticsearch, emr, gcp, -gcp_api, github_enterprise, google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, -kubernetes, ldap, mongo, mssql, mysql, oracle, papermill, password, pinot, postgres, presto, qds, -rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, sentry, slack, snowflake, ssh, statsd, -vertica, virtualenv, webhdfs, winrm +all, all_dbs, amazon, apache.atlas, apache.cassandra, apache.druid, apache.hdfs, apache.hive, +apache.pinot, apache.presto, apache.webhdfs, async, atlas, aws, azure, azure_blob_storage, +azure_container_instances, azure_cosmos, azure_data_lake, azure_secrets, cassandra, celery, cgroups, +cloudant, cncf.kubernetes, crypto, dask, databricks, datadog, devel, devel_all, devel_azure, +devel_ci, devel_hadoop, doc, docker, druid, elasticsearch, emr, gcp, gcp_api, github_enterprise, +google, google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap, +microsoft.azure, microsoft.mssql, microsoft.winrm, mongo, mssql, mysql, oracle, papermill, password, +pinot, postgres, presto, qds, rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, sentry, +slack, snowflake, ssh, statsd, vertica, virtualenv, webhdfs, winrm # END EXTRAS HERE diff --git a/STATIC_CODE_CHECKS.rst b/STATIC_CODE_CHECKS.rst index 1439417..ce68e0a 100644 --- a/STATIC_CODE_CHECKS.rst +++ b/STATIC_CODE_CHECKS.rst @@ -134,6 +134,8 @@ require Breeze Docker images to be installed locally: ----------------------------------- ---------------------------------------------------------------- ------------ ``setup-order`` Checks for an order of dependencies in setup.py ----------------------------------- 
---------------------------------------------------------------- ------------ +``setup-installation`` Checks if all the libraries in setup.py are listed in docs +----------------------------------- ---------------------------------------------------------------- ------------ ``shellcheck`` Checks shell files with shellcheck. ----------------------------------- ---------------------------------------------------------------- ------------ ``sort-in-the-wild`` Sort INTHEWILD.md alphabetically. diff --git a/breeze-complete b/breeze-complete index 6ec739c..1b89b70 100644 --- a/breeze-complete +++ b/breeze-complete @@ -102,6 +102,7 @@ python2-fastcheck python-no-log-warn rst-backticks setup-order +setup-installation shellcheck sort-in-the-wild trailing-whitespace diff --git a/docs/installation.rst b/docs/installation.rst index ed82157..de1985c 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -37,7 +37,6 @@ Those "known-to-be-working" constraints are per major/minor python version. You files when installing Airflow from PyPI. Note that you have to specify correct Airflow version and python versions in the URL. - **Prerequisites** On Debian based Linux OS: @@ -52,18 +51,21 @@ and python versions in the URL. .. code-block:: bash - pip install \ - apache-airflow==1.10.12 \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-1.10.12/constraints-3.7.txt" - + AIRFLOW_VERSION=1.10.12 + PYTHON_VERSION="$(python --version | cut -d " " -f 2 | cut -d "." -f 1-2)" + # For example: 3.6 + CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-${PYTHON_VERSION}.txt" + # For example: https://raw.githubusercontent.com/apache/airflow/constraints-1.10.12/constraints-3.6.txt + pip install "apache-airflow==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}" -2. Installing with extras (for example postgres, gcp) +2. Installing with extras (for example postgres, google) .. 
code-block:: bash - pip install \ - apache-airflow[postgres,gcp]==1.10.12 \ - --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-1.10.12/constraints-3.7.txt" + AIRFLOW_VERSION=1.10.12 + PYTHON_VERSION="$(python --version | cut -d " " -f 2 | cut -d "." -f 1-2)" + CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-${PYTHON_VERSION}.txt" + pip install "apache-airflow[postgres,google]==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}" You need certain system level requirements in order to install Airflow. Those are requirements that are known @@ -108,6 +110,9 @@ these extra dependencies. Here's the list of the subpackages and what they enable: + +**Fundamentals:** + +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ | subpackage | install command | enables | +=====================+=====================================================+======================================================================+ @@ -115,76 +120,171 @@ Here's the list of the subpackages and what they enable: +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ | all_dbs | ``pip install 'apache-airflow[all_dbs]'`` | All databases integrations | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| async | ``pip install 'apache-airflow[async]'`` | Async worker classes for Gunicorn | -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| aws | ``pip install 'apache-airflow[aws]'`` | Amazon Web Services | 
-+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| azure | ``pip install 'apache-airflow[azure]'`` | Microsoft Azure | +| devel | ``pip install 'apache-airflow[devel]'`` | Minimum dev tools requirements | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| celery | ``pip install 'apache-airflow[celery]'`` | CeleryExecutor | +| devel_all | ``pip install 'apache-airflow[devel_all]'`` | All dev tools requirements | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| cloudant | ``pip install 'apache-airflow[cloudant]'`` | Cloudant hook | +| devel_azure | ``pip install 'apache-airflow[devel_azure]'`` | Azure development requirements | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| crypto | ``pip install 'apache-airflow[crypto]'`` | Encrypt connection passwords in metadata db | -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| devel | ``pip install 'apache-airflow[devel]'`` | Minimum dev tools requirements | +| devel_ci | ``pip install 'apache-airflow[devel_ci]'`` | Development requirements used in CI | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ | devel_hadoop | ``pip install 'apache-airflow[devel_hadoop]'`` | Airflow + dependencies on the Hadoop stack | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| druid | ``pip install 'apache-airflow[druid]'`` | Druid related 
operators & hooks | +| doc | ``pip install 'apache-airflow[doc]'`` | Packages needed to build docs | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| gcp | ``pip install 'apache-airflow[gcp]'`` | Google Cloud Platform | +| password | ``pip install 'apache-airflow[password]'`` | Password authentication for users | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| github_enterprise | ``pip install 'apache-airflow[github_enterprise]'`` | GitHub Enterprise auth backend | + + +**Apache Software:** + +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| google_auth | ``pip install 'apache-airflow[google_auth]'`` | Google auth backend | +| subpackage | install command | enables | ++=====================+=====================================================+======================================================================+ +| atlas | ``pip install 'apache-airflow[apache.atlas]'`` | Apache Atlas to use Data Lineage feature | ++---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ +| cassandra | ``pip install 'apache-airflow[apache.cassandra]'`` | Cassandra related operators & hooks | ++---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ +| druid | ``pip install 'apache-airflow[apache.druid]'`` | Druid related operators & hooks | ++---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ +| hdfs | ``pip install 'apache-airflow[apache.hdfs]'`` | HDFS hooks and operators | 
++---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ +| hive | ``pip install 'apache-airflow[apache.hive]'`` | All Hive related operators | ++---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ +| presto | ``pip install 'apache-airflow[apache.presto]'`` | All Presto related operators & hooks | ++---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ +| webhdfs | ``pip install 'apache-airflow[webhdfs]'`` | HDFS hooks and operators | ++---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ + + +**Services:** + ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| subpackage | install command | enables | ++=============================+===============================================================+======================================================================+ +| aws | ``pip install 'apache-airflow[amazon]'`` | Amazon Web Services | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| azure | ``pip install 'apache-airflow[microsoft.azure]'`` | Microsoft Azure | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| azure_blob_storage | ``pip install 'apache-airflow[azure_blob_storage]'`` | Microsoft Azure (blob storage) | 
++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| azure_cosmos | ``pip install 'apache-airflow[azure_cosmos]'`` | Microsoft Azure (cosmos) | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| azure_container_instances | ``pip install 'apache-airflow[azure_container_instances]'`` | Microsoft Azure (container instances) | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| azure_data_lake | ``pip install 'apache-airflow[azure_data_lake]'`` | Microsoft Azure (data lake) | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| azure_secrets | ``pip install 'apache-airflow[azure_secrets]'`` | Microsoft Azure (secrets) | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| azure | ``pip install 'apache-airflow[microsoft.azure]'`` | Microsoft Azure | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| cloudant | ``pip install 'apache-airflow[cloudant]'`` | Cloudant hook | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| databricks | ``pip install 'apache-airflow[databricks]'`` | Databricks hooks and operators | 
++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| datadog | ``pip install 'apache-airflow[datadog]'`` | Datadog hooks and sensors | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| gcp | ``pip install 'apache-airflow[gcp]'`` | Google Cloud | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| github_enterprise | ``pip install 'apache-airflow[github_enterprise]'`` | GitHub Enterprise auth backend | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| google | ``pip install 'apache-airflow[google]'`` | Google Cloud (same as gcp) | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| google_auth | ``pip install 'apache-airflow[google_auth]'`` | Google auth backend | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| hashicorp | ``pip install 'apache-airflow[hashicorp]'`` | Hashicorp Services (Vault) | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| jira | ``pip install 'apache-airflow[jira]'`` | Jira hooks and operators | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| qds | ``pip install 'apache-airflow[qds]'`` | Enable 
QDS (Qubole Data Service) support | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| salesforce | ``pip install 'apache-airflow[salesforce]'`` | Salesforce hook | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| sendgrid | ``pip install 'apache-airflow[sendgrid]'`` | Send email using sendgrid | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| segment | ``pip install 'apache-airflow[segment]'`` | Segment hooks and sensors | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| sentry | ``pip install 'apache-airflow[sentry]'`` | | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| slack | ``pip install 'apache-airflow[slack]'`` | :class:`airflow.providers.slack.operators.slack.SlackAPIOperator` | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| snowflake | ``pip install 'apache-airflow[snowflake]'`` | Snowflake hooks and operators | ++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ +| vertica | ``pip install 'apache-airflow[vertica]'`` | Vertica hook support as an Airflow backend | 
++-----------------------------+---------------------------------------------------------------+----------------------------------------------------------------------+ + + +**Software:** + ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ +| subpackage | install command | enables | ++=====================+=====================================================+====================================================================================+ +| async | ``pip install 'apache-airflow[async]'`` | Async worker classes for Gunicorn | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ +| celery | ``pip install 'apache-airflow[celery]'`` | CeleryExecutor | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ +| dask | ``pip install 'apache-airflow[dask]'`` | DaskExecutor | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ +| docker | ``pip install 'apache-airflow[docker]'`` | Docker hooks and operators | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ +| elasticsearch | ``pip install 'apache-airflow[elasticsearch]'`` | Elasticsearch hooks and Log Handler | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ +| kubernetes | ``pip install 'apache-airflow[cncf.kubernetes]'`` | Kubernetes Executor and operator | 
++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ +| mongo | ``pip install 'apache-airflow[mongo]'`` | Mongo hooks and operators | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ +| mssql (deprecated) | ``pip install 'apache-airflow[microsoft.mssql]'`` | Microsoft SQL Server operators and hook, | +| | | support as an Airflow backend. Uses pymssql. | +| | | Will be replaced by subpackage ``odbc``. | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ +| mysql | ``pip install 'apache-airflow[mysql]'`` | MySQL operators and hook, support as an Airflow | +| | | backend. The version of MySQL server has to be | +| | | 5.6.4+. The exact version upper bound depends | +| | | on version of ``mysqlclient`` package. For | +| | | example, ``mysqlclient`` 1.3.12 can only be | +| | | used with MySQL server 5.6.4 through 5.7. 
| ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ +| oracle | ``pip install 'apache-airflow[oracle]'`` | Oracle hooks and operators | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ +| pinot | ``pip install 'apache-airflow[pinot]'`` | Pinot DB hook | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ +| postgres | ``pip install 'apache-airflow[postgres]'`` | PostgreSQL operators and hook, support as an | +| | | Airflow backend | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ +| rabbitmq | ``pip install 'apache-airflow[rabbitmq]'`` | RabbitMQ support as a Celery backend | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ +| redis | ``pip install 'apache-airflow[redis]'`` | Redis hooks and sensors | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ +| samba | ``pip install 'apache-airflow[samba]'`` | :class:`airflow.providers.apache.hive.transfers.hive_to_samba.HiveToSambaOperator` | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ +| statsd | ``pip install 'apache-airflow[statsd]'`` | Needed by StatsD metrics | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ +| 
virtualenv | ``pip install 'apache-airflow[virtualenv]'`` | | ++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+ + + +**Other:** + +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| hashicorp | ``pip install 'apache-airflow[hashicorp]'`` | Hashicorp Services (Vault) | +| subpackage | install command | enables | ++=====================+=====================================================+======================================================================+ +| cgroups | ``pip install 'apache-airflow[cgroups]'`` | Needed To use CgroupTaskRunner | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| hdfs | ``pip install 'apache-airflow[hdfs]'`` | HDFS hooks and operators | +| crypto | ``pip install 'apache-airflow[crypto]'`` | Cryptography libraries | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| hive | ``pip install 'apache-airflow[hive]'`` | All Hive related operators | +| grpc | ``pip install 'apache-airflow[grpc]'`` | Grpc hooks and operators | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ | jdbc | ``pip install 'apache-airflow[jdbc]'`` | JDBC hooks and operators | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ | kerberos | ``pip install 'apache-airflow[kerberos]'`` | Kerberos integration for Kerberized Hadoop | 
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| kubernetes | ``pip install 'apache-airflow[kubernetes]'`` | Kubernetes Executor and operator | -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ | ldap | ``pip install 'apache-airflow[ldap]'`` | LDAP authentication for users | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| mssql | ``pip install 'apache-airflow[mssql]'`` | Microsoft SQL Server operators and hook, | -| | | support as an Airflow backend | -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| mysql | ``pip install 'apache-airflow[mysql]'`` | MySQL operators and hook, support as an Airflow | -| | | backend. The version of MySQL server has to be | -| | | 5.6.4+. The exact version upper bound depends | -| | | on version of ``mysqlclient`` package. For | -| | | example, ``mysqlclient`` 1.3.12 can only be | -| | | used with MySQL server 5.6.4 through 5.7. 
| -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| oracle | ``pip install 'apache-airflow[oracle]'`` | Oracle hooks and operators | -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| password | ``pip install 'apache-airflow[password]'`` | Password authentication for users | -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| postgres | ``pip install 'apache-airflow[postgres]'`` | PostgreSQL operators and hook, support as an | -| | | Airflow backend | -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| presto | ``pip install 'apache-airflow[presto]'`` | All Presto related operators & hooks | -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| qds | ``pip install 'apache-airflow[qds]'`` | Enable QDS (Qubole Data Service) support | -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| rabbitmq | ``pip install 'apache-airflow[rabbitmq]'`` | RabbitMQ support as a Celery backend | -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| redis | ``pip install 'apache-airflow[redis]'`` | Redis hooks and sensors | -+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| samba | ``pip install apache-airflow[samba]'`` | :class:`airflow.operators.hive_to_samba_operator.Hive2SambaOperator` | 
-+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| slack | ``pip install 'apache-airflow[slack']`` | :class:`airflow.operators.slack_operator.SlackAPIOperator` | +| papermill | ``pip install 'apache-airflow[papermill]'`` | Papermill hooks and operators | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ | ssh | ``pip install 'apache-airflow[ssh]'`` | SSH hooks and Operator | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ -| vertica | ``pip install 'apache-airflow[vertica]'`` | Vertica hook support as an Airflow backend | +| winrm | ``pip install 'apache-airflow[microsoft.winrm]'`` | WinRM hooks and operators | +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+ Initializing Airflow Database @@ -200,4 +300,4 @@ run tasks: .. code-block:: bash - airflow initdb + airflow db init diff --git a/scripts/ci/pre_commit/pre_commit_check_setup_installation.py b/scripts/ci/pre_commit/pre_commit_check_setup_installation.py new file mode 100755 index 0000000..2fdeca6 --- /dev/null +++ b/scripts/ci/pre_commit/pre_commit_check_setup_installation.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Checks if all the libraries in setup.py are listed in installation.rst file
"""

import os
import re
import sys
from os.path import dirname
from typing import Dict, List

AIRFLOW_SOURCES_DIR = os.path.join(dirname(__file__), os.pardir, os.pardir, os.pardir)
SETUP_PY_FILE = 'setup.py'
DOCS_FILE = 'installation.rst'
PY_IDENTIFIER = r'[a-zA-Z_][a-zA-Z0-9_\.]*'


def get_file_content(*path_elements: str) -> str:
    """
    Returns the whole text of a file located under ``AIRFLOW_SOURCES_DIR``.

    :param path_elements: path components of the file, relative to ``AIRFLOW_SOURCES_DIR``
    :return: content of the file
    """
    file_path = os.path.join(AIRFLOW_SOURCES_DIR, *path_elements)
    # Explicit encoding, so the result does not depend on the locale of the machine running the check.
    with open(file_path, encoding='utf-8') as file_to_read:
        return file_to_read.read()


def get_extras_from_setup() -> Dict[str, List[str]]:
    """
    Returns the entries of the EXTRAS_REQUIREMENTS dictionary from setup.py, grouped by the
    name of the requirements variable they point to, in format:
    {'requirements variable': ['alias1', 'alias2'], ...}

    :raises ValueError: when setup.py contains no ``EXTRAS_REQUIREMENTS = {...}`` section
    """
    setup_content = get_file_content(SETUP_PY_FILE)

    extras_section_regex = re.compile(
        r'^EXTRAS_REQUIREMENTS[^{]+{([^}]+)}', re.MULTILINE)
    extras_section_match = extras_section_regex.search(setup_content)
    if extras_section_match is None:
        # Fail with an explicit message instead of an IndexError on an empty findall() result.
        raise ValueError(f'No "EXTRAS_REQUIREMENTS = {{...}}" section found in {SETUP_PY_FILE}')
    extras_section = extras_section_match.group(1)

    # Groups: (1) the quoted extra name, (2) the first identifier of its value, (3) optional TODO comment.
    extras_regex = re.compile(
        rf'^\s+[\"\']({PY_IDENTIFIER})[\"\']:\s*({PY_IDENTIFIER})[^#\n]*(#\s*TODO.*)?$', re.MULTILINE)

    extras_dict: Dict[str, List[str]] = {}
    for alias, package, _ in extras_regex.findall(extras_section):
        extras_dict.setdefault(package, []).append(alias)
    return extras_dict


def get_extras_from_docs() -> List[str]:
    """
    Returns the extras named in ``pip install 'apache-airflow[...]'`` table rows of
    docs/installation.rst, in order of appearance. The 'all' extra is left out.
    """
    docs_content = get_file_content('docs', DOCS_FILE)

    extras_section_regex = re.compile(rf'^\|[^|]+\|.*pip install .apache-airflow\[({PY_IDENTIFIER})\].',
                                      re.MULTILINE)
    return [entry for entry in extras_section_regex.findall(docs_content) if entry != 'all']


def _find_discrepancies(setup_packages: Dict[str, List[str]], docs_packages: List[str]) -> str:
    """
    Returns the rows of the report table, one per mismatch; an empty string means both files agree.

    :param setup_packages: result of ``get_extras_from_setup``
    :param docs_packages: result of ``get_extras_from_docs``
    """
    row_format = "| {:20} | {:^10} | {:^10} |\n"
    documented = set(docs_packages)

    # A documented extra counts as present in setup.py when it equals either an extra name
    # (alias) or the name of a requirements variable used there.
    known_in_setup = set(setup_packages)
    for aliases in setup_packages.values():
        known_in_setup.update(aliases)

    rows = []
    # Requirements from setup.py with none of their aliases documented.
    for package in sorted(setup_packages):
        if documented.isdisjoint(setup_packages[package]):
            rows.append(row_format.format(package, "V", ""))
    # Documented extras that setup.py does not know.
    for extra in sorted(docs_packages):
        if extra not in known_in_setup:
            rows.append(row_format.format(extra, "", "V"))
    return "".join(rows)


def main() -> int:
    """
    Compares setup.py with docs/installation.rst and prints a report of the differences.

    :return: process exit code: 0 when both files are synchronized, 1 otherwise
    """
    output_table = _find_discrepancies(get_extras_from_setup(), get_extras_from_docs())
    if not output_table:
        return 0

    print(f"""
ERROR

"EXTRAS_REQUIREMENTS" section in {SETUP_PY_FILE} should be synchronized
with "Extra Packages" section in documentation file docs/{DOCS_FILE}.

here is a list of packages that are used but are not documented, or
documented although not used.
    """)
    print(".{:_^22}.{:_^12}.{:_^12}.".format("NAME", "SETUP", "INSTALLATION"))
    print(output_table)
    return 1


if __name__ == '__main__':
    # sys.exit instead of the ``exit`` helper, which is only installed by the ``site`` module.
    sys.exit(main())

# ---------------------------------------------------------------------------
# End of scripts/ci/pre_commit/pre_commit_check_setup_installation.py.
# The next part of the same patch (setup.py hunks) follows, kept unchanged:
#
# diff --git a/setup.py b/setup.py
# index 3ace091..9a7e732 100644
# --- a/setup.py
# +++ b/setup.py
# @@ -484,6 +484,15 @@ else:
#  EXTRAS_REQUIREMENTS = {
#      'all': devel_all,
#      'all_dbs': all_dbs,
# +    'amazon': aws,
# +    'apache.atlas': atlas,
# +    "apache.cassandra": cassandra,
# +    "apache.druid": druid,
# +    "apache.hdfs": hdfs,
# +    "apache.hive": hive,
# +    "apache.pinot": pinot,
# +    "apache.presto": presto,
# +    "apache.webhdfs": webhdfs,
#      'async': async_packages,
#      'atlas': atlas,
#      'aws': aws,
# @@ -497,11 +506,13 @@ EXTRAS_REQUIREMENTS = {
#      'celery': celery,
#      'cgroups': cgroups,
#      'cloudant': cloudant,
# +    'cncf.kubernetes': kubernetes,
#      'crypto': crypto,
#      'dask': dask,
#      'databricks': databricks,
#      'datadog': datadog,
#      'devel': devel_minreq,
# +    'devel_all': devel_all,
#      'devel_azure': devel_azure,
#      'devel_ci': devel_ci,
#      'devel_hadoop': devel_hadoop,
# @@ -513,6 +524,7 @@ EXTRAS_REQUIREMENTS = {
#      'gcp': gcp,
#      'gcp_api': gcp,
#      'github_enterprise': flask_oauth,
# +    'google': gcp,
#      'google_auth': flask_oauth,
#      'grpc': grpc,
#      'hashicorp': hashicorp,
# @@ -525,6 +537,10 @@ EXTRAS_REQUIREMENTS = {
#      'ldap': ldap,
#      'mongo': mongo,
#      'mssql': mssql,
# +    'microsoft.azure':
# +        azure_blob_storage + azure_container_instances + azure_cosmos + azure_data_lake + azure_secrets,
# +    'microsoft.mssql': mssql,
# +    'microsoft.winrm': winrm,
#      'mysql': mysql,
#      'oracle': oracle,
#      'papermill': papermill,