This is an automated email from the ASF dual-hosted git repository. potiuk pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push: new 5a6dcfd865 Refactor docker operator attribute validations and docs (#35571) 5a6dcfd865 is described below commit 5a6dcfd8655c9622f3838a0e66948dc3091afccb Author: Andrey Anshin <andrey.ans...@taragol.is> AuthorDate: Sun Nov 12 20:51:27 2023 +0400 Refactor docker operator attribute validations and docs (#35571) * Refactor docker operator attribute validations and docs * Network mode: None -> 'none' --- airflow/providers/docker/decorators/docker.py | 9 +-- airflow/providers/docker/operators/docker.py | 84 +++++++++++++--------- .../decorators/docker.rst | 75 ++++++++++++++----- docs/spelling_wordlist.txt | 2 + tests/providers/docker/operators/test_docker.py | 65 ++++++++++++++++- 5 files changed, 176 insertions(+), 59 deletions(-) diff --git a/airflow/providers/docker/decorators/docker.py b/airflow/providers/docker/decorators/docker.py index 153b55d4ab..81ff679f8a 100644 --- a/airflow/providers/docker/decorators/docker.py +++ b/airflow/providers/docker/decorators/docker.py @@ -28,14 +28,7 @@ import dill from airflow.decorators.base import DecoratedOperator, task_decorator_factory from airflow.providers.docker.operators.docker import DockerOperator - -try: - from airflow.utils.decorators import remove_task_decorator - - # This can be removed after we move to Airflow 2.4+ -except ImportError: - from airflow.utils.python_virtualenv import remove_task_decorator - +from airflow.utils.decorators import remove_task_decorator from airflow.utils.python_virtualenv import write_python_script if TYPE_CHECKING: diff --git a/airflow/providers/docker/operators/docker.py b/airflow/providers/docker/operators/docker.py index 94f3be9653..b60fd359e4 100644 --- a/airflow/providers/docker/operators/docker.py +++ b/airflow/providers/docker/operators/docker.py @@ -33,6 +33,7 @@ from docker.constants import DEFAULT_TIMEOUT_SECONDS from docker.errors import APIError from docker.types import LogConfig, Mount, Ulimit from dotenv import dotenv_values +from typing_extensions import Literal from airflow.exceptions import AirflowProviderDeprecationWarning from airflow.models import BaseOperator @@ -96,7 +97,7 @@ class DockerOperator(BaseOperator): :param environment: Environment variables to set in the container. (templated) :param private_environment: Private environment variables to set in the container. These are not templated, and hidden from the website. - :param env_file: Relative path to the .env file with environment variables to set in the container. + :param env_file: Relative path to the ``.env`` file with environment variables to set in the container. Overridden by variables in the environment parameter. (templated) :param force_pull: Pull the docker image on every run. Default is False. :param mem_limit: Maximum amount of memory the container can use. @@ -104,14 +105,14 @@ class DockerOperator(BaseOperator): or a string like ``128m`` or ``1g``. :param host_tmp_dir: Specify the location of the temporary directory on the host which will be mapped to tmp_dir. If not provided defaults to using the standard system temp directory. - :param network_mode: Network mode for the container. - It can be one of the following: - bridge - Create new network stack for the container with default docker bridge network - None - No networking for this container - container:<name|id> - Use the network stack of another container specified via <name|id> - host - Use the host network stack. Incompatible with `port_bindings` - '<network-name>|<network-id>' - Connects the container to user created network - (using `docker network create` command) + :param network_mode: Network mode for the container. It can be one of the following: + + - ``"bridge"``: Create new network stack for the container with default docker bridge network + - ``"none"``: No networking for this container + - ``"container:<name|id>"``: Use the network stack of another container specified via <name|id> + - ``"host"``: Use the host network stack. Incompatible with `port_bindings` + - ``"<network-name>|<network-id>"``: Connects the container to user created network + (using ``docker network create`` command) :param tls_ca_cert: Path to a PEM-encoded certificate authority to secure the docker connection. :param tls_client_cert: Path to the PEM-encoded certificate @@ -138,9 +139,11 @@ class DockerOperator(BaseOperator): :param docker_conn_id: The :ref:`Docker connection id <howto/connection:docker>` :param dns: Docker custom DNS servers :param dns_search: Docker custom DNS search domain - :param auto_remove: Auto-removal of the container on daemon side when the - container's process exits. - The default is never. + :param auto_remove: Enable removal of the container when the container's process exits. Possible values: + + - ``never``: (default) do not remove container + - ``success``: remove on success + - ``force``: always remove container :param shm_size: Size of ``/dev/shm`` in bytes. The size must be greater than 0. If omitted uses system default. :param tty: Allocate pseudo-TTY to the container @@ -148,6 +151,8 @@ class DockerOperator(BaseOperator): :param hostname: Optional hostname for the container. :param privileged: Give extended privileges to this container. :param cap_add: Include container capabilities + :param extra_hosts: Additional hostnames to resolve inside the container, + as a mapping of hostname to IP address. :param retrieve_output: Should this docker image consistently attempt to pull from and output file before manually shutting down the image. Useful for cases where users want a pickle serialized output that is not posted to logs @@ -166,11 +171,15 @@ class DockerOperator(BaseOperator): :param port_bindings: Publish a container's port(s) to the host. It is a dictionary of value where the key indicates the port to open inside the container and value indicates the host port that binds to the container port. - Incompatible with ``host`` in ``network_mode``. + Incompatible with ``"host"`` in ``network_mode``. :param ulimits: List of ulimit options to set for the container. Each item should be a :py:class:`docker.types.Ulimit` instance. """ + # !!! Changes in DockerOperator's arguments should be also reflected in !!! + # - docs/apache-airflow-providers-docker/decorators/docker.rst + # - airflow/decorators/__init__.pyi (by a separate PR) + template_fields: Sequence[str] = ("image", "command", "environment", "env_file", "container_name") template_fields_renderers = {"env_file": "yaml"} template_ext: Sequence[str] = ( @@ -211,7 +220,7 @@ class DockerOperator(BaseOperator): docker_conn_id: str | None = None, dns: list[str] | None = None, dns_search: list[str] | None = None, - auto_remove: str = "never", + auto_remove: Literal["never", "success", "force"] = "never", shm_size: int | None = None, tty: bool = False, hostname: str | None = None, @@ -225,28 +234,43 @@ class DockerOperator(BaseOperator): log_opts_max_size: str | None = None, log_opts_max_file: str | None = None, ipc_mode: str | None = None, - skip_exit_code: int | None = None, skip_on_exit_code: int | Container[int] | None = None, port_bindings: dict | None = None, ulimits: list[Ulimit] | None = None, **kwargs, ) -> None: - super().__init__(**kwargs) - self.api_version = api_version + if skip_exit_code := kwargs.pop("skip_exit_code", None): + warnings.warn( + "`skip_exit_code` is deprecated and will be removed in the future. " + "Please use `skip_on_exit_code` instead.", + AirflowProviderDeprecationWarning, + stacklevel=2, + ) + if skip_on_exit_code is not None and skip_exit_code != skip_on_exit_code: + msg = ( + f"Conflicting `skip_on_exit_code` provided, " + f"skip_on_exit_code={skip_on_exit_code!r}, skip_exit_code={skip_exit_code!r}." + ) + raise ValueError(msg) + skip_on_exit_code = skip_exit_code if isinstance(auto_remove, bool): warnings.warn( - "bool value for auto_remove is deprecated, please use 'never', 'success', or 'force' instead", + "bool value for `auto_remove` is deprecated and will be removed in the future. " + "Please use 'never', 'success', or 'force' instead", AirflowProviderDeprecationWarning, stacklevel=2, ) - if str(auto_remove) == "False": - self.auto_remove = "never" - elif str(auto_remove) == "True": - self.auto_remove = "success" - elif str(auto_remove) in ("never", "success", "force"): - self.auto_remove = auto_remove - else: - raise ValueError("unsupported auto_remove option, use 'never', 'success', or 'force' instead") + auto_remove = "success" if auto_remove else "never" + + super().__init__(**kwargs) + self.api_version = api_version + if not auto_remove or auto_remove not in ("never", "success", "force"): + msg = ( + f"Invalid `auto_remove` value {auto_remove!r}, " + "expected one of 'never', 'success', or 'force'." + ) + raise ValueError(msg) + self.auto_remove = auto_remove self.command = command self.container_name = container_name self.cpus = cpus @@ -291,14 +315,6 @@ class DockerOperator(BaseOperator): self.log_opts_max_size = log_opts_max_size self.log_opts_max_file = log_opts_max_file self.ipc_mode = ipc_mode - if skip_exit_code is not None: - warnings.warn( - "skip_exit_code is deprecated. Please use skip_on_exit_code", - AirflowProviderDeprecationWarning, - stacklevel=2, - ) - skip_on_exit_code = skip_exit_code - self.skip_on_exit_code = ( skip_on_exit_code if isinstance(skip_on_exit_code, Container) diff --git a/docs/apache-airflow-providers-docker/decorators/docker.rst b/docs/apache-airflow-providers-docker/decorators/docker.rst index 809e6d0bc1..1ef2af81d7 100644 --- a/docs/apache-airflow-providers-docker/decorators/docker.rst +++ b/docs/apache-airflow-providers-docker/decorators/docker.rst @@ -32,7 +32,7 @@ The following parameters are supported in Docker Task decorator. multiple_outputs If set, function return value will be unrolled to multiple XCom values. - Dict will unroll to XCom values with keys as XCom keys. Defaults to False. + Dict will unroll to XCom values with keys as XCom keys. Defaults to False. use_dill Whether to use dill or pickle for serialization python_command @@ -44,7 +44,7 @@ api_version Remote API version. Set to ``auto`` to automatically detect the server's version. container_name Name of the container. Optional (templated) -cpus: +cpus Number of CPUs to assign to the container. This value gets multiplied with 1024. docker_url URL of the host running the docker daemon. @@ -54,6 +54,9 @@ environment private_environment Private environment variables to set in the container. These are not templated, and hidden from the website. +env_file + Relative path to the ``.env`` file with environment variables to set in the container. + Overridden by variables in the environment parameter. force_pull Pull the docker image on every run. Default is False. mem_limit @@ -64,29 +67,27 @@ host_tmp_dir Specify the location of the temporary directory on the host which will be mapped to tmp_dir. If not provided defaults to using the standard system temp directory. network_mode - Network mode for the container. + Network mode for the container. It can be one of the following - It can be one of the following: - bridge - Create new network stack for the container with default docker bridge network - 'None' - No networking for this container - container:<name> or <id> - Use the network stack of another container specified via <name> or <id> - host - Use the host network stack. Incompatible with `port_bindings` - '<network-name>' or '<network-id>' - Connects the container to user created network(using `docker network create` command) + - ``"bridge"``: Create new network stack for the container with default docker bridge network + - ``"none"``: No networking for this container + - ``"container:<name>"`` or ``"container:<id>"``: Use the network stack of another container specified via <name> or <id> + - ``"host"``: Use the host network stack. Incompatible with **port_bindings** + - ``"<network-name>"`` or ``"<network-id>"``: Connects the container to user created network (using ``docker network create`` command) tls_ca_cert Path to a PEM-encoded certificate authority to secure the docker connection. tls_client_cert Path to the PEM-encoded certificate used to authenticate docker client. tls_client_key Path to the PEM-encoded key used to authenticate docker client. +tls_verify + Set ``True`` to verify the validity of the provided certificate. tls_hostname Hostname to match against the docker server certificate or False to disable the check. tls_ssl_version Version of SSL to use when communicating with docker daemon. +mount_tmp_dir + Specify whether the temporary directory should be bind-mounted from the host to the container. tmp_dir Mount point inside the container to a temporary directory created on the host by the operator. @@ -99,6 +100,8 @@ mounts ``['/host/path:/container/path', '/host/path2:/container/path2:ro']``. working_dir Working directory to set on the container (equivalent to the -w switch the docker client) +entrypoint + Overwrite the default ENTRYPOINT of the image xcom_all Push all the stdout or just the last line. The default is False (last line). docker_conn_id @@ -108,18 +111,58 @@ dns dns_search Docker custom DNS search domain auto_remove - Auto-removal of the container on daemon side when the container's process exits. - The default is False. + Enable removal of the container when the container's process exits. Possible values + + - ``never``: (default) do not remove container + - ``success``: remove on success + - ``force``: always remove container shm_size Size of ``/dev/shm`` in bytes. The size must be greater than 0. If omitted uses system default. tty Allocate pseudo-TTY to the container This needs to be set see logs of the Docker container. +hostname + Optional hostname for the container. privileged Give extended privileges to this container. cap_add Include container capabilities +extra_hosts + Additional hostnames to resolve inside the container, as a mapping of hostname to IP address. +retrieve_output + Should this docker image consistently attempt to pull from and output + file before manually shutting down the image. Useful for cases where users want a pickle serialized + output that is not posted to logs +retrieve_output_path + path for output file that will be retrieved and passed to xcom +timeout + Default timeout for API calls, in seconds. +device_requests + Expose host resources such as GPUs to the container. +log_opts_max_size + The maximum size of the log before it is rolled. + A positive integer plus a modifier representing the unit of measure (k, m, or g). + Eg: 10m or 1g Defaults to -1 (unlimited). +log_opts_max_file + The maximum number of log files that can be present. + If rolling the logs creates excess files, the oldest file is removed. + Only effective when max-size is also set. A positive integer. Defaults to 1. +ipc_mode + Set the IPC mode for the container. +skip_on_exit_code + If task exits with this exit code, leave the task + in ``skipped`` state (default: None). If set to ``None``, any non-zero + exit code will be treated as a failure. +port_bindings + Publish a container's port(s) to the host. It is a + dictionary of value where the key indicates the port to open inside the container + and value indicates the host port that binds to the container port. + Incompatible with ``"host"`` in ``network_mode``. +ulimits + List of ulimit options to set for the container. + Each item should be a ``docker.types.Ulimit`` instance. + Usage Example ------------- diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 04efc4813b..3979ae3c59 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -802,6 +802,7 @@ intvl Investorise io ip +ipc iPython irreproducible IRSA @@ -1627,6 +1628,7 @@ ui uid ukey ulimit +ulimits Umask umask Un diff --git a/tests/providers/docker/operators/test_docker.py b/tests/providers/docker/operators/test_docker.py index 11b992c4be..5a89a9606a 100644 --- a/tests/providers/docker/operators/test_docker.py +++ b/tests/providers/docker/operators/test_docker.py @@ -26,7 +26,7 @@ from docker import APIClient from docker.errors import APIError from docker.types import DeviceRequest, LogConfig, Mount, Ulimit -from airflow.exceptions import AirflowException, AirflowSkipException +from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning, AirflowSkipException from airflow.providers.docker.exceptions import DockerContainerFailedException from airflow.providers.docker.operators.docker import DockerOperator @@ -722,3 +722,66 @@ class TestDockerOperator: assert "host_config" in self.client_mock.create_container.call_args.kwargs assert "ulimits" in self.client_mock.create_host_config.call_args.kwargs assert ulimits == self.client_mock.create_host_config.call_args.kwargs["ulimits"] + + @pytest.mark.parametrize( + "auto_remove, expected", + [ + pytest.param(True, "success", id="true"), + pytest.param(False, "never", id="false"), + ], + ) + def test_bool_auto_remove_fallback(self, auto_remove, expected): + with pytest.warns(AirflowProviderDeprecationWarning, match="bool value for `auto_remove`"): + op = DockerOperator(task_id="test", image="test", auto_remove=auto_remove) + assert op.auto_remove == expected + + @pytest.mark.parametrize( + "auto_remove", + ["True", "false", pytest.param(None, id="none"), pytest.param(None, id="empty"), "here-and-now"], + ) + def test_auto_remove_invalid(self, auto_remove): + with pytest.raises(ValueError, match="Invalid `auto_remove` value"): + DockerOperator(task_id="test", image="test", auto_remove=auto_remove) + + @pytest.mark.parametrize( + "skip_exit_code, skip_on_exit_code, expected", + [ + pytest.param(101, None, [101], id="skip-on-exit-code-not-set"), + pytest.param(102, 102, [102], id="skip-on-exit-code-same"), + ], + ) + def test_skip_exit_code_fallback(self, skip_exit_code, skip_on_exit_code, expected): + warning_match = "`skip_exit_code` is deprecated and will be removed in the future." + + with pytest.warns(AirflowProviderDeprecationWarning, match=warning_match): + op = DockerOperator( + task_id="test", + image="test", + skip_exit_code=skip_exit_code, + skip_on_exit_code=skip_on_exit_code, + ) + assert op.skip_on_exit_code == expected + + @pytest.mark.parametrize( + "skip_exit_code, skip_on_exit_code", + [ + pytest.param(103, 0, id="skip-on-exit-code-zero"), + pytest.param(104, 105, id="skip-on-exit-code-not-same"), + ], + ) + def test_skip_exit_code_invalid(self, skip_exit_code, skip_on_exit_code): + warning_match = "`skip_exit_code` is deprecated and will be removed in the future." + error_match = "Conflicting `skip_on_exit_code` provided" + + with pytest.warns(AirflowProviderDeprecationWarning, match=warning_match): + with pytest.raises(ValueError, match=error_match): + DockerOperator(task_id="test", image="test", skip_exit_code=103, skip_on_exit_code=104) + + with pytest.warns(AirflowProviderDeprecationWarning, match=warning_match): + with pytest.raises(ValueError, match=error_match): + DockerOperator( + task_id="test", + image="test", + skip_exit_code=skip_exit_code, + skip_on_exit_code=skip_on_exit_code, + )