This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 5a6dcfd865 Refactor docker operator attribute validations and docs 
(#35571)
5a6dcfd865 is described below

commit 5a6dcfd8655c9622f3838a0e66948dc3091afccb
Author: Andrey Anshin <andrey.ans...@taragol.is>
AuthorDate: Sun Nov 12 20:51:27 2023 +0400

    Refactor docker operator attribute validations and docs (#35571)
    
    * Refactor docker operator attribute validations and docs
    
    * Network mode: None -> 'none'
---
 airflow/providers/docker/decorators/docker.py      |  9 +--
 airflow/providers/docker/operators/docker.py       | 84 +++++++++++++---------
 .../decorators/docker.rst                          | 75 ++++++++++++++-----
 docs/spelling_wordlist.txt                         |  2 +
 tests/providers/docker/operators/test_docker.py    | 65 ++++++++++++++++-
 5 files changed, 176 insertions(+), 59 deletions(-)

diff --git a/airflow/providers/docker/decorators/docker.py 
b/airflow/providers/docker/decorators/docker.py
index 153b55d4ab..81ff679f8a 100644
--- a/airflow/providers/docker/decorators/docker.py
+++ b/airflow/providers/docker/decorators/docker.py
@@ -28,14 +28,7 @@ import dill
 
 from airflow.decorators.base import DecoratedOperator, task_decorator_factory
 from airflow.providers.docker.operators.docker import DockerOperator
-
-try:
-    from airflow.utils.decorators import remove_task_decorator
-
-    # This can be removed after we move to Airflow 2.4+
-except ImportError:
-    from airflow.utils.python_virtualenv import remove_task_decorator
-
+from airflow.utils.decorators import remove_task_decorator
 from airflow.utils.python_virtualenv import write_python_script
 
 if TYPE_CHECKING:
diff --git a/airflow/providers/docker/operators/docker.py 
b/airflow/providers/docker/operators/docker.py
index 94f3be9653..b60fd359e4 100644
--- a/airflow/providers/docker/operators/docker.py
+++ b/airflow/providers/docker/operators/docker.py
@@ -33,6 +33,7 @@ from docker.constants import DEFAULT_TIMEOUT_SECONDS
 from docker.errors import APIError
 from docker.types import LogConfig, Mount, Ulimit
 from dotenv import dotenv_values
+from typing_extensions import Literal
 
 from airflow.exceptions import AirflowProviderDeprecationWarning
 from airflow.models import BaseOperator
@@ -96,7 +97,7 @@ class DockerOperator(BaseOperator):
     :param environment: Environment variables to set in the container. 
(templated)
     :param private_environment: Private environment variables to set in the 
container.
         These are not templated, and hidden from the website.
-    :param env_file: Relative path to the .env file with environment variables 
to set in the container.
+    :param env_file: Relative path to the ``.env`` file with environment 
variables to set in the container.
         Overridden by variables in the environment parameter. (templated)
     :param force_pull: Pull the docker image on every run. Default is False.
     :param mem_limit: Maximum amount of memory the container can use.
@@ -104,14 +105,14 @@ class DockerOperator(BaseOperator):
         or a string like ``128m`` or ``1g``.
     :param host_tmp_dir: Specify the location of the temporary directory on 
the host which will
         be mapped to tmp_dir. If not provided defaults to using the standard 
system temp directory.
-    :param network_mode: Network mode for the container.
-        It can be one of the following:
-        bridge - Create new network stack for the container with default 
docker bridge network
-        None - No networking for this container
-        container:<name|id> - Use the network stack of another container 
specified via <name|id>
-        host - Use the host network stack. Incompatible with `port_bindings`
-        '<network-name>|<network-id>' - Connects the container to user created 
network
-        (using `docker network create` command)
+    :param network_mode: Network mode for the container. It can be one of the 
following:
+
+        - ``"bridge"``: Create new network stack for the container with 
default docker bridge network
+        - ``"none"``: No networking for this container
+        - ``"container:<name|id>"``: Use the network stack of another 
container specified via <name|id>
+        - ``"host"``: Use the host network stack. Incompatible with 
`port_bindings`
+        - ``"<network-name>|<network-id>"``: Connects the container to user 
created network
+          (using ``docker network create`` command)
     :param tls_ca_cert: Path to a PEM-encoded certificate authority
         to secure the docker connection.
     :param tls_client_cert: Path to the PEM-encoded certificate
@@ -138,9 +139,11 @@ class DockerOperator(BaseOperator):
     :param docker_conn_id: The :ref:`Docker connection id 
<howto/connection:docker>`
     :param dns: Docker custom DNS servers
     :param dns_search: Docker custom DNS search domain
-    :param auto_remove: Auto-removal of the container on daemon side when the
-        container's process exits.
-        The default is never.
+    :param auto_remove: Enable removal of the container when the container's 
process exits. Possible values:
+
+        - ``never``: (default) do not remove container
+        - ``success``: remove on success
+        - ``force``: always remove container
     :param shm_size: Size of ``/dev/shm`` in bytes. The size must be
         greater than 0. If omitted uses system default.
     :param tty: Allocate pseudo-TTY to the container
@@ -148,6 +151,8 @@ class DockerOperator(BaseOperator):
     :param hostname: Optional hostname for the container.
     :param privileged: Give extended privileges to this container.
     :param cap_add: Include container capabilities
+    :param extra_hosts: Additional hostnames to resolve inside the container,
+        as a mapping of hostname to IP address.
     :param retrieve_output: Should this docker image consistently attempt to 
pull from and output
         file before manually shutting down the image. Useful for cases where 
users want a pickle serialized
         output that is not posted to logs
@@ -166,11 +171,15 @@ class DockerOperator(BaseOperator):
     :param port_bindings: Publish a container's port(s) to the host. It is a
         dictionary of value where the key indicates the port to open inside 
the container
         and value indicates the host port that binds to the container port.
-        Incompatible with ``host`` in ``network_mode``.
+        Incompatible with ``"host"`` in ``network_mode``.
     :param ulimits: List of ulimit options to set for the container. Each item 
should
         be a :py:class:`docker.types.Ulimit` instance.
     """
 
+    # !!! Changes in DockerOperator's arguments should be also reflected in !!!
+    #  - docs/apache-airflow-providers-docker/decorators/docker.rst
+    #  - airflow/decorators/__init__.pyi  (by a separate PR)
+
     template_fields: Sequence[str] = ("image", "command", "environment", 
"env_file", "container_name")
     template_fields_renderers = {"env_file": "yaml"}
     template_ext: Sequence[str] = (
@@ -211,7 +220,7 @@ class DockerOperator(BaseOperator):
         docker_conn_id: str | None = None,
         dns: list[str] | None = None,
         dns_search: list[str] | None = None,
-        auto_remove: str = "never",
+        auto_remove: Literal["never", "success", "force"] = "never",
         shm_size: int | None = None,
         tty: bool = False,
         hostname: str | None = None,
@@ -225,28 +234,43 @@ class DockerOperator(BaseOperator):
         log_opts_max_size: str | None = None,
         log_opts_max_file: str | None = None,
         ipc_mode: str | None = None,
-        skip_exit_code: int | None = None,
         skip_on_exit_code: int | Container[int] | None = None,
         port_bindings: dict | None = None,
         ulimits: list[Ulimit] | None = None,
         **kwargs,
     ) -> None:
-        super().__init__(**kwargs)
-        self.api_version = api_version
+        if skip_exit_code := kwargs.pop("skip_exit_code", None):
+            warnings.warn(
+                "`skip_exit_code` is deprecated and will be removed in the 
future. "
+                "Please use `skip_on_exit_code` instead.",
+                AirflowProviderDeprecationWarning,
+                stacklevel=2,
+            )
+            if skip_on_exit_code is not None and skip_exit_code != 
skip_on_exit_code:
+                msg = (
+                    f"Conflicting `skip_on_exit_code` provided, "
+                    f"skip_on_exit_code={skip_on_exit_code!r}, 
skip_exit_code={skip_exit_code!r}."
+                )
+                raise ValueError(msg)
+            skip_on_exit_code = skip_exit_code
         if isinstance(auto_remove, bool):
             warnings.warn(
-                "bool value for auto_remove is deprecated, please use 'never', 
'success', or 'force' instead",
+                "bool value for `auto_remove` is deprecated and will be 
removed in the future. "
+                "Please use 'never', 'success', or 'force' instead",
                 AirflowProviderDeprecationWarning,
                 stacklevel=2,
             )
-        if str(auto_remove) == "False":
-            self.auto_remove = "never"
-        elif str(auto_remove) == "True":
-            self.auto_remove = "success"
-        elif str(auto_remove) in ("never", "success", "force"):
-            self.auto_remove = auto_remove
-        else:
-            raise ValueError("unsupported auto_remove option, use 'never', 
'success', or 'force' instead")
+            auto_remove = "success" if auto_remove else "never"
+
+        super().__init__(**kwargs)
+        self.api_version = api_version
+        if not auto_remove or auto_remove not in ("never", "success", "force"):
+            msg = (
+                f"Invalid `auto_remove` value {auto_remove!r}, "
+                "expected one of 'never', 'success', or 'force'."
+            )
+            raise ValueError(msg)
+        self.auto_remove = auto_remove
         self.command = command
         self.container_name = container_name
         self.cpus = cpus
@@ -291,14 +315,6 @@ class DockerOperator(BaseOperator):
         self.log_opts_max_size = log_opts_max_size
         self.log_opts_max_file = log_opts_max_file
         self.ipc_mode = ipc_mode
-        if skip_exit_code is not None:
-            warnings.warn(
-                "skip_exit_code is deprecated. Please use skip_on_exit_code",
-                AirflowProviderDeprecationWarning,
-                stacklevel=2,
-            )
-            skip_on_exit_code = skip_exit_code
-
         self.skip_on_exit_code = (
             skip_on_exit_code
             if isinstance(skip_on_exit_code, Container)
diff --git a/docs/apache-airflow-providers-docker/decorators/docker.rst 
b/docs/apache-airflow-providers-docker/decorators/docker.rst
index 809e6d0bc1..1ef2af81d7 100644
--- a/docs/apache-airflow-providers-docker/decorators/docker.rst
+++ b/docs/apache-airflow-providers-docker/decorators/docker.rst
@@ -32,7 +32,7 @@ The following parameters are supported in Docker Task 
decorator.
 
 multiple_outputs
     If set, function return value will be unrolled to multiple XCom values.
-        Dict will unroll to XCom values with keys as XCom keys. Defaults to 
False.
+    Dict will unroll to XCom values with keys as XCom keys. Defaults to False.
 use_dill
     Whether to use dill or pickle for serialization
 python_command
@@ -44,7 +44,7 @@ api_version
     Remote API version. Set to ``auto`` to automatically detect the server's 
version.
 container_name
     Name of the container. Optional (templated)
-cpus:
+cpus
     Number of CPUs to assign to the container. This value gets multiplied with 
1024.
 docker_url
     URL of the host running the docker daemon.
@@ -54,6 +54,9 @@ environment
 private_environment
     Private environment variables to set in the container.
     These are not templated, and hidden from the website.
+env_file
+    Relative path to the ``.env`` file with environment variables to set in 
the container.
+    Overridden by variables in the environment parameter.
 force_pull
     Pull the docker image on every run. Default is False.
 mem_limit
@@ -64,29 +67,27 @@ host_tmp_dir
     Specify the location of the temporary directory on the host which will
     be mapped to tmp_dir. If not provided defaults to using the standard 
system temp directory.
 network_mode
-    Network mode for the container.
+    Network mode for the container. It can be one of the following
 
-    It can be one of the following:
-        bridge
-            Create new network stack for the container with default docker 
bridge network
-        'None'
-            No networking for this container
-        container:<name> or <id>
-            Use the network stack of another container specified via <name> or 
<id>
-        host
-            Use the host network stack. Incompatible with `port_bindings`
-        '<network-name>' or '<network-id>'
-            Connects the container to user created network(using `docker 
network create` command)
+    - ``"bridge"``: Create new network stack for the container with default 
docker bridge network
+    - ``"none"``: No networking for this container
+    - ``"container:<name>"`` or ``"container:<id>"``: Use the network stack of 
another container specified via <name> or <id>
+    - ``"host"``: Use the host network stack. Incompatible with 
**port_bindings**
+    - ``"<network-name>"`` or ``"<network-id>"``: Connects the container to 
user created network (using ``docker network create`` command)
 tls_ca_cert
     Path to a PEM-encoded certificate authority to secure the docker 
connection.
 tls_client_cert
     Path to the PEM-encoded certificate used to authenticate docker client.
 tls_client_key
     Path to the PEM-encoded key used to authenticate docker client.
+tls_verify
+    Set ``True`` to verify the validity of the provided certificate.
 tls_hostname
     Hostname to match against the docker server certificate or False to 
disable the check.
 tls_ssl_version
     Version of SSL to use when communicating with docker daemon.
+mount_tmp_dir
+    Specify whether the temporary directory should be bind-mounted from the 
host to the container.
 tmp_dir
     Mount point inside the container to
     a temporary directory created on the host by the operator.
@@ -99,6 +100,8 @@ mounts
     ``['/host/path:/container/path', '/host/path2:/container/path2:ro']``.
 working_dir
     Working directory to set on the container (equivalent to the -w switch the 
docker client)
+entrypoint
+    Overwrite the default ENTRYPOINT of the image
 xcom_all
     Push all the stdout or just the last line. The default is False (last 
line).
 docker_conn_id
@@ -108,18 +111,58 @@ dns
 dns_search
     Docker custom DNS search domain
 auto_remove
-    Auto-removal of the container on daemon side when the container's process 
exits.
-    The default is False.
+    Enable removal of the container when the container's process exits. 
Possible values
+
+    - ``never``: (default) do not remove container
+    - ``success``: remove on success
+    - ``force``: always remove container
 shm_size
     Size of ``/dev/shm`` in bytes. The size must be greater than 0.
     If omitted uses system default.
 tty
     Allocate pseudo-TTY to the container
     This needs to be set see logs of the Docker container.
+hostname
+    Optional hostname for the container.
 privileged
     Give extended privileges to this container.
 cap_add
     Include container capabilities
+extra_hosts
+    Additional hostnames to resolve inside the container, as a mapping of 
hostname to IP address.
+retrieve_output
+    Should this docker image consistently attempt to pull from and output
+    file before manually shutting down the image. Useful for cases where users 
want a pickle serialized
+    output that is not posted to logs
+retrieve_output_path
+    path for output file that will be retrieved and passed to xcom
+timeout
+    Default timeout for API calls, in seconds.
+device_requests
+    Expose host resources such as GPUs to the container.
+log_opts_max_size
+    The maximum size of the log before it is rolled.
+    A positive integer plus a modifier representing the unit of measure (k, m, 
or g).
+    Eg: 10m or 1g Defaults to -1 (unlimited).
+log_opts_max_file
+    The maximum number of log files that can be present.
+    If rolling the logs creates excess files, the oldest file is removed.
+    Only effective when max-size is also set. A positive integer. Defaults to 
1.
+ipc_mode
+    Set the IPC mode for the container.
+skip_on_exit_code
+    If task exits with this exit code, leave the task
+    in ``skipped`` state (default: None). If set to ``None``, any non-zero
+    exit code will be treated as a failure.
+port_bindings
+    Publish a container's port(s) to the host. It is a
+    dictionary of value where the key indicates the port to open inside the 
container
+    and value indicates the host port that binds to the container port.
+    Incompatible with ``"host"`` in ``network_mode``.
+ulimits
+    List of ulimit options to set for the container.
+    Each item should be a ``docker.types.Ulimit`` instance.
+
 
 Usage Example
 -------------
diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
index 04efc4813b..3979ae3c59 100644
--- a/docs/spelling_wordlist.txt
+++ b/docs/spelling_wordlist.txt
@@ -802,6 +802,7 @@ intvl
 Investorise
 io
 ip
+ipc
 iPython
 irreproducible
 IRSA
@@ -1627,6 +1628,7 @@ ui
 uid
 ukey
 ulimit
+ulimits
 Umask
 umask
 Un
diff --git a/tests/providers/docker/operators/test_docker.py 
b/tests/providers/docker/operators/test_docker.py
index 11b992c4be..5a89a9606a 100644
--- a/tests/providers/docker/operators/test_docker.py
+++ b/tests/providers/docker/operators/test_docker.py
@@ -26,7 +26,7 @@ from docker import APIClient
 from docker.errors import APIError
 from docker.types import DeviceRequest, LogConfig, Mount, Ulimit
 
-from airflow.exceptions import AirflowException, AirflowSkipException
+from airflow.exceptions import AirflowException, 
AirflowProviderDeprecationWarning, AirflowSkipException
 from airflow.providers.docker.exceptions import DockerContainerFailedException
 from airflow.providers.docker.operators.docker import DockerOperator
 
@@ -722,3 +722,66 @@ class TestDockerOperator:
         assert "host_config" in 
self.client_mock.create_container.call_args.kwargs
         assert "ulimits" in 
self.client_mock.create_host_config.call_args.kwargs
         assert ulimits == 
self.client_mock.create_host_config.call_args.kwargs["ulimits"]
+
+    @pytest.mark.parametrize(
+        "auto_remove, expected",
+        [
+            pytest.param(True, "success", id="true"),
+            pytest.param(False, "never", id="false"),
+        ],
+    )
+    def test_bool_auto_remove_fallback(self, auto_remove, expected):
+        with pytest.warns(AirflowProviderDeprecationWarning, match="bool value 
for `auto_remove`"):
+            op = DockerOperator(task_id="test", image="test", 
auto_remove=auto_remove)
+        assert op.auto_remove == expected
+
+    @pytest.mark.parametrize(
+        "auto_remove",
+        ["True", "false", pytest.param(None, id="none"), pytest.param(None, 
id="empty"), "here-and-now"],
+    )
+    def test_auto_remove_invalid(self, auto_remove):
+        with pytest.raises(ValueError, match="Invalid `auto_remove` value"):
+            DockerOperator(task_id="test", image="test", 
auto_remove=auto_remove)
+
+    @pytest.mark.parametrize(
+        "skip_exit_code, skip_on_exit_code, expected",
+        [
+            pytest.param(101, None, [101], id="skip-on-exit-code-not-set"),
+            pytest.param(102, 102, [102], id="skip-on-exit-code-same"),
+        ],
+    )
+    def test_skip_exit_code_fallback(self, skip_exit_code, skip_on_exit_code, 
expected):
+        warning_match = "`skip_exit_code` is deprecated and will be removed in 
the future."
+
+        with pytest.warns(AirflowProviderDeprecationWarning, 
match=warning_match):
+            op = DockerOperator(
+                task_id="test",
+                image="test",
+                skip_exit_code=skip_exit_code,
+                skip_on_exit_code=skip_on_exit_code,
+            )
+            assert op.skip_on_exit_code == expected
+
+    @pytest.mark.parametrize(
+        "skip_exit_code, skip_on_exit_code",
+        [
+            pytest.param(103, 0, id="skip-on-exit-code-zero"),
+            pytest.param(104, 105, id="skip-on-exit-code-not-same"),
+        ],
+    )
+    def test_skip_exit_code_invalid(self, skip_exit_code, skip_on_exit_code):
+        warning_match = "`skip_exit_code` is deprecated and will be removed in 
the future."
+        error_match = "Conflicting `skip_on_exit_code` provided"
+
+        with pytest.warns(AirflowProviderDeprecationWarning, 
match=warning_match):
+            with pytest.raises(ValueError, match=error_match):
+                DockerOperator(task_id="test", image="test", 
skip_exit_code=103, skip_on_exit_code=104)
+
+        with pytest.warns(AirflowProviderDeprecationWarning, 
match=warning_match):
+            with pytest.raises(ValueError, match=error_match):
+                DockerOperator(
+                    task_id="test",
+                    image="test",
+                    skip_exit_code=skip_exit_code,
+                    skip_on_exit_code=skip_on_exit_code,
+                )

Reply via email to