This is an automated email from the ASF dual-hosted git repository.

kaxil pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 007ce9d11eb Add Snowflake Workload Identity Federation (WIF) support 
(#68107)
007ce9d11eb is described below

commit 007ce9d11eb2d5ff73a5a99b0eeb2bcfe6723cfb
Author: Kaxil Naik <[email protected]>
AuthorDate: Sat Jun 6 02:50:58 2026 +0100

    Add Snowflake Workload Identity Federation (WIF) support (#68107)
    
    SnowflakeHook accepted authenticator=WORKLOAD_IDENTITY but dropped the
    workload_identity_provider param, so the connector rejected the connection
    attempt with "251017: workload_identity_provider must be set". Forward the
    param from the connection extra so keyless Workload Identity Federation works.
    
    One value (AWS, AZURE, GCP or OIDC) covers all clouds. Also expose it as
    a connection form widget, document it, and bump snowflake-connector-python
    to >=3.17.0 (the first version with WIF).
    
    Closes: https://github.com/apache/airflow/issues/54983
    
    * Forward OIDC token for Snowflake WORKLOAD_IDENTITY auth
    
    AWS, AZURE and GCP fetch the workload identity token from the cloud
    metadata service, but the OIDC provider requires the caller to supply
    the token. Without it the connector raises "token must be provided if
    workload_identity_provider=OIDC". Forward ``token`` (inline JWT) and
    ``token_file_path`` (a file the connector reads, suited to rotated
    tokens) from the connection extra, and document both.
---
 docs/spelling_wordlist.txt                         |  1 +
 generated/provider_dependencies.json               |  2 +-
 generated/provider_dependencies.json.sha256sum     |  2 +-
 providers/snowflake/README.rst                     |  2 +-
 providers/snowflake/docs/connections/snowflake.rst | 42 +++++++++-
 providers/snowflake/docs/index.rst                 |  2 +-
 providers/snowflake/pyproject.toml                 |  3 +-
 .../airflow/providers/snowflake/hooks/snowflake.py | 23 +++++-
 .../tests/unit/snowflake/hooks/test_snowflake.py   | 95 ++++++++++++++++++++++
 uv.lock                                            |  2 +-
 10 files changed, 166 insertions(+), 8 deletions(-)

diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
index 202d24f00a3..00c557d1eb2 100644
--- a/docs/spelling_wordlist.txt
+++ b/docs/spelling_wordlist.txt
@@ -1140,6 +1140,7 @@ observability
 od
 odbc
 odps
+OIDC
 ok
 oklch
 Okta
diff --git a/generated/provider_dependencies.json 
b/generated/provider_dependencies.json
index ebe2ce88b40..d9c5dac2752 100644
--- a/generated/provider_dependencies.json
+++ b/generated/provider_dependencies.json
@@ -1789,7 +1789,7 @@
       "pyarrow>=18.0.0; python_version >= '3.13' and python_version < '3.14'",
       "pyarrow>=22.0.0; python_version >= '3.14'",
       "setuptools>=80.0.0,<9999",
-      "snowflake-connector-python>=3.16.0",
+      "snowflake-connector-python>=3.17.0",
       "snowflake-snowpark-python>=1.17.0,<9999;python_version<'3.12'",
       "snowflake-snowpark-python>=1.27.0,<9999;python_version>='3.12' and 
python_version<'3.14'",
       "snowflake-sqlalchemy>=1.7.0"
diff --git a/generated/provider_dependencies.json.sha256sum 
b/generated/provider_dependencies.json.sha256sum
index dffaafcb16b..8a645f848dd 100644
--- a/generated/provider_dependencies.json.sha256sum
+++ b/generated/provider_dependencies.json.sha256sum
@@ -1 +1 @@
-e7408d69a7c8076d9e114aea31c0240d4dcf577977ed0de1b2f0dec8a7251a91
+86e39c620f3926c99e1c702a496d6161032e1a3ac69eba7da10214a2c4ba24f1
diff --git a/providers/snowflake/README.rst b/providers/snowflake/README.rst
index 68d72c3b2e2..50c7567a7c5 100644
--- a/providers/snowflake/README.rst
+++ b/providers/snowflake/README.rst
@@ -62,7 +62,7 @@ PIP package                                 Version required
 ``pyarrow``                                 ``>=16.1.0; python_version < 
"3.13"``
 ``pyarrow``                                 ``>=18.0.0; python_version >= 
"3.13" and python_version < "3.14"``
 ``pyarrow``                                 ``>=22.0.0; python_version >= 
"3.14"``
-``snowflake-connector-python``              ``>=3.16.0``
+``snowflake-connector-python``              ``>=3.17.0``
 ``snowflake-sqlalchemy``                    ``>=1.7.0``
 ``snowflake-snowpark-python``               ``>=1.17.0,<9999; python_version < 
"3.12"``
 ``snowflake-snowpark-python``               ``>=1.27.0,<9999; python_version 
>= "3.12" and python_version < "3.14"``
diff --git a/providers/snowflake/docs/connections/snowflake.rst 
b/providers/snowflake/docs/connections/snowflake.rst
index ba73dd4a5b9..c4e6eb2f0fc 100644
--- a/providers/snowflake/docs/connections/snowflake.rst
+++ b/providers/snowflake/docs/connections/snowflake.rst
@@ -58,9 +58,24 @@ Extra (optional)
     * ``region``: Warehouse region.
     * ``warehouse``: Snowflake warehouse name.
     * ``role``: Snowflake role.
-    * ``authenticator``: To connect using OAuth set this parameter ``oauth``. 
For Programmatic Access
+    * ``authenticator``: To connect using OAuth set this parameter ``oauth``. 
To connect without a stored secret using
+      `Workload Identity Federation 
<https://docs.snowflake.com/en/user-guide/workload-identity-federation>`_,
+      set it to ``WORKLOAD_IDENTITY`` and also set 
``workload_identity_provider`` (see below). For Programmatic Access
       Token (PAT) authentication, no special authenticator is required — 
simply set the PAT token as
       the Password field. See `Snowflake PAT documentation 
<https://docs.snowflake.com/en/user-guide/programmatic-access-tokens>`_.
+    * ``workload_identity_provider``: The cloud whose workload identity is 
used as the Snowflake credential
+      when ``authenticator`` is ``WORKLOAD_IDENTITY``. One of ``AWS``, 
``AZURE``, ``GCP`` or ``OIDC``. With
+      Workload Identity Federation no long-lived secret (password, key-pair or 
PAT) is stored; the workload's
+      cloud identity is the credential. Requires 
``snowflake-connector-python>=3.17.0`` and the workload to
+      run on the matching cloud. ``AWS``, ``AZURE`` and ``GCP`` fetch the 
identity token from the cloud's
+      metadata service. ``OIDC`` instead requires the token to be supplied via 
``token`` or ``token_file_path``
+      (see below); see `custom OIDC configuration
+      
<https://docs.snowflake.com/en/user-guide/workload-identity-federation#label-wif-oidc-custom-configure-custom>`_.
+    * ``token``: The OIDC ID token (JWT) used when 
``workload_identity_provider`` is ``OIDC``. Prefer
+      ``token_file_path`` for tokens that rotate.
+    * ``token_file_path``: Path to a file holding the OIDC ID token used when 
``workload_identity_provider``
+      is ``OIDC``. The connector reads the token from this file, which suits 
projected or rotated tokens
+      (for example a Kubernetes service-account token).
     * ``token_endpoint``: Specify token endpoint for external OAuth provider.
     * ``grant_type``: Specify grant type for OAuth authentication. Currently 
supported: ``refresh_token`` (default), ``client_credentials``.
     * ``scope``: Specify OAuth scope to include in the access token request 
for any OAuth grant type.
@@ -138,3 +153,28 @@ set the PAT token as the password with no special 
authenticator required:
             "role": "role"
         }
     }'
+
+JSON format example with Workload Identity Federation (WIF)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To authenticate without a stored secret using
+`Workload Identity Federation 
<https://docs.snowflake.com/en/user-guide/workload-identity-federation>`_,
+set ``authenticator`` to ``WORKLOAD_IDENTITY`` and 
``workload_identity_provider`` to the cloud the
+workload runs on (here ``GCP``). No password, key-pair or token is stored; the 
workload's cloud identity
+is the credential. The Snowflake side needs a ``TYPE = SERVICE`` user that 
trusts the workload's identity
+and is granted a role with access to the target objects.
+
+.. code-block:: bash
+
+    export AIRFLOW_CONN_SNOWFLAKE_DEFAULT='{
+        "conn_type": "snowflake",
+        "login": "service-user",
+        "extra": {
+            "account": "account",
+            "database": "database",
+            "warehouse": "snow-warehouse",
+            "role": "role",
+            "authenticator": "WORKLOAD_IDENTITY",
+            "workload_identity_provider": "GCP"
+        }
+    }'
diff --git a/providers/snowflake/docs/index.rst 
b/providers/snowflake/docs/index.rst
index 0bd88ded522..ecf5950f93b 100644
--- a/providers/snowflake/docs/index.rst
+++ b/providers/snowflake/docs/index.rst
@@ -111,7 +111,7 @@ PIP package                                 Version required
 ``pyarrow``                                 ``>=16.1.0; python_version < 
"3.13"``
 ``pyarrow``                                 ``>=18.0.0; python_version >= 
"3.13" and python_version < "3.14"``
 ``pyarrow``                                 ``>=22.0.0; python_version >= 
"3.14"``
-``snowflake-connector-python``              ``>=3.16.0``
+``snowflake-connector-python``              ``>=3.17.0``
 ``snowflake-sqlalchemy``                    ``>=1.7.0``
 ``snowflake-snowpark-python``               ``>=1.17.0,<9999; python_version < 
"3.12"``
 ``snowflake-snowpark-python``               ``>=1.27.0,<9999; python_version 
>= "3.12" and python_version < "3.14"``
diff --git a/providers/snowflake/pyproject.toml 
b/providers/snowflake/pyproject.toml
index 2375cdf6c6f..e9ed033f8b8 100644
--- a/providers/snowflake/pyproject.toml
+++ b/providers/snowflake/pyproject.toml
@@ -69,7 +69,8 @@ dependencies = [
     "pyarrow>=18.0.0; python_version >= '3.13' and python_version < '3.14'",
     "pyarrow>=22.0.0; python_version >= '3.14'",
     # TODO(potiuk): We should bump the snowflake-connector-python to >=4.0.0 
when sqlalchemy>=2.0 is required
-    "snowflake-connector-python>=3.16.0",
+    # 3.17.0 is the first version with Workload Identity Federation 
(WORKLOAD_IDENTITY authenticator).
+    "snowflake-connector-python>=3.17.0",
     "snowflake-sqlalchemy>=1.7.0",
     # The "<9999" is a hint to the pip resolver to resolve this requirement 
early,
     # can be removed when the pip resolver is improved
diff --git 
a/providers/snowflake/src/airflow/providers/snowflake/hooks/snowflake.py 
b/providers/snowflake/src/airflow/providers/snowflake/hooks/snowflake.py
index b4a8a91e31d..e25b8605241 100644
--- a/providers/snowflake/src/airflow/providers/snowflake/hooks/snowflake.py
+++ b/providers/snowflake/src/airflow/providers/snowflake/hooks/snowflake.py
@@ -149,6 +149,9 @@ class SnowflakeHook(DbApiHook):
             "private_key_content": PasswordField(
                 lazy_gettext("Private key (Text)"), 
widget=BS3PasswordFieldWidget()
             ),
+            "workload_identity_provider": StringField(
+                lazy_gettext("Workload Identity Provider"), 
widget=BS3TextFieldWidget()
+            ),
             "insecure_mode": BooleanField(
                 label=lazy_gettext("Insecure mode"), description="Turns off 
OCSP certificate checks"
             ),
@@ -173,7 +176,7 @@ class SnowflakeHook(DbApiHook):
             "placeholders": {
                 "extra": json.dumps(
                     {
-                        "authenticator": "snowflake oauth",
+                        "authenticator": "snowflake oauth / WORKLOAD_IDENTITY",
                         "private_key_file": "private key",
                         "session_parameters": "session parameters",
                         "client_request_mfa_token": "client request mfa token",
@@ -199,6 +202,7 @@ class SnowflakeHook(DbApiHook):
                 "role": "snowflake role",
                 "private_key_file": "Path of snowflake private key (PEM 
Format)",
                 "private_key_content": "Content to snowflake private key (PEM 
format)",
+                "workload_identity_provider": "AWS, AZURE, GCP or OIDC",
                 "insecure_mode": "insecure mode",
                 "proxy_host": "Proxy server hostname",
                 "proxy_port": "Proxy server port",
@@ -380,6 +384,7 @@ class SnowflakeHook(DbApiHook):
         # authenticator and session_parameters never supported long name so we 
don't use _get_field
         authenticator = extra_dict.get("authenticator", "snowflake")
         session_parameters = extra_dict.get("session_parameters")
+        workload_identity_provider = self._get_field(extra_dict, 
"workload_identity_provider")
 
         conn_config = {
             "user": conn.login,
@@ -407,6 +412,22 @@ class SnowflakeHook(DbApiHook):
         if client_store_temporary_credential:
             conn_config["client_store_temporary_credential"] = 
client_store_temporary_credential
 
+        # Workload Identity Federation (keyless auth): when the connection sets
+        # ``authenticator=WORKLOAD_IDENTITY``, the connector also needs to 
know which
+        # cloud the workload runs on. One value (AWS, AZURE, GCP or OIDC) 
covers all
+        # providers. See 
https://docs.snowflake.com/en/user-guide/workload-identity-federation.
+        if workload_identity_provider:
+            conn_config["workload_identity_provider"] = 
workload_identity_provider
+            # AWS, AZURE and GCP fetch the identity token from the cloud's 
metadata
+            # service. OIDC instead requires the caller to supply the token, 
either
+            # inline (``token``) or from a file (``token_file_path``).
+            token = self._get_field(extra_dict, "token")
+            token_file_path = self._get_field(extra_dict, "token_file_path")
+            if token:
+                conn_config["token"] = token
+            if token_file_path:
+                conn_config["token_file_path"] = token_file_path
+
         p_key = self.get_private_key()
 
         if p_key:
diff --git a/providers/snowflake/tests/unit/snowflake/hooks/test_snowflake.py 
b/providers/snowflake/tests/unit/snowflake/hooks/test_snowflake.py
index 111a79d7021..5b96484933e 100644
--- a/providers/snowflake/tests/unit/snowflake/hooks/test_snowflake.py
+++ b/providers/snowflake/tests/unit/snowflake/hooks/test_snowflake.py
@@ -1821,3 +1821,98 @@ class TestPytestSnowflakeHook:
         invalid_form = form_cls(MultiDict([("proxy_port", "not-an-int")]))
         assert invalid_form.validate() is False
         assert "proxy_port" in invalid_form.errors
+
+    @pytest.mark.parametrize("provider", ["AWS", "AZURE", "GCP", "OIDC"])
+    def test_get_conn_params_forwards_workload_identity_provider(self, 
provider):
+        """When authenticator is WORKLOAD_IDENTITY, workload_identity_provider 
must reach the connector.
+
+        The connector raises ``251017: workload_identity_provider must be set 
...`` if the param is
+        dropped, so the hook has to forward it for keyless Workload Identity 
Federation to work.
+        """
+        connection_kwargs = deepcopy(BASE_CONNECTION_KWARGS)
+        connection_kwargs["extra"]["authenticator"] = "WORKLOAD_IDENTITY"
+        connection_kwargs["extra"]["workload_identity_provider"] = provider
+
+        with mock.patch.dict("os.environ", 
AIRFLOW_CONN_TEST_CONN=Connection(**connection_kwargs).get_uri()):
+            conn_params = 
SnowflakeHook(snowflake_conn_id="test_conn")._get_conn_params()
+
+        assert conn_params["authenticator"] == "WORKLOAD_IDENTITY"
+        assert conn_params["workload_identity_provider"] == provider
+
+    def test_get_conn_params_omits_workload_identity_provider_when_unset(self):
+        """workload_identity_provider must not appear in conn params unless 
configured."""
+        with mock.patch.dict(
+            "os.environ", 
AIRFLOW_CONN_TEST_CONN=Connection(**BASE_CONNECTION_KWARGS).get_uri()
+        ):
+            conn_params = 
SnowflakeHook(snowflake_conn_id="test_conn")._get_conn_params()
+
+        assert "workload_identity_provider" not in conn_params
+
+    def 
test_get_conn_params_workload_identity_provider_backcompat_prefix(self):
+        """The backcompat ``extra__snowflake__`` prefix is honored for 
workload_identity_provider."""
+        connection_kwargs = deepcopy(BASE_CONNECTION_KWARGS)
+        connection_kwargs["extra"]["authenticator"] = "WORKLOAD_IDENTITY"
+        
connection_kwargs["extra"]["extra__snowflake__workload_identity_provider"] = 
"GCP"
+
+        with mock.patch.dict("os.environ", 
AIRFLOW_CONN_TEST_CONN=Connection(**connection_kwargs).get_uri()):
+            conn_params = 
SnowflakeHook(snowflake_conn_id="test_conn")._get_conn_params()
+
+        assert conn_params["workload_identity_provider"] == "GCP"
+
+    def test_get_conn_passes_workload_identity_provider_to_connect(self):
+        """The forwarded param has to land in the actual 
``snowflake.connector.connect()`` call."""
+        connection_kwargs = deepcopy(BASE_CONNECTION_KWARGS)
+        connection_kwargs["extra"]["authenticator"] = "WORKLOAD_IDENTITY"
+        connection_kwargs["extra"]["workload_identity_provider"] = "GCP"
+
+        with (
+            mock.patch.dict("os.environ", 
AIRFLOW_CONN_TEST_CONN=Connection(**connection_kwargs).get_uri()),
+            mock.patch("snowflake.connector.connect") as mock_connect,
+        ):
+            SnowflakeHook(snowflake_conn_id="test_conn").get_conn()
+
+        call_kwargs = mock_connect.call_args[1]
+        assert call_kwargs["authenticator"] == "WORKLOAD_IDENTITY"
+        assert call_kwargs["workload_identity_provider"] == "GCP"
+
+    def 
test_get_connection_form_widgets_exposes_workload_identity_provider(self):
+        """The connection form must expose a workload_identity_provider field 
so users can set it in the UI."""
+        pytest.importorskip("flask_appbuilder")
+        pytest.importorskip("flask_babel")
+
+        widgets = SnowflakeHook.get_connection_form_widgets()
+
+        assert "workload_identity_provider" in widgets
+
+    @pytest.mark.parametrize(
+        ("field", "value"),
+        [("token", "an-oidc-jwt"), ("token_file_path", 
"/var/run/secrets/oidc/token")],
+    )
+    def test_get_conn_params_forwards_oidc_token(self, field, value):
+        """OIDC WIF needs a caller-supplied token; the connector raises if it 
is missing.
+
+        Unlike AWS/AZURE/GCP (which fetch the token from cloud metadata), OIDC 
requires
+        ``token`` or ``token_file_path`` to be forwarded.
+        """
+        connection_kwargs = deepcopy(BASE_CONNECTION_KWARGS)
+        connection_kwargs["extra"]["authenticator"] = "WORKLOAD_IDENTITY"
+        connection_kwargs["extra"]["workload_identity_provider"] = "OIDC"
+        connection_kwargs["extra"][field] = value
+
+        with mock.patch.dict("os.environ", 
AIRFLOW_CONN_TEST_CONN=Connection(**connection_kwargs).get_uri()):
+            conn_params = 
SnowflakeHook(snowflake_conn_id="test_conn")._get_conn_params()
+
+        assert conn_params["workload_identity_provider"] == "OIDC"
+        assert conn_params[field] == value
+
+    def test_get_conn_params_omits_oidc_token_when_unset(self):
+        """token/token_file_path must not appear unless explicitly 
configured."""
+        connection_kwargs = deepcopy(BASE_CONNECTION_KWARGS)
+        connection_kwargs["extra"]["authenticator"] = "WORKLOAD_IDENTITY"
+        connection_kwargs["extra"]["workload_identity_provider"] = "GCP"
+
+        with mock.patch.dict("os.environ", 
AIRFLOW_CONN_TEST_CONN=Connection(**connection_kwargs).get_uri()):
+            conn_params = 
SnowflakeHook(snowflake_conn_id="test_conn")._get_conn_params()
+
+        assert "token" not in conn_params
+        assert "token_file_path" not in conn_params
diff --git a/uv.lock b/uv.lock
index c3402fbff19..79698138589 100644
--- a/uv.lock
+++ b/uv.lock
@@ -7621,7 +7621,7 @@ requires-dist = [
     { name = "pyarrow", marker = "python_full_version == '3.13.*'", specifier 
= ">=18.0.0" },
     { name = "pyarrow", marker = "python_full_version >= '3.14'", specifier = 
">=22.0.0" },
     { name = "setuptools", specifier = ">=80.0.0,<9999" },
-    { name = "snowflake-connector-python", specifier = ">=3.16.0" },
+    { name = "snowflake-connector-python", specifier = ">=3.17.0" },
     { name = "snowflake-snowpark-python", marker = "python_full_version < 
'3.12'", specifier = ">=1.17.0,<9999" },
     { name = "snowflake-snowpark-python", marker = "python_full_version >= 
'3.12' and python_full_version < '3.14'", specifier = ">=1.27.0,<9999" },
     { name = "snowflake-sqlalchemy", specifier = ">=1.7.0" },

Reply via email to