This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 6ec60283b38 Add SSH auth parameters to GitHook for bastion hosts, custom ports, and passphrases (#63942)
6ec60283b38 is described below

commit 6ec60283b38fea86563cf63cbd107658e331439a
Author: Jarek Potiuk <[email protected]>
AuthorDate: Fri Mar 20 09:12:57 2026 +0100

    Add SSH auth parameters to GitHook for bastion hosts, custom ports, and passphrases (#63942)
    
    Adds support for all common SSH authentication mechanisms in GitHook:
    - private_key_passphrase: unlock encrypted SSH keys via SSH_ASKPASS
    - known_hosts_file: custom known-hosts path
    - ssh_config_file: custom SSH config file
    - host_proxy_cmd: ProxyCommand for bastion/jump hosts
    - ssh_port: non-default SSH port
    
    Also sends UserKnownHostsFile=/dev/null when strict checking is off
    to suppress known_hosts warnings in automated environments.
---
 providers/git/docs/connections/git.rst             |  34 ++++-
 .../git/src/airflow/providers/git/hooks/git.py     | 113 +++++++++++++++--
 providers/git/tests/unit/git/hooks/test_git.py     | 139 ++++++++++++++++++++-
 3 files changed, 269 insertions(+), 17 deletions(-)

diff --git a/providers/git/docs/connections/git.rst b/providers/git/docs/connections/git.rst
index 3ea9c8e2749..3f6bc8dc3a8 100644
--- a/providers/git/docs/connections/git.rst
+++ b/providers/git/docs/connections/git.rst
@@ -55,13 +55,28 @@ Access Token (optional)
 Extra (optional)
     Specify the extra parameters as a JSON dictionary. The following keys are supported:
 
+    **SSH key authentication:**
+
     * ``key_file``: Path to an SSH private key file to use for authentication.
     * ``private_key``: An inline SSH private key string. When provided, the hook writes it
       to a temporary file and uses it for the SSH connection.
+      Mutually exclusive with ``key_file``.
+    * ``private_key_passphrase``: Passphrase for the private key (works with both
+      ``key_file`` and ``private_key``). Uses ``SSH_ASKPASS`` to provide the passphrase
+      non-interactively.
+
+    **SSH connection options:**
+
     * ``strict_host_key_checking``: Controls SSH strict host key checking. Defaults to ``no``.
       Set to ``yes`` to enable strict checking.
+    * ``known_hosts_file``: Path to a custom SSH known-hosts file. When
+      ``strict_host_key_checking`` is ``no`` and this is not set, ``/dev/null`` is used.
+    * ``ssh_config_file``: Path to a custom SSH config file (passed as ``ssh -F``).
+    * ``host_proxy_cmd``: SSH ProxyCommand string for connecting through a bastion or
+      jump host (e.g. ``ssh -W %h:%p bastion.example.com``).
+    * ``ssh_port``: Non-default SSH port number.
 
-    Example:
+    Example with key file:
 
     .. code-block:: json
 
@@ -70,10 +85,23 @@ Extra (optional)
             "strict_host_key_checking": "no"
         }
 
-    Or with an inline private key:
+    Example with inline private key and passphrase:
+
+    .. code-block:: json
+
+        {
+            "private_key": "<content of your PEM-encoded private key>",
+            "private_key_passphrase": "my-passphrase"
+        }
+
+    Example with bastion host and custom port:
 
     .. code-block:: json
 
         {
-            "private_key": "<content of your PEM-encoded private key>"
+            "key_file": "/path/to/id_rsa",
+            "host_proxy_cmd": "ssh -W %h:%p bastion.example.com",
+            "ssh_port": "2222",
+            "strict_host_key_checking": "yes",
+            "known_hosts_file": "/path/to/known_hosts"
         }
diff --git a/providers/git/src/airflow/providers/git/hooks/git.py b/providers/git/src/airflow/providers/git/hooks/git.py
index 416aff16003..015fd7d7c30 100644
--- a/providers/git/src/airflow/providers/git/hooks/git.py
+++ b/providers/git/src/airflow/providers/git/hooks/git.py
@@ -21,6 +21,7 @@ import contextlib
 import json
 import logging
 import os
+import stat
 import tempfile
 from typing import Any
 
@@ -34,7 +35,18 @@ class GitHook(BaseHook):
     Hook for git repositories.
 
     :param git_conn_id: Connection ID for SSH connection to the repository
-
+    :param repo_url: Explicit Git repository URL to override the connection's host.
+
+    Connection extra fields:
+
+    * ``key_file`` — path to an SSH private key file.
+    * ``private_key`` — inline SSH private key string (mutually exclusive with ``key_file``).
+    * ``private_key_passphrase`` — passphrase for the private key (key_file or inline).
+    * ``strict_host_key_checking`` — ``"yes"`` or ``"no"`` (default ``"no"``).
+    * ``known_hosts_file`` — path to a custom SSH known-hosts file.
+    * ``ssh_config_file`` — path to a custom SSH config file.
+    * ``host_proxy_cmd`` — SSH ProxyCommand string (e.g. for bastion/jump hosts).
+    * ``ssh_port`` — non-default SSH port.
     """
 
     conn_name_attr = "git_conn_id"
@@ -56,6 +68,12 @@ class GitHook(BaseHook):
                     {
                         "key_file": "optional/path/to/keyfile",
                         "private_key": "optional inline private key",
+                        "private_key_passphrase": "",
+                        "strict_host_key_checking": "no",
+                        "known_hosts_file": "",
+                        "ssh_config_file": "",
+                        "host_proxy_cmd": "",
+                        "ssh_port": "",
                     }
                 )
             },
@@ -66,24 +84,54 @@ class GitHook(BaseHook):
     ) -> None:
         super().__init__()
         connection = self.get_connection(git_conn_id)
+        extra = connection.extra_dejson
+
         self.repo_url = repo_url or connection.host
         self.user_name = connection.login or "user"
         self.auth_token = connection.password
-        self.private_key = connection.extra_dejson.get("private_key")
-        self.key_file = connection.extra_dejson.get("key_file")
-        self.strict_host_key_checking = connection.extra_dejson.get("strict_host_key_checking", "no")
+
+        # SSH key authentication
+        self.private_key = extra.get("private_key")
+        self.key_file = extra.get("key_file")
+        self.private_key_passphrase = extra.get("private_key_passphrase")
+
+        # SSH connection options
+        self.strict_host_key_checking = extra.get("strict_host_key_checking", "no")
+        self.known_hosts_file = extra.get("known_hosts_file")
+        self.ssh_config_file = extra.get("ssh_config_file")
+        self.host_proxy_cmd = extra.get("host_proxy_cmd")
+        self.ssh_port: int | None = int(extra["ssh_port"]) if extra.get("ssh_port") else None
+
         self.env: dict[str, str] = {}
 
         if self.key_file and self.private_key:
             raise AirflowException("Both 'key_file' and 'private_key' cannot be provided at the same time")
         self._process_git_auth_url()
 
-    def _build_ssh_command(self, key_path: str) -> str:
-        return (
-            f"ssh -i {key_path} "
-            f"-o IdentitiesOnly=yes "
-            f"-o StrictHostKeyChecking={self.strict_host_key_checking}"
-        )
+    def _build_ssh_command(self, key_path: str | None = None) -> str:
+        parts = ["ssh"]
+
+        if key_path:
+            parts.append(f"-i {key_path}")
+            parts.append("-o IdentitiesOnly=yes")
+
+        parts.append(f"-o StrictHostKeyChecking={self.strict_host_key_checking}")
+
+        if self.known_hosts_file:
+            parts.append(f"-o UserKnownHostsFile={self.known_hosts_file}")
+        elif self.strict_host_key_checking == "no":
+            parts.append("-o UserKnownHostsFile=/dev/null")
+
+        if self.ssh_config_file:
+            parts.append(f"-F {self.ssh_config_file}")
+
+        if self.host_proxy_cmd:
+            parts.append(f'-o ProxyCommand="{self.host_proxy_cmd}"')
+
+        if self.ssh_port:
+            parts.append(f"-p {self.ssh_port}")
+
+        return " ".join(parts)
 
     def _process_git_auth_url(self):
         if not isinstance(self.repo_url, str):
@@ -98,9 +146,44 @@ class GitHook(BaseHook):
         elif not self.repo_url.startswith("git@") or not self.repo_url.startswith("https://"):
             self.repo_url = os.path.expanduser(self.repo_url)
 
-    def set_git_env(self, key: str) -> None:
+    def set_git_env(self, key: str | None = None) -> None:
         self.env["GIT_SSH_COMMAND"] = self._build_ssh_command(key)
 
+    @contextlib.contextmanager
+    def _passphrase_askpass_env(self):
+        """Set up SSH_ASKPASS so ssh can unlock passphrase-protected keys non-interactively."""
+        if not self.private_key_passphrase:
+            yield
+            return
+
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".sh", delete=True) as askpass_script:
+            askpass_script.write(f"#!/bin/sh\necho '{self.private_key_passphrase}'\n")
+            askpass_script.flush()
+            os.chmod(askpass_script.name, stat.S_IRWXU)
+
+            old_askpass = os.environ.get("SSH_ASKPASS")
+            old_display = os.environ.get("DISPLAY")
+            old_askpass_require = os.environ.get("SSH_ASKPASS_REQUIRE")
+            try:
+                os.environ["SSH_ASKPASS"] = askpass_script.name
+                os.environ["SSH_ASKPASS_REQUIRE"] = "force"
+                # DISPLAY must be set for SSH_ASKPASS to be used
+                os.environ.setdefault("DISPLAY", ":")
+                self.env["SSH_ASKPASS"] = askpass_script.name
+                self.env["SSH_ASKPASS_REQUIRE"] = "force"
+                self.env.setdefault("DISPLAY", os.environ["DISPLAY"])
+                yield
+            finally:
+                for var, old_val in [
+                    ("SSH_ASKPASS", old_askpass),
+                    ("DISPLAY", old_display),
+                    ("SSH_ASKPASS_REQUIRE", old_askpass_require),
+                ]:
+                    if old_val is None:
+                        os.environ.pop(var, None)
+                    else:
+                        os.environ[var] = old_val
+
     @contextlib.contextmanager
     def configure_hook_env(self):
         if self.private_key:
@@ -109,7 +192,15 @@ class GitHook(BaseHook):
                 tmp_keyfile.flush()
                 os.chmod(tmp_keyfile.name, 0o600)
                 self.set_git_env(tmp_keyfile.name)
+                with self._passphrase_askpass_env():
+                    yield
+        elif self.key_file:
+            self.set_git_env(self.key_file)
+            with self._passphrase_askpass_env():
                 yield
+        elif self.host_proxy_cmd or self.ssh_port or self.ssh_config_file or self.known_hosts_file:
+            self.set_git_env()
+            yield
         else:
             self.set_git_env(self.key_file)
             yield
diff --git a/providers/git/tests/unit/git/hooks/test_git.py b/providers/git/tests/unit/git/hooks/test_git.py
index 60e35e3bddd..f814dec6d37 100644
--- a/providers/git/tests/unit/git/hooks/test_git.py
+++ b/providers/git/tests/unit/git/hooks/test_git.py
@@ -154,7 +154,7 @@ class TestGitHook:
         default_hook = GitHook(git_conn_id=CONN_DEFAULT)
         with default_hook.configure_hook_env():
             assert default_hook.env == {
-                "GIT_SSH_COMMAND": "ssh -i /files/pkey.pem -o IdentitiesOnly=yes -o StrictHostKeyChecking=no"
+                "GIT_SSH_COMMAND": "ssh -i /files/pkey.pem -o IdentitiesOnly=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
             }
         create_connection_without_db(
             Connection(
@@ -195,7 +195,7 @@ class TestGitHook:
         assert hasattr(hook, "env")
         with hook.configure_hook_env():
             assert hook.env == {
-                "GIT_SSH_COMMAND": "ssh -i /files/pkey.pem -o IdentitiesOnly=yes -o StrictHostKeyChecking=no"
+                "GIT_SSH_COMMAND": "ssh -i /files/pkey.pem -o IdentitiesOnly=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
             }
 
     def test_private_key_lazy_env_var(self):
@@ -204,7 +204,7 @@ class TestGitHook:
 
         hook.set_git_env("dummy_inline_key")
         assert hook.env == {
-            "GIT_SSH_COMMAND": "ssh -i dummy_inline_key -o IdentitiesOnly=yes -o StrictHostKeyChecking=no"
+            "GIT_SSH_COMMAND": "ssh -i dummy_inline_key -o IdentitiesOnly=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
         }
 
     def test_configure_hook_env(self):
@@ -219,3 +219,136 @@ class TestGitHook:
             assert os.path.exists(temp_key_path)
 
         assert not os.path.exists(temp_key_path)
+
+    def test_ssh_port(self, create_connection_without_db):
+        create_connection_without_db(
+            Connection(
+                conn_id="git_with_port",
+                host=AIRFLOW_GIT,
+                conn_type="git",
+                extra={"key_file": "/files/pkey.pem", "ssh_port": "2222"},
+            )
+        )
+        hook = GitHook(git_conn_id="git_with_port")
+        with hook.configure_hook_env():
+            cmd = hook.env["GIT_SSH_COMMAND"]
+            assert "-p 2222" in cmd
+
+    def test_proxy_command(self, create_connection_without_db):
+        create_connection_without_db(
+            Connection(
+                conn_id="git_with_proxy",
+                host=AIRFLOW_GIT,
+                conn_type="git",
+                extra={
+                    "key_file": "/files/pkey.pem",
+                    "host_proxy_cmd": "ssh -W %h:%p bastion.example.com",
+                },
+            )
+        )
+        hook = GitHook(git_conn_id="git_with_proxy")
+        with hook.configure_hook_env():
+            cmd = hook.env["GIT_SSH_COMMAND"]
+            assert 'ProxyCommand="ssh -W %h:%p bastion.example.com"' in cmd
+
+    def test_known_hosts_file(self, create_connection_without_db):
+        create_connection_without_db(
+            Connection(
+                conn_id="git_known_hosts",
+                host=AIRFLOW_GIT,
+                conn_type="git",
+                extra={
+                    "key_file": "/files/pkey.pem",
+                    "strict_host_key_checking": "yes",
+                    "known_hosts_file": "/etc/ssh/known_hosts",
+                },
+            )
+        )
+        hook = GitHook(git_conn_id="git_known_hosts")
+        with hook.configure_hook_env():
+            cmd = hook.env["GIT_SSH_COMMAND"]
+            assert "-o StrictHostKeyChecking=yes" in cmd
+            assert "-o UserKnownHostsFile=/etc/ssh/known_hosts" in cmd
+            assert "/dev/null" not in cmd
+
+    def test_ssh_config_file(self, create_connection_without_db):
+        create_connection_without_db(
+            Connection(
+                conn_id="git_ssh_config",
+                host=AIRFLOW_GIT,
+                conn_type="git",
+                extra={
+                    "key_file": "/files/pkey.pem",
+                    "ssh_config_file": "/home/user/.ssh/config",
+                },
+            )
+        )
+        hook = GitHook(git_conn_id="git_ssh_config")
+        with hook.configure_hook_env():
+            cmd = hook.env["GIT_SSH_COMMAND"]
+            assert "-F /home/user/.ssh/config" in cmd
+
+    def test_no_key_with_ssh_options_sets_env(self, create_connection_without_db):
+        """SSH options without a key still produce GIT_SSH_COMMAND."""
+        create_connection_without_db(
+            Connection(
+                conn_id="git_proxy_only",
+                host=AIRFLOW_GIT,
+                conn_type="git",
+                extra={"host_proxy_cmd": "ssh -W %h:%p bastion"},
+            )
+        )
+        hook = GitHook(git_conn_id="git_proxy_only")
+        assert hook.env == {}
+        with hook.configure_hook_env():
+            cmd = hook.env["GIT_SSH_COMMAND"]
+            assert cmd.startswith("ssh ")
+            assert "-i " not in cmd
+            assert "ProxyCommand" in cmd
+
+    def test_default_user_known_hosts_devnull_when_no_strict_checking(self):
+        """When strict_host_key_checking=no and no known_hosts_file, /dev/null is used."""
+        hook = GitHook(git_conn_id=CONN_DEFAULT)
+        with hook.configure_hook_env():
+            cmd = hook.env["GIT_SSH_COMMAND"]
+            assert "-o UserKnownHostsFile=/dev/null" in cmd
+
+    def test_passphrase_sets_askpass_env(self, create_connection_without_db):
+        create_connection_without_db(
+            Connection(
+                conn_id="git_passphrase",
+                host=AIRFLOW_GIT,
+                conn_type="git",
+                extra={
+                    "key_file": "/files/pkey.pem",
+                    "private_key_passphrase": "my_secret",
+                },
+            )
+        )
+        hook = GitHook(git_conn_id="git_passphrase")
+        with hook.configure_hook_env():
+            assert "SSH_ASKPASS" in hook.env
+            assert hook.env["SSH_ASKPASS_REQUIRE"] == "force"
+            askpass_path = hook.env["SSH_ASKPASS"]
+            assert os.path.exists(askpass_path)
+
+    def test_passphrase_askpass_cleaned_up(self, create_connection_without_db):
+        create_connection_without_db(
+            Connection(
+                conn_id="git_passphrase_cleanup",
+                host=AIRFLOW_GIT,
+                conn_type="git",
+                extra={
+                    "private_key": "inline_key",
+                    "private_key_passphrase": "my_secret",
+                },
+            )
+        )
+        hook = GitHook(git_conn_id="git_passphrase_cleanup")
+        askpass_path = None
+        with hook.configure_hook_env():
+            askpass_path = hook.env.get("SSH_ASKPASS")
+            assert askpass_path is not None
+            assert os.path.exists(askpass_path)
+        # Both the askpass script and the temp key file should be cleaned up
+        assert not os.path.exists(askpass_path)

Reply via email to