This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 6ec60283b38 Add SSH auth parameters to GitHook for bastion hosts,
custom ports, and passphrases (#63942)
6ec60283b38 is described below
commit 6ec60283b38fea86563cf63cbd107658e331439a
Author: Jarek Potiuk <[email protected]>
AuthorDate: Fri Mar 20 09:12:57 2026 +0100
Add SSH auth parameters to GitHook for bastion hosts, custom ports, and
passphrases (#63942)
Adds support for all common SSH authentication mechanisms in GitHook:
- private_key_passphrase: unlock encrypted SSH keys via SSH_ASKPASS
- known_hosts_file: custom known-hosts path
- ssh_config_file: custom SSH config file
- host_proxy_cmd: ProxyCommand for bastion/jump hosts
- ssh_port: non-default SSH port
Also sends UserKnownHostsFile=/dev/null when strict checking is off and no
known_hosts_file is configured, to suppress known_hosts warnings in
automated environments.
---
providers/git/docs/connections/git.rst | 34 ++++-
.../git/src/airflow/providers/git/hooks/git.py | 113 +++++++++++++++--
providers/git/tests/unit/git/hooks/test_git.py | 139 ++++++++++++++++++++-
3 files changed, 269 insertions(+), 17 deletions(-)
diff --git a/providers/git/docs/connections/git.rst
b/providers/git/docs/connections/git.rst
index 3ea9c8e2749..3f6bc8dc3a8 100644
--- a/providers/git/docs/connections/git.rst
+++ b/providers/git/docs/connections/git.rst
@@ -55,13 +55,28 @@ Access Token (optional)
Extra (optional)
Specify the extra parameters as a JSON dictionary. The following keys are
supported:
+ **SSH key authentication:**
+
* ``key_file``: Path to an SSH private key file to use for authentication.
* ``private_key``: An inline SSH private key string. When provided, the
hook writes it
to a temporary file and uses it for the SSH connection.
+ Mutually exclusive with ``key_file``.
+ * ``private_key_passphrase``: Passphrase for the private key (works with
both
+ ``key_file`` and ``private_key``). Uses ``SSH_ASKPASS`` to provide the
passphrase
+ non-interactively.
+
+ **SSH connection options:**
+
* ``strict_host_key_checking``: Controls SSH strict host key checking.
Defaults to ``no``.
Set to ``yes`` to enable strict checking.
+ * ``known_hosts_file``: Path to a custom SSH known-hosts file. When
+ ``strict_host_key_checking`` is ``no`` and this is not set,
``/dev/null`` is used.
+ * ``ssh_config_file``: Path to a custom SSH config file (passed as ``ssh
-F``).
+ * ``host_proxy_cmd``: SSH ProxyCommand string for connecting through a
bastion or
+ jump host (e.g. ``ssh -W %h:%p bastion.example.com``).
+ * ``ssh_port``: Non-default SSH port number.
- Example:
+ Example with key file:
.. code-block:: json
@@ -70,10 +85,23 @@ Extra (optional)
"strict_host_key_checking": "no"
}
- Or with an inline private key:
+ Example with inline private key and passphrase:
+
+ .. code-block:: json
+
+ {
+ "private_key": "<content of your PEM-encoded private key>",
+ "private_key_passphrase": "my-passphrase"
+ }
+
+ Example with bastion host and custom port:
.. code-block:: json
{
- "private_key": "<content of your PEM-encoded private key>"
+ "key_file": "/path/to/id_rsa",
+ "host_proxy_cmd": "ssh -W %h:%p bastion.example.com",
+ "ssh_port": "2222",
+ "strict_host_key_checking": "yes",
+ "known_hosts_file": "/path/to/known_hosts"
}
diff --git a/providers/git/src/airflow/providers/git/hooks/git.py
b/providers/git/src/airflow/providers/git/hooks/git.py
index 416aff16003..015fd7d7c30 100644
--- a/providers/git/src/airflow/providers/git/hooks/git.py
+++ b/providers/git/src/airflow/providers/git/hooks/git.py
@@ -21,6 +21,7 @@ import contextlib
import json
import logging
import os
+import stat
import tempfile
from typing import Any
@@ -34,7 +35,18 @@ class GitHook(BaseHook):
Hook for git repositories.
:param git_conn_id: Connection ID for SSH connection to the repository
-
+ :param repo_url: Explicit Git repository URL to override the connection's
host.
+
+ Connection extra fields:
+
+ * ``key_file`` — path to an SSH private key file.
+ * ``private_key`` — inline SSH private key string (mutually exclusive with
``key_file``).
+ * ``private_key_passphrase`` — passphrase for the private key (key_file or
inline).
+ * ``strict_host_key_checking`` — ``"yes"`` or ``"no"`` (default ``"no"``).
+ * ``known_hosts_file`` — path to a custom SSH known-hosts file.
+ * ``ssh_config_file`` — path to a custom SSH config file.
+ * ``host_proxy_cmd`` — SSH ProxyCommand string (e.g. for bastion/jump
hosts).
+ * ``ssh_port`` — non-default SSH port.
"""
conn_name_attr = "git_conn_id"
@@ -56,6 +68,12 @@ class GitHook(BaseHook):
{
"key_file": "optional/path/to/keyfile",
"private_key": "optional inline private key",
+ "private_key_passphrase": "",
+ "strict_host_key_checking": "no",
+ "known_hosts_file": "",
+ "ssh_config_file": "",
+ "host_proxy_cmd": "",
+ "ssh_port": "",
}
)
},
@@ -66,24 +84,54 @@ class GitHook(BaseHook):
) -> None:
super().__init__()
connection = self.get_connection(git_conn_id)
+ extra = connection.extra_dejson
+
self.repo_url = repo_url or connection.host
self.user_name = connection.login or "user"
self.auth_token = connection.password
- self.private_key = connection.extra_dejson.get("private_key")
- self.key_file = connection.extra_dejson.get("key_file")
- self.strict_host_key_checking =
connection.extra_dejson.get("strict_host_key_checking", "no")
+
+ # SSH key authentication
+ self.private_key = extra.get("private_key")
+ self.key_file = extra.get("key_file")
+ self.private_key_passphrase = extra.get("private_key_passphrase")
+
+ # SSH connection options
+ self.strict_host_key_checking = extra.get("strict_host_key_checking",
"no")
+ self.known_hosts_file = extra.get("known_hosts_file")
+ self.ssh_config_file = extra.get("ssh_config_file")
+ self.host_proxy_cmd = extra.get("host_proxy_cmd")
+ self.ssh_port: int | None = int(extra["ssh_port"]) if
extra.get("ssh_port") else None
+
self.env: dict[str, str] = {}
if self.key_file and self.private_key:
raise AirflowException("Both 'key_file' and 'private_key' cannot
be provided at the same time")
self._process_git_auth_url()
- def _build_ssh_command(self, key_path: str) -> str:
- return (
- f"ssh -i {key_path} "
- f"-o IdentitiesOnly=yes "
- f"-o StrictHostKeyChecking={self.strict_host_key_checking}"
- )
+ def _build_ssh_command(self, key_path: str | None = None) -> str:
+ parts = ["ssh"]
+
+ if key_path:
+ parts.append(f"-i {key_path}")
+ parts.append("-o IdentitiesOnly=yes")
+
+ parts.append(f"-o
StrictHostKeyChecking={self.strict_host_key_checking}")
+
+ if self.known_hosts_file:
+ parts.append(f"-o UserKnownHostsFile={self.known_hosts_file}")
+ elif self.strict_host_key_checking == "no":
+ parts.append("-o UserKnownHostsFile=/dev/null")
+
+ if self.ssh_config_file:
+ parts.append(f"-F {self.ssh_config_file}")
+
+ if self.host_proxy_cmd:
+ parts.append(f'-o ProxyCommand="{self.host_proxy_cmd}"')
+
+ if self.ssh_port:
+ parts.append(f"-p {self.ssh_port}")
+
+ return " ".join(parts)
def _process_git_auth_url(self):
if not isinstance(self.repo_url, str):
@@ -98,9 +146,44 @@ class GitHook(BaseHook):
elif not self.repo_url.startswith("git@") or not
self.repo_url.startswith("https://"):
self.repo_url = os.path.expanduser(self.repo_url)
- def set_git_env(self, key: str) -> None:
+ def set_git_env(self, key: str | None = None) -> None:
self.env["GIT_SSH_COMMAND"] = self._build_ssh_command(key)
+ @contextlib.contextmanager
+ def _passphrase_askpass_env(self):
+ """Set up SSH_ASKPASS so ssh can unlock passphrase-protected keys
non-interactively."""
+ if not self.private_key_passphrase:
+ yield
+ return
+
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".sh", delete=True)
as askpass_script:
+ askpass_script.write(f"#!/bin/sh\necho
'{self.private_key_passphrase}'\n")
+ askpass_script.flush()
+ os.chmod(askpass_script.name, stat.S_IRWXU)
+
+ old_askpass = os.environ.get("SSH_ASKPASS")
+ old_display = os.environ.get("DISPLAY")
+ old_askpass_require = os.environ.get("SSH_ASKPASS_REQUIRE")
+ try:
+ os.environ["SSH_ASKPASS"] = askpass_script.name
+ os.environ["SSH_ASKPASS_REQUIRE"] = "force"
+ # DISPLAY must be set for SSH_ASKPASS to be used
+ os.environ.setdefault("DISPLAY", ":")
+ self.env["SSH_ASKPASS"] = askpass_script.name
+ self.env["SSH_ASKPASS_REQUIRE"] = "force"
+ self.env.setdefault("DISPLAY", os.environ["DISPLAY"])
+ yield
+ finally:
+ for var, old_val in [
+ ("SSH_ASKPASS", old_askpass),
+ ("DISPLAY", old_display),
+ ("SSH_ASKPASS_REQUIRE", old_askpass_require),
+ ]:
+ if old_val is None:
+ os.environ.pop(var, None)
+ else:
+ os.environ[var] = old_val
+
@contextlib.contextmanager
def configure_hook_env(self):
if self.private_key:
@@ -109,7 +192,15 @@ class GitHook(BaseHook):
tmp_keyfile.flush()
os.chmod(tmp_keyfile.name, 0o600)
self.set_git_env(tmp_keyfile.name)
+ with self._passphrase_askpass_env():
+ yield
+ elif self.key_file:
+ self.set_git_env(self.key_file)
+ with self._passphrase_askpass_env():
yield
+ elif self.host_proxy_cmd or self.ssh_port or self.ssh_config_file or
self.known_hosts_file:
+ self.set_git_env()
+ yield
else:
self.set_git_env(self.key_file)
yield
diff --git a/providers/git/tests/unit/git/hooks/test_git.py
b/providers/git/tests/unit/git/hooks/test_git.py
index 60e35e3bddd..f814dec6d37 100644
--- a/providers/git/tests/unit/git/hooks/test_git.py
+++ b/providers/git/tests/unit/git/hooks/test_git.py
@@ -154,7 +154,7 @@ class TestGitHook:
default_hook = GitHook(git_conn_id=CONN_DEFAULT)
with default_hook.configure_hook_env():
assert default_hook.env == {
- "GIT_SSH_COMMAND": "ssh -i /files/pkey.pem -o
IdentitiesOnly=yes -o StrictHostKeyChecking=no"
+ "GIT_SSH_COMMAND": "ssh -i /files/pkey.pem -o
IdentitiesOnly=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
}
create_connection_without_db(
Connection(
@@ -195,7 +195,7 @@ class TestGitHook:
assert hasattr(hook, "env")
with hook.configure_hook_env():
assert hook.env == {
- "GIT_SSH_COMMAND": "ssh -i /files/pkey.pem -o
IdentitiesOnly=yes -o StrictHostKeyChecking=no"
+ "GIT_SSH_COMMAND": "ssh -i /files/pkey.pem -o
IdentitiesOnly=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
}
def test_private_key_lazy_env_var(self):
@@ -204,7 +204,7 @@ class TestGitHook:
hook.set_git_env("dummy_inline_key")
assert hook.env == {
- "GIT_SSH_COMMAND": "ssh -i dummy_inline_key -o IdentitiesOnly=yes
-o StrictHostKeyChecking=no"
+ "GIT_SSH_COMMAND": "ssh -i dummy_inline_key -o IdentitiesOnly=yes
-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
}
def test_configure_hook_env(self):
@@ -219,3 +219,136 @@ class TestGitHook:
assert os.path.exists(temp_key_path)
assert not os.path.exists(temp_key_path)
+
+ def test_ssh_port(self, create_connection_without_db):
+ create_connection_without_db(
+ Connection(
+ conn_id="git_with_port",
+ host=AIRFLOW_GIT,
+ conn_type="git",
+ extra={"key_file": "/files/pkey.pem", "ssh_port": "2222"},
+ )
+ )
+ hook = GitHook(git_conn_id="git_with_port")
+ with hook.configure_hook_env():
+ cmd = hook.env["GIT_SSH_COMMAND"]
+ assert "-p 2222" in cmd
+
+ def test_proxy_command(self, create_connection_without_db):
+ create_connection_without_db(
+ Connection(
+ conn_id="git_with_proxy",
+ host=AIRFLOW_GIT,
+ conn_type="git",
+ extra={
+ "key_file": "/files/pkey.pem",
+ "host_proxy_cmd": "ssh -W %h:%p bastion.example.com",
+ },
+ )
+ )
+ hook = GitHook(git_conn_id="git_with_proxy")
+ with hook.configure_hook_env():
+ cmd = hook.env["GIT_SSH_COMMAND"]
+ assert 'ProxyCommand="ssh -W %h:%p bastion.example.com"' in cmd
+
+ def test_known_hosts_file(self, create_connection_without_db):
+ create_connection_without_db(
+ Connection(
+ conn_id="git_known_hosts",
+ host=AIRFLOW_GIT,
+ conn_type="git",
+ extra={
+ "key_file": "/files/pkey.pem",
+ "strict_host_key_checking": "yes",
+ "known_hosts_file": "/etc/ssh/known_hosts",
+ },
+ )
+ )
+ hook = GitHook(git_conn_id="git_known_hosts")
+ with hook.configure_hook_env():
+ cmd = hook.env["GIT_SSH_COMMAND"]
+ assert "-o StrictHostKeyChecking=yes" in cmd
+ assert "-o UserKnownHostsFile=/etc/ssh/known_hosts" in cmd
+ assert "/dev/null" not in cmd
+
+ def test_ssh_config_file(self, create_connection_without_db):
+ create_connection_without_db(
+ Connection(
+ conn_id="git_ssh_config",
+ host=AIRFLOW_GIT,
+ conn_type="git",
+ extra={
+ "key_file": "/files/pkey.pem",
+ "ssh_config_file": "/home/user/.ssh/config",
+ },
+ )
+ )
+ hook = GitHook(git_conn_id="git_ssh_config")
+ with hook.configure_hook_env():
+ cmd = hook.env["GIT_SSH_COMMAND"]
+ assert "-F /home/user/.ssh/config" in cmd
+
+ def test_no_key_with_ssh_options_sets_env(self,
create_connection_without_db):
+ """SSH options without a key still produce GIT_SSH_COMMAND."""
+ create_connection_without_db(
+ Connection(
+ conn_id="git_proxy_only",
+ host=AIRFLOW_GIT,
+ conn_type="git",
+ extra={"host_proxy_cmd": "ssh -W %h:%p bastion"},
+ )
+ )
+ hook = GitHook(git_conn_id="git_proxy_only")
+ assert hook.env == {}
+ with hook.configure_hook_env():
+ cmd = hook.env["GIT_SSH_COMMAND"]
+ assert cmd.startswith("ssh ")
+ assert "-i " not in cmd
+ assert "ProxyCommand" in cmd
+
+ def test_default_user_known_hosts_devnull_when_no_strict_checking(self):
+ """When strict_host_key_checking=no and no known_hosts_file, /dev/null
is used."""
+ hook = GitHook(git_conn_id=CONN_DEFAULT)
+ with hook.configure_hook_env():
+ cmd = hook.env["GIT_SSH_COMMAND"]
+ assert "-o UserKnownHostsFile=/dev/null" in cmd
+
+ def test_passphrase_sets_askpass_env(self, create_connection_without_db):
+ create_connection_without_db(
+ Connection(
+ conn_id="git_passphrase",
+ host=AIRFLOW_GIT,
+ conn_type="git",
+ extra={
+ "key_file": "/files/pkey.pem",
+ "private_key_passphrase": "my_secret",
+ },
+ )
+ )
+ hook = GitHook(git_conn_id="git_passphrase")
+ with hook.configure_hook_env():
+ assert "SSH_ASKPASS" in hook.env
+ assert hook.env["SSH_ASKPASS_REQUIRE"] == "force"
+ askpass_path = hook.env["SSH_ASKPASS"]
+ assert os.path.exists(askpass_path)
+
+ def test_passphrase_askpass_cleaned_up(self, create_connection_without_db):
+ create_connection_without_db(
+ Connection(
+ conn_id="git_passphrase_cleanup",
+ host=AIRFLOW_GIT,
+ conn_type="git",
+ extra={
+ "private_key": "inline_key",
+ "private_key_passphrase": "my_secret",
+ },
+ )
+ )
+ hook = GitHook(git_conn_id="git_passphrase_cleanup")
+ askpass_path = None
+ with hook.configure_hook_env():
+ askpass_path = hook.env.get("SSH_ASKPASS")
+ assert askpass_path is not None
+ assert os.path.exists(askpass_path)
+ # Both the askpass script and the temp key file should be cleaned up
+ assert not os.path.exists(askpass_path)