ephraimbuddy commented on code in PR #64105:
URL: https://github.com/apache/airflow/pull/64105#discussion_r3234010433
##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -145,20 +144,48 @@ def _build_ssh_command(self, key_path: str | None = None)
-> str:
def _process_git_auth_url(self):
if not isinstance(self.repo_url, str):
return
- if self.auth_token and self.repo_url.startswith("https://"):
- encoded_user = urlquote(self.user_name, safe="")
- encoded_token = urlquote(self.auth_token, safe="")
- self.repo_url = self.repo_url.replace("https://",
f"https://{encoded_user}:{encoded_token}@", 1)
- elif self.auth_token and self.repo_url.startswith("http://"):
- encoded_user = urlquote(self.user_name, safe="")
- encoded_token = urlquote(self.auth_token, safe="")
- self.repo_url = self.repo_url.replace("http://",
f"http://{encoded_user}:{encoded_token}@", 1)
- elif self.repo_url.startswith("http://"):
- # if no auth token, use the repo url as is
- pass
- elif not self.repo_url.startswith("git@") and not
self.repo_url.startswith("https://"):
+
+ if not self.repo_url.startswith("git@") and not
self.repo_url.startswith("https://"):
self.repo_url = os.path.expanduser(self.repo_url)
+ @contextlib.contextmanager
+ def _token_askpass_env(self):
+ if not self.auth_token:
+ yield
+ return
+
+ raw_username = self.user_name or "git"
+ username = shlex.quote(raw_username)
+ password = shlex.quote(self.auth_token)
+
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".sh", delete=True)
as askpass_script:
+ script_content = f"""#!/bin/sh
+case "$1" in
+ *Username*) echo {username} ;;
+ *Password*) echo {password} ;;
+ *) exit 1 ;;
+esac
+"""
+ askpass_script.write(script_content)
+ askpass_script.flush()
+ os.chmod(askpass_script.name, stat.S_IRWXU)
+
+ old_git_askpass = self.env.get("GIT_ASKPASS")
+ old_git_terminal_prompt = self.env.get("GIT_TERMINAL_PROMPT")
+ try:
+ self.env["GIT_ASKPASS"] = askpass_script.name
Review Comment:
**Blocker (same point as the top-level review).** `self.env` is a plain dict
on the hook — setting `GIT_ASKPASS` here only reaches the subprocess when the
caller explicitly passes `env=self.hook.env`. That happens for the initial bare
clone but not for `_fetch_bare_repo`, `_fetch_submodules`, or `refresh()`, so
HTTPS-with-token refreshes will fail (and may hang, since `GIT_TERMINAL_PROMPT`
is also missing from the real env).
Mirror what `_passphrase_askpass_env` does just above — set both
`os.environ` and `self.env`, and restore both in `finally`:
```python
old_os_askpass = os.environ.get("GIT_ASKPASS")
old_os_prompt = os.environ.get("GIT_TERMINAL_PROMPT")
old_env_askpass = self.env.get("GIT_ASKPASS")
old_env_prompt = self.env.get("GIT_TERMINAL_PROMPT")
try:
os.environ["GIT_ASKPASS"] = askpass_script.name
os.environ["GIT_TERMINAL_PROMPT"] = "0"
self.env["GIT_ASKPASS"] = askpass_script.name
self.env["GIT_TERMINAL_PROMPT"] = "0"
yield
finally:
for store, key, old in [
(os.environ, "GIT_ASKPASS", old_os_askpass),
(os.environ, "GIT_TERMINAL_PROMPT", old_os_prompt),
(self.env, "GIT_ASKPASS", old_env_askpass),
(self.env, "GIT_TERMINAL_PROMPT", old_env_prompt),
]:
if old is None:
store.pop(key, None)
else:
store[key] = old
```
---
Drafted-by: Claude Code (Opus 4.7); reviewed by @ephraimbuddy before posting
##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -145,20 +144,48 @@ def _build_ssh_command(self, key_path: str | None = None)
-> str:
def _process_git_auth_url(self):
if not isinstance(self.repo_url, str):
return
- if self.auth_token and self.repo_url.startswith("https://"):
- encoded_user = urlquote(self.user_name, safe="")
- encoded_token = urlquote(self.auth_token, safe="")
- self.repo_url = self.repo_url.replace("https://",
f"https://{encoded_user}:{encoded_token}@", 1)
- elif self.auth_token and self.repo_url.startswith("http://"):
- encoded_user = urlquote(self.user_name, safe="")
- encoded_token = urlquote(self.auth_token, safe="")
- self.repo_url = self.repo_url.replace("http://",
f"http://{encoded_user}:{encoded_token}@", 1)
- elif self.repo_url.startswith("http://"):
- # if no auth token, use the repo url as is
- pass
- elif not self.repo_url.startswith("git@") and not
self.repo_url.startswith("https://"):
+
+ if not self.repo_url.startswith("git@") and not
self.repo_url.startswith("https://"):
self.repo_url = os.path.expanduser(self.repo_url)
+ @contextlib.contextmanager
+ def _token_askpass_env(self):
+ if not self.auth_token:
+ yield
+ return
+
+ raw_username = self.user_name or "git"
Review Comment:
Nit: `self.user_name` is set in `__init__` as `connection.login or "user"`,
so it's never falsy by the time we get here — the `or "git"` branch is
unreachable.
Worth picking one default though, because `_process_git_auth_url` uses
`"user"` and this method uses `"git"`. For GitHub fine-grained tokens the
username is ignored, but GitLab deploy tokens and some Bitbucket setups care
about which username is sent. I'd either standardise on one default in
`__init__` (and drop the fallback here), or document why they differ.
---
Drafted-by: Claude Code (Opus 4.7); reviewed by @ephraimbuddy before posting
##########
providers/git/tests/unit/git/hooks/test_git.py:
##########
@@ -352,3 +353,40 @@ def test_passphrase_askpass_cleaned_up(self,
create_connection_without_db):
assert os.path.exists(askpass_path)
# Both the askpass script and the temp key file should be cleaned up
assert not os.path.exists(askpass_path)
+
+ def test_token_askpass_env_and_cleanup(self):
Review Comment:
These two new tests assert the dict contents and the script body, but never
exercise a real `git` call, so they wouldn't have caught (and don't guard
against) the env-propagation bug noted on the source side.
Could you add a test that monkey-patches `Repo.clone_from` (and ideally
`origin.fetch`) to capture the `env` argument actually handed to gitpython, and
asserts `GIT_ASKPASS` and `GIT_TERMINAL_PROMPT` are present for HTTPS+token? A
bundle-level test in `tests/unit/git/bundles/test_git.py` is probably the right
place since the bundle is what owns the propagation.
Also — `ACCESS_TOKEN = "my_access_token"` has no shell-special chars, so
`shlex.quote(ACCESS_TOKEN)` returns it unquoted and this assertion silently
doesn't exercise the quoting path. Parametrising with a token like
`"tok$with'quote"` would actually cover it.
---
Drafted-by: Claude Code (Opus 4.7); reviewed by @ephraimbuddy before posting
##########
providers/git/src/airflow/providers/git/hooks/git.py:
##########
@@ -145,20 +144,48 @@ def _build_ssh_command(self, key_path: str | None = None)
-> str:
def _process_git_auth_url(self):
Review Comment:
After this PR the method only handles `~` expansion for local paths —
there's no "auth url" logic left in it. The name is misleading now; consider
renaming to `_normalize_repo_url` (or inlining the one remaining line into
`__init__`).
---
Drafted-by: Claude Code (Opus 4.7); reviewed by @ephraimbuddy before posting
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]