This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new aa3b7d4f59a Redact secret-looking query parameters in HTTP access log
(#67498)
aa3b7d4f59a is described below
commit aa3b7d4f59ac362d446d716b171dae84392ce8ad
Author: Jarek Potiuk <[email protected]>
AuthorDate: Tue May 26 00:09:12 2026 +0200
Redact secret-looking query parameters in HTTP access log (#67498)
The HTTP access log middleware logged the raw query string without
passing it through ``secrets_masker.redact()``. The decorator-layer
audit log already masks request data; the access-log layer did not.
A secret inadvertently passed as a query parameter (e.g.
``?password=foo`` or ``?token=bar``) was therefore written to the
access log in plaintext.
Parse the query string into ``(key, value)`` pairs and call
``secrets_masker.redact(value, key)`` per pair before logging. This
matches the pattern already used in ``logging/decorators.py``: keys
whose names are flagged sensitive by ``secrets_masker`` (``password``,
``token``, ``api_key``, …) have their values replaced with ``***``;
values previously registered via ``mask_secret()`` are caught too.
Non-sensitive keys are unchanged, blank values are preserved so log
readers still see the parameter was present, and malformed query
strings fall back to raw logging rather than silently dropping
diagnostic information.
---
.../airflow/api_fastapi/common/http_access_log.py | 28 ++++++++++-
.../api_fastapi/common/test_http_access_log.py | 56 +++++++++++++++++++++-
2 files changed, 82 insertions(+), 2 deletions(-)
diff --git a/airflow-core/src/airflow/api_fastapi/common/http_access_log.py
b/airflow-core/src/airflow/api_fastapi/common/http_access_log.py
index 581c3593790..efaf570c580 100644
--- a/airflow-core/src/airflow/api_fastapi/common/http_access_log.py
+++ b/airflow-core/src/airflow/api_fastapi/common/http_access_log.py
@@ -21,9 +21,12 @@ from __future__ import annotations
import contextlib
import time
from typing import TYPE_CHECKING
+from urllib.parse import parse_qsl, urlencode
import structlog
+from airflow._shared.secrets_masker import secrets_masker
+
if TYPE_CHECKING:
from starlette.types import ASGIApp, Message, Receive, Scope, Send
@@ -32,6 +35,29 @@ logger = structlog.get_logger(logger_name="http.access")
_HEALTH_PATHS = frozenset(["/api/v2/monitor/health"])
+def _redact_query_string(query: str) -> str:
+ """
+ Redact secret-looking query parameters before they reach the access log.
+
+ Treat each ``key=value`` pair independently so a key whose name signals a
secret
+ (``password``, ``token``, ``api_key`` — anything ``secrets_masker`` flags
as sensitive)
+ gets its value replaced with ``***``. Also catches values that were
previously registered
+ via ``mask_secret()``.
+ """
+ if not query:
+ return query
+ try:
+ pairs = parse_qsl(query, keep_blank_values=True)
+ except ValueError:
+ # Malformed query string — leave it alone; we'd rather log the raw
bytes than
+ # silently drop diagnostic information.
+ return query
+ if not pairs:
+ return query
+ redacted_pairs = [(k, secrets_masker.redact(v, k)) for k, v in pairs]
+ return urlencode(redacted_pairs)
+
+
class HttpAccessLogMiddleware:
"""
Log completed HTTP requests as structured log events.
@@ -91,7 +117,7 @@ class HttpAccessLogMiddleware:
duration_us = (time.monotonic_ns() - start) // 1000
status = response["status"] if response is not None else 0
method = scope.get("method", "")
- query = scope["query_string"].decode("ascii",
errors="replace")
+ query =
_redact_query_string(scope["query_string"].decode("ascii", errors="replace"))
client = scope.get("client")
client_addr = f"{client[0]}:{client[1]}" if client else
None
diff --git a/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py
b/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py
index 5e7e9e6e760..243049aedbe 100644
--- a/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py
+++ b/airflow-core/tests/unit/api_fastapi/common/test_http_access_log.py
@@ -16,6 +16,7 @@
# under the License.
from __future__ import annotations
+import pytest
import structlog
import structlog.testing
from starlette.applications import Starlette
@@ -23,7 +24,29 @@ from starlette.responses import PlainTextResponse
from starlette.routing import Route
from starlette.testclient import TestClient
-from airflow.api_fastapi.common.http_access_log import _HEALTH_PATHS,
HttpAccessLogMiddleware
+from airflow._shared.secrets_masker import _secrets_masker
+from airflow.api_fastapi.common.http_access_log import (
+ _HEALTH_PATHS,
+ HttpAccessLogMiddleware,
+ _redact_query_string,
+)
+
+
[email protected]
+def _password_sensitive_field():
+ """Register ``password`` as a sensitive field name on the module-level
masker.
+
+ Production initialises this list from ``DEFAULT_SENSITIVE_FIELDS`` via
+ ``settings.mask_secret``; unit tests run without that initialisation, so we
+ populate the field explicitly for the redaction tests.
+ """
+ masker = _secrets_masker()
+ original = masker.sensitive_variables_fields
+ masker.sensitive_variables_fields = list(set(original) | {"password"})
+ try:
+ yield
+ finally:
+ masker.sensitive_variables_fields = original
def _make_app(raise_exc: bool = False) -> Starlette:
@@ -130,3 +153,34 @@ def test_non_http_scope_not_logged():
def test_health_paths_constant():
assert "/api/v2/monitor/health" in _HEALTH_PATHS
+
+
[email protected]_redact
+def
test_redact_query_string_masks_value_by_sensitive_key_name(_password_sensitive_field):
+ """A key flagged sensitive by ``secrets_masker`` has its value replaced
with ``***``."""
+ redacted = _redact_query_string("password=topsecret&safe=value")
+ assert "topsecret" not in redacted
+ assert "safe=value" in redacted
+
+
+def test_redact_query_string_leaves_safe_pairs_untouched():
+ assert _redact_query_string("page=2&limit=50") == "page=2&limit=50"
+
+
+def test_redact_query_string_handles_empty_and_blank_values():
+ assert _redact_query_string("") == ""
+ # Blank values should be preserved so log readers still see the key was
present.
+ assert _redact_query_string("flag=&other=x") == "flag=&other=x"
+
+
[email protected]_redact
+def test_logs_redact_sensitive_query_param(_password_sensitive_field):
+ """Integration: a request with `?password=secret` is logged with the value
masked."""
+ with structlog.testing.capture_logs() as logs:
+ client = TestClient(_make_app(), raise_server_exceptions=False)
+ client.get("/?password=topsecret&keep=ok")
+
+ assert len(logs) == 1
+ query = logs[0]["query"]
+ assert "topsecret" not in query
+ assert "keep=ok" in query