Copilot commented on code in PR #64523:
URL: https://github.com/apache/airflow/pull/64523#discussion_r3066495579


##########
airflow-core/src/airflow/api_fastapi/common/http_access_log.py:
##########
@@ -23,13 +23,87 @@
 from typing import TYPE_CHECKING
 
 import structlog
+from starlette.routing import Match
+
+from airflow._shared.observability.metrics.stats import Stats
 
 if TYPE_CHECKING:
     from starlette.types import ASGIApp, Message, Receive, Scope, Send
 
 logger = structlog.get_logger(logger_name="http.access")
 
 _HEALTH_PATHS = frozenset(["/api/v2/monitor/health"])
+_API_PATH_PREFIX_TO_SURFACE = (
+    ("/api/v2", "public"),
+    ("/ui", "ui"),
+)
+
+
+def _get_api_surface(path: str) -> str | None:
+    for prefix, surface in _API_PATH_PREFIX_TO_SURFACE:
+        if path == prefix or path.startswith(f"{prefix}/"):
+            return surface
+    return None
+
+
+def _get_status_family(status_code: int) -> str:
+    if status_code < 100:
+        return "unknown"
+    return f"{status_code // 100}xx"
+
+
+def _get_route_tag(scope: Scope) -> str:
+    router = scope.get("router")
+    routes = getattr(router, "routes", None)
+    partial_route_path: str | None = None
+    if routes:
+        for route in routes:
+            match, _ = route.matches(scope)
+            route_path = getattr(route, "path", None)
+            if not isinstance(route_path, str) or not route_path:
+                continue
+            if match == Match.FULL:
+                return route_path
+            if match == Match.PARTIAL and partial_route_path is None:
+                partial_route_path = route_path
+
+    if partial_route_path:
+        return partial_route_path

Review Comment:
   This re-runs route matching by iterating over all routes and calling 
`route.matches(scope)` for every API request, which can add avoidable overhead. 
Starlette typically puts the matched route into the ASGI scope (e.g., 
`scope.get(\"route\")`); prefer using that when present. If you keep the 
partial-match fallback, selecting the *most specific* partial match (e.g., 
longest `route_path`) would be more accurate than taking the first partial 
match.



##########
shared/observability/src/airflow_shared/observability/metrics/metrics_template.yaml:
##########
@@ -504,9 +504,30 @@ metrics:
     legacy_name: "edge_worker.ti.finish.{queue}.{state}.{dag_id}.{task_id}"
     name_variables: ["queue", "state", "dag_id", "task_id"]
 
+  - name: "api.requests"
+    description: "Number of completed REST API requests. Metric with 
api_surface, method, route,
+    status_code and status_family tagging."

Review Comment:
   These descriptions are split across lines inside a double-quoted scalar. 
While YAML allows multiline quoted scalars, this often leads to unexpected 
newline/whitespace rendering in generated docs or tooling. Consider switching 
to a single-line description or using a folded scalar style (`>-`) for 
predictable formatting.



##########
airflow-core/src/airflow/api_fastapi/common/http_access_log.py:
##########
@@ -23,13 +23,87 @@
 from typing import TYPE_CHECKING
 
 import structlog
+from starlette.routing import Match
+
+from airflow._shared.observability.metrics.stats import Stats
 
 if TYPE_CHECKING:
     from starlette.types import ASGIApp, Message, Receive, Scope, Send
 
 logger = structlog.get_logger(logger_name="http.access")
 
 _HEALTH_PATHS = frozenset(["/api/v2/monitor/health"])
+_API_PATH_PREFIX_TO_SURFACE = (
+    ("/api/v2", "public"),
+    ("/ui", "ui"),
+)
+
+
+def _get_api_surface(path: str) -> str | None:
+    for prefix, surface in _API_PATH_PREFIX_TO_SURFACE:
+        if path == prefix or path.startswith(f"{prefix}/"):
+            return surface
+    return None
+
+
+def _get_status_family(status_code: int) -> str:
+    if status_code < 100:
+        return "unknown"
+    return f"{status_code // 100}xx"
+
+
+def _get_route_tag(scope: Scope) -> str:
+    router = scope.get("router")
+    routes = getattr(router, "routes", None)
+    partial_route_path: str | None = None
+    if routes:
+        for route in routes:
+            match, _ = route.matches(scope)
+            route_path = getattr(route, "path", None)
+            if not isinstance(route_path, str) or not route_path:
+                continue
+            if match == Match.FULL:
+                return route_path
+            if match == Match.PARTIAL and partial_route_path is None:
+                partial_route_path = route_path
+
+    if partial_route_path:
+        return partial_route_path
+
+    endpoint = scope.get("endpoint")
+    endpoint_name = getattr(endpoint, "__name__", None)
+    if isinstance(endpoint_name, str) and endpoint_name:
+        return endpoint_name
+
+    return "unmatched"
+
+
+def _emit_api_metrics(
+    *,
+    scope: Scope,
+    path: str,
+    method: str,
+    status_code: int,
+    duration_us: int,
+) -> None:
+    api_surface = _get_api_surface(path)
+    if api_surface is None:
+        return
+
+    # Keep tags bounded so API metrics remain usable across supported backends.
+    tags = {
+        "api_surface": api_surface,
+        "method": method or "UNKNOWN",
+        "route": _get_route_tag(scope),
+        "status_code": str(status_code) if status_code else "unknown",
+        "status_family": _get_status_family(status_code),
+    }
+    duration_ms = duration_us / 1000.0
+
+    Stats.incr("api.requests", tags=tags)
+    Stats.timing("api.request.duration", duration_ms, tags=tags)

Review Comment:
   `api.request.errors` is incremented for any `status_code >= 400`, which 
includes expected client errors (4xx) such as 404. The metric name/description 
currently reads like it might be server-side failures only; please clarify the 
definition (e.g., '4xx/5xx responses') in the metrics template and/or add an 
inline comment here, or change the threshold to `>= 500` if the intent is to 
capture only server errors.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to