amoghrajesh commented on code in PR #68675:
URL: https://github.com/apache/airflow/pull/68675#discussion_r3428734871
##########
scripts/ci/prek/check_metrics_synced_with_the_registry.py:
##########
@@ -220,6 +271,53 @@ def scan_file_for_metrics(file_path: Path) ->
list[MetricCall]:
return metrics_found
+def scan_file_for_direct_stats_imports(file_path: Path) ->
list[DirectStatsImport]:
+ """Return direct imports of stats module-level functions.
+
+ Only the metric-emitting methods in ``STATS_METHOD_TO_TYPE`` (``incr``,
+ ``decr``, ``gauge``, ``timing``, ``timer``) are restricted; other names
+ exported from ``stats.py`` (helpers, the ``Stats`` shim, ``initialize``,
+ etc.) may still be imported directly.
+
+ Imports inside a ``try`` block that catches ``ImportError`` (or
+ ``ModuleNotFoundError``) are exempt: those are intentional
+ back-compat version checks whose success is the signal the code is checking
+ for, and rewriting them to namespace form would defeat the check.
+ """
+ try:
+ source = file_path.read_text(encoding="utf-8")
+ tree = ast.parse(source, filename=str(file_path))
+ except (OSError, UnicodeDecodeError, SyntaxError):
+ return []
+
+ # Imports wrapped with try-except. These should be ignored.
+ back_compat_check_ids = find_back_compat_check_import_ids(tree)
+
+ violations: list[DirectStatsImport] = []
+ for node in ast.walk(tree):
+ if not isinstance(node, ast.ImportFrom):
+ continue
+ if not _is_stats_module_path(node.module):
+ continue
+ if id(node) in back_compat_check_ids:
+ continue
+ offending: list[str] = []
+ for alias in node.names:
+ if alias.name in STATS_METHOD_TO_TYPE:
+ offending.append(f"{alias.name} as {alias.asname}" if
alias.asname else alias.name)
+ if not offending:
+ continue
+ violations.append(
+ DirectStatsImport(
+ file_path=str(file_path),
+ line_num=node.lineno,
+ module=node.module or "",
+ imported_names=offending,
+ )
+ )
+ return violations
Review Comment:
A file with both a valid `stats.incr()` and a banned direct import has its
metric call scan skipped entirely i think, so a genuine registry mismatch in
that file stays hidden until the import is fixed unless im mistaken
##########
scripts/ci/prek/check_metrics_synced_with_the_registry.py:
##########
@@ -177,6 +187,47 @@ class MetricCall:
is_dynamic: bool
+@dataclass
+class DirectStatsImport:
+ file_path: str
+ line_num: int
+ module: str
+ imported_names: list[str]
+
+
+def _is_stats_module_path(module: str | None) -> bool:
+ return module is not None and STATS_MODULE_SUFFIX in module and
module.endswith(STATS_MODULE_SUFFIX)
Review Comment:
`in` clause is redundant? `endswith` implies inclusion too right?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]