yuseok89 commented on code in PR #67242:
URL: https://github.com/apache/airflow/pull/67242#discussion_r3403943036
##########
airflow-core/src/airflow/api_fastapi/core_api/routes/ui/dags.py:
##########
@@ -308,3 +311,45 @@ def get_latest_run_info(dag_id: str, session: SessionDep)
-> DAGRunLightResponse
latest_run_info = session.execute(latest_run_info_select).one_or_none()
return DAGRunLightResponse(**latest_run_info._mapping) if latest_run_info
else None
+
+
+@dags_router.get(
+ "/run_state_counts",
+ dependencies=[
+ Depends(requires_access_dag(method="GET")),
+ Depends(requires_access_dag(method="GET",
access_entity=DagAccessEntity.RUN)),
+ ],
+ operation_id="get_dag_run_state_counts_ui",
+)
+def get_dag_run_state_counts(
+ session: SessionDep,
+ readable_dags_filter: ReadableDagsFilterDep,
+ dag_ids: Annotated[list[str], Query(min_length=1)],
+ run_after_gte: datetime | None = None,
+) -> DAGsRunStateCountsCollectionResponse:
+ """Return per-Dag DagRun state counts (zero-filled) for the Dag list
page."""
+ permitted_dag_ids = readable_dags_filter.value or set()
+ requested_dag_ids = [dag_id for dag_id in dict.fromkeys(dag_ids) if dag_id
in permitted_dag_ids]
+ counts_by_dag: dict[str, dict[DagRunState, int]] = {
+ dag_id: {state: 0 for state in DagRunState} for dag_id in
requested_dag_ids
+ }
+
+ if requested_dag_ids:
+ count_query = (
+ select(DagRun.dag_id, DagRun.state, func.count().label("cnt"))
+ .where(DagRun.dag_id.in_(requested_dag_ids))
+ .group_by(DagRun.dag_id, DagRun.state)
+ )
+ if run_after_gte is not None:
+ count_query = count_query.where(DagRun.run_after >= run_after_gte)
+ for row in session.execute(count_query):
+ if row.state is None:
+ continue
+ counts_by_dag[row.dag_id][DagRunState(row.state)] = row.cnt
+
Review Comment:
I tested this on a heavy dataset. I seeded 50+ Dags with about 10K DagRuns
each across all four states (success, failed, running, queued), plus one Dag
with over 5M DagRuns as a worst case.
With the improved query, the `run_state_counts` endpoint stayed responsive even
with the 5M-run Dag on the page. There is a difference between cold (uncached)
and warm (cached) runs, but it looks acceptable to me. I'd welcome your opinion.
<img width="1133" height="607" alt="image"
src="https://github.com/user-attachments/assets/d66af80d-ad2d-4b89-a383-617d81b78e65"
/>
This heavy testing also surfaced a few other spots, unrelated to this
change, that don't scale well on large DagRun tables. I'll handle those in
separate PRs.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]