Lee-W commented on code in PR #63546:
URL: https://github.com/apache/airflow/pull/63546#discussion_r3014734288
##########
tests/models/test_dag.py:
##########
@@ -3064,6 +3065,109 @@ def test_dags_needing_dagruns_datasets(self, dag_maker,
session):
dag_models = query.all()
assert dag_models == [dag_model]
+ def test_dags_needing_dagruns_skips_ddrq_when_serialized_dag_missing(self,
session, caplog):
+ """DDRQ rows for a dag_id without SerializedDagModel must be skipped
(no dataset_triggered info).
Review Comment:
```suggestion
"""DDRQ rows for a Dag without SerializedDagModel must be skipped
(no dataset_triggered info).
```
##########
airflow/models/dag.py:
##########
@@ -4094,13 +4099,33 @@ def dag_ready(dag_id: str, cond: BaseDataset, statuses:
dict) -> bool | None:
ser_dags = session.scalars(
select(SerializedDagModel).where(SerializedDagModel.dag_id.in_(dag_statuses.keys()))
).all()
+ ser_dag_ids = {s.dag_id for s in ser_dags}
+ missing_from_serialized = set(by_dag.keys()) - ser_dag_ids
+ if missing_from_serialized:
+ log.debug(
+ "DAGs in DDRQ but missing SerializedDagModel "
+ "(skipping — condition cannot be evaluated): %s",
+ sorted(missing_from_serialized),
+ )
+ for dag_id in missing_from_serialized:
+ del by_dag[dag_id]
+ del dag_statuses[dag_id]
Review Comment:
```suggestion
missing_from_serialized = set(by_dag.keys()) - ser_dag_ids
if missing_from_serialized:
log.debug(
"Dags have queued dataset events (DDRQs), but are not found
in the serialized_dag table"
" — skipping Dag run creation: %s",
sorted(missing_from_serialized),
)
for dag_id in missing_from_serialized:
del by_dag[dag_id]
del dag_statuses[dag_id]
```
##########
newsfragments/63546.bugfix.rst:
##########
@@ -0,0 +1 @@
+Fix premature dataset-triggered DagRuns when ``SerializedDagModel`` was
missing while ``DatasetDagRunQueue`` still had rows for that DAG; queue entries
are kept for the next evaluation.
Review Comment:
```suggestion
Fix premature dataset-triggered DagRuns when ``SerializedDagModel`` was
missing while ``DatasetDagRunQueue`` still had rows for that Dag; queue entries
are kept for the next evaluation.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]