uranusjr commented on code in PR #46460:
URL: https://github.com/apache/airflow/pull/46460#discussion_r1953877058


##########
airflow/jobs/scheduler_job_runner.py:
##########
@@ -1356,64 +1349,50 @@ def _create_dag_runs_asset_triggered(
 
             latest_dag_version = DagVersion.get_latest_version(dag.dag_id, session=session)
 
-            # Explicitly check if the DagRun already exists. This is an edge case
-            # where a Dag Run is created but `DagModel.next_dagrun` and `DagModel.next_dagrun_create_after`
-            # are not updated.
-            # We opted to check DagRun existence instead
-            # of catching an Integrity error and rolling back the session i.e
-            # we need to set dag.next_dagrun_info if the Dag Run already exists or if we
-            # create a new one. This is so that in the next Scheduling loop we try to create new runs
-            # instead of falling in a loop of Integrity Error.
-            logical_date = logical_dates[dag.dag_id]
-            if (dag.dag_id, logical_date) not in existing_dagruns:
-                previous_dag_run = session.scalar(
-                    select(DagRun)
-                    .where(
-                        DagRun.dag_id == dag.dag_id,
-                        DagRun.logical_date < logical_date,
-                        DagRun.run_type == DagRunType.ASSET_TRIGGERED,
-                    )
-                    .order_by(DagRun.logical_date.desc())
-                    .limit(1)
-                )
-                asset_event_filters = [
-                    DagScheduleAssetReference.dag_id == dag.dag_id,
-                    AssetEvent.timestamp <= logical_date,
-                ]
-                if previous_dag_run:
-                    asset_event_filters.append(AssetEvent.timestamp > previous_dag_run.logical_date)
-
-                asset_events = session.scalars(
-                    select(AssetEvent)
-                    .join(
-                        DagScheduleAssetReference,
-                        AssetEvent.asset_id == DagScheduleAssetReference.asset_id,
-                    )
-                    .where(*asset_event_filters)
-                ).all()
-
-                data_interval = dag.timetable.data_interval_for_events(logical_date, asset_events)
-                dag_run = dag.create_dagrun(
-                    run_id=DagRun.generate_run_id(
-                        run_type=DagRunType.ASSET_TRIGGERED,
-                        logical_date=logical_date,
-                        run_after=max(logical_dates.values()),
-                    ),
-                    logical_date=logical_date,
-                    data_interval=data_interval,
-                    run_after=max(logical_dates.values()),
-                    run_type=DagRunType.ASSET_TRIGGERED,
-                    triggered_by=DagRunTriggeredByType.ASSET,
-                    dag_version=latest_dag_version,
-                    state=DagRunState.QUEUED,
-                    creating_job_id=self.job.id,
-                    session=session,
+            triggered_date = triggered_dates[dag.dag_id]
+            previous_dag_run = session.scalar(
+                select(DagRun)
+                .where(
+                    DagRun.dag_id == dag.dag_id,
+                    DagRun.run_after < triggered_date,
+                    DagRun.run_type == DagRunType.ASSET_TRIGGERED,
                 )
-                Stats.incr("asset.triggered_dagruns")
-                dag_run.consumed_asset_events.extend(asset_events)
-                session.execute(
-                    delete(AssetDagRunQueue).where(AssetDagRunQueue.target_dag_id == dag_run.dag_id)
+                .order_by(DagRun.run_after.desc())
+                .limit(1)
+            )
+            asset_event_filters = [
+                DagScheduleAssetReference.dag_id == dag.dag_id,
+                AssetEvent.timestamp <= triggered_date,
+            ]
+            if previous_dag_run:
+                asset_event_filters.append(AssetEvent.timestamp > previous_dag_run.run_after)
+
+            asset_events = session.scalars(
+                select(AssetEvent)
+                .join(
+                    DagScheduleAssetReference,
+                    AssetEvent.asset_id == DagScheduleAssetReference.asset_id,
                 )
+                .where(*asset_event_filters)
+            ).all()
+
+            dag_run = dag.create_dagrun(
+                run_id=DagRun.generate_run_id(
+                    run_type=DagRunType.ASSET_TRIGGERED, logical_date=None, run_after=triggered_date
+                ),
+                logical_date=None,
+                data_interval=None,
+                run_after=triggered_date,
+                run_type=DagRunType.ASSET_TRIGGERED,
+                triggered_by=DagRunTriggeredByType.ASSET,
+                dag_version=latest_dag_version,
+                state=DagRunState.QUEUED,
+                creating_job_id=self.job.id,
+                session=session,
+            )
+            Stats.incr("asset.triggered_dagruns")
+            dag_run.consumed_asset_events.extend(asset_events)

Review Comment:
   We should be able to do better… this currently takes three queries, but
   we really only need one. I’ll take a look after this PR.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to