vincbeck commented on code in PR #43826:
URL: https://github.com/apache/airflow/pull/43826#discussion_r1842792069
##########
airflow/dag_processing/collection.py:
##########
@@ -425,3 +427,51 @@ def add_task_asset_references(
for task_id, asset_id in referenced_outlets
if (task_id, asset_id) not in orm_refs
)
+
+ def add_asset_trigger_references(
+ self, assets: dict[tuple[str, str], AssetModel], *, session: Session
+ ) -> None:
+ # Update references from assets being used
+ for name_uri, asset in self.assets.items():
+ asset_model = assets[name_uri]
+ trigger_class_path_to_asset_dict: dict[str, BaseTrigger] = {
+ trigger.serialize()[0]: trigger for trigger in asset.watchers
+ }
+
+ trigger_class_paths_from_asset: set[str] =
set(trigger_class_path_to_asset_dict.keys())
+ trigger_class_paths_from_asset_model: set[str] = {
+ trigger.classpath for trigger in asset_model.triggers
+ }
+
+ # Optimization: no diff between the DB and DAG definitions, no
update needed
+ if trigger_class_paths_from_asset ==
trigger_class_paths_from_asset_model:
+ continue
+
+ refs_to_add = trigger_class_paths_from_asset -
trigger_class_paths_from_asset_model
+ refs_to_remove = trigger_class_paths_from_asset_model -
trigger_class_paths_from_asset
+
+ # Remove old references
+ asset_model.triggers = [
+ trigger for trigger in asset_model.triggers if
trigger.classpath not in refs_to_remove
+ ]
+
+ # Add new references
+ for trigger_class_path in refs_to_add:
+ trigger_model = session.scalar(
+ select(Trigger).where(Trigger.classpath ==
trigger_class_path).limit(1)
+ )
+
+ # Create the trigger in the DB if it does not exist
+ if not trigger_model:
+ trigger_model =
Trigger.from_object(trigger_class_path_to_asset_dict[trigger_class_path])
+ session.add(trigger_model)
+
+ asset_model.triggers.append(trigger_model)
+
+ # Remove references from assets no longer used
+ all_assets = session.scalars(select(AssetModel))
+ # orphan_assets = set()
+ for asset_model in all_assets:
+ if (asset_model.name, asset_model.uri) not in self.assets:
+ asset_model.triggers = []
+ # orphan_assets.add(asset_model.id)
Review Comment:
Then the trigger will keep updating the asset whenever events occur. More
importantly, if we keep the association between the asset and the trigger, it
will be impossible to clean up these triggers. I want to be able to remove
triggers that are not used (meaning, associated with neither a task nor an
asset). Otherwise, they will keep polling an external resource indefinitely,
which could be very costly.
On that same topic, when doing some testing, I noticed that this function is
called per DAG (am I wrong?). As a consequence, this piece of code removes the
associations I had just created. I need to fix that.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]