Lee-W commented on code in PR #43826:
URL: https://github.com/apache/airflow/pull/43826#discussion_r1851771313
##########
airflow/dag_processing/collection.py:
##########
@@ -425,3 +427,75 @@ def add_task_asset_references(
for task_id, asset_id in referenced_outlets
if (task_id, asset_id) not in orm_refs
)
+
+ def add_asset_trigger_references(
+ self, dags: dict[str, DagModel], assets: dict[tuple[str, str],
AssetModel], *, session: Session
+ ) -> None:
+ # Update references from assets being used
+ refs_to_add: dict[tuple[str, str], set[str]] = {}
+ refs_to_remove: dict[tuple[str, str], set[str]] = {}
+ triggers: dict[str, BaseTrigger] = {}
+ for name_uri, asset in self.assets.items():
+ asset_model = assets[name_uri]
+ trigger_class_path_to_trigger_dict: dict[str, BaseTrigger] = {
+ trigger.serialize()[0]: trigger for trigger in asset.watchers
+ }
+ triggers.update(trigger_class_path_to_trigger_dict)
+
+ trigger_class_paths_from_asset: set[str] =
set(trigger_class_path_to_trigger_dict.keys())
+ trigger_class_paths_from_asset_model: set[str] = {
+ trigger.classpath for trigger in asset_model.triggers
+ }
+
+ # Optimization: no diff between the DB and DAG definitions, no
update needed
+ if trigger_class_paths_from_asset ==
trigger_class_paths_from_asset_model:
+ continue
+
+ diff_to_add = trigger_class_paths_from_asset -
trigger_class_paths_from_asset_model
+ diff_to_remove = trigger_class_paths_from_asset_model -
trigger_class_paths_from_asset
+ if diff_to_add:
+ refs_to_add[name_uri] = diff_to_add
+ if diff_to_remove:
+ refs_to_remove[name_uri] = diff_to_remove
+
+ if refs_to_add:
+ all_classpaths = {classpath for classpaths in refs_to_add.values()
for classpath in classpaths}
+ orm_triggers: dict[str, Trigger] = {
+ trigger.classpath: trigger
+ for trigger in
session.scalars(select(Trigger).where(Trigger.classpath.in_(all_classpaths)))
+ }
+
+ # Create new triggers
+ new_trigger_models = [
+ trigger
+ for trigger in [
+ Trigger.from_object(triggers[classpath])
+ for classpath in all_classpaths
+ if classpath not in orm_triggers
+ ]
+ ]
+ session.add_all(new_trigger_models)
+ orm_triggers.update((trigger.classpath, trigger) for trigger in
new_trigger_models)
+
+ # Add new references
+ for name_uri, classpaths in refs_to_add.items():
+ asset_model = assets[name_uri]
+ asset_model.triggers.extend(
+ [orm_triggers.get(trigger_class_path) for
trigger_class_path in classpaths]
+ )
+
+ if refs_to_remove:
+ # Remove old references
+ for name_uri, classpaths in refs_to_remove.items():
+ asset_model = assets[name_uri]
+ asset_model.triggers = [
+ trigger for trigger in asset_model.triggers if
trigger.classpath not in classpaths
+ ]
+
+ # Remove references from assets no longer used
+ orphan_assets = session.scalars(
Review Comment:
> I could use AssetActive here but I actually need inactive assets, thus I
> could do something like "all_assets - active_assets". I am not sure this is
> better than the way I do it right now

sounds reasonable 🤔
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]