pankajastro commented on code in PR #40084: URL: https://github.com/apache/airflow/pull/40084#discussion_r1682428042
########## airflow/dag_processing/manager.py: ########## @@ -580,6 +580,7 @@ def _run_parsing_loop(self): pass elif isinstance(agent_signal, CallbackRequest): self._add_callback_to_queue(agent_signal) + self.log.debug("_add_callback_to_queue; agent signal; %s", agent_signal) Review Comment: I don’t have a strong opinion, but do we need to commit this log, or is it only for testing? ########## airflow/dag_processing/processor.py: ########## @@ -763,8 +763,29 @@ def _execute_dag_callbacks(self, dagbag: DagBag, request: DagCallbackRequest, se if callbacks and context: DAG.execute_callback(callbacks, context, dag.dag_id) - def _execute_task_callbacks(self, dagbag: DagBag | None, request: TaskCallbackRequest, session: Session): - if not request.is_failure_callback: + def _execute_task_callbacks( + self, dagbag: DagBag | None, request: TaskCallbackRequest, session: Session + ) -> None: + """ + Execute the task callbacks. + + :param dagbag: the DagBag to use to get the task instance + :param request: the task callback request + :param session: the session to use + """ + try: + callback_type = TaskInstanceState(request.task_callback_type) Review Comment: Why do we need to type cast here for request.task_callback_type? It seems that task_callback_type is either TaskInstanceState or None, or am I missing something? ########## airflow/sensors/time_sensor.py: ########## @@ -72,13 +73,8 @@ def __init__(self, *, target_time: datetime.time, **kwargs) -> None: self.target_datetime = timezone.convert_to_utc(aware_time) - def execute(self, context: Context) -> NoReturn: - trigger = DateTimeTrigger(moment=self.target_datetime) + def execute(self, context: Context) -> None: Review Comment: same here why we are changing NoReturn to None? ########## airflow/dag_processing/processor.py: ########## @@ -763,8 +763,29 @@ def _execute_dag_callbacks(self, dagbag: DagBag, request: DagCallbackRequest, se if callbacks and context: DAG.execute_callback(callbacks, context, dag.dag_id) - def _execute_task_callbacks(self, dagbag: DagBag | None, request: TaskCallbackRequest, session: Session): - if not request.is_failure_callback: + def _execute_task_callbacks( + self, dagbag: DagBag | None, request: TaskCallbackRequest, session: Session + ) -> None: + """ + Execute the task callbacks. + + :param dagbag: the DagBag to use to get the task instance + :param request: the task callback request + :param session: the session to use + """ + try: + callback_type = TaskInstanceState(request.task_callback_type) + except ValueError: + callback_type = None + is_remote = callback_type in (TaskInstanceState.SUCCESS, TaskInstanceState.FAILED) + + # previously we ignored any request besides failures. now if given callback type directly, + # then we respect it and execute it. additionally because in this scenario the callback + # is submitted remotely, we assume there is no need to mess with state; we simply run + # the callback + + if not is_remote and not request.is_failure_callback: + self.log.debug("not failure callback: %s", request) Review Comment: Do we need this debug here? ########## airflow/sensors/date_time.py: ########## Review Comment: I still feel that the number of use cases requiring a worker is greater than those that don't, so I was considering whether it makes sense to keep the old example and create a new one for when we do need a worker. What do you think? ########## airflow/dag_processing/processor.py: ########## @@ -763,8 +763,29 @@ def _execute_dag_callbacks(self, dagbag: DagBag, request: DagCallbackRequest, se if callbacks and context: DAG.execute_callback(callbacks, context, dag.dag_id) - def _execute_task_callbacks(self, dagbag: DagBag | None, request: TaskCallbackRequest, session: Session): - if not request.is_failure_callback: + def _execute_task_callbacks( + self, dagbag: DagBag | None, request: TaskCallbackRequest, session: Session + ) -> None: + """ + Execute the task callbacks. + + :param dagbag: the DagBag to use to get the task instance + :param request: the task callback request + :param session: the session to use + """ + try: + callback_type = TaskInstanceState(request.task_callback_type) + except ValueError: + callback_type = None + is_remote = callback_type in (TaskInstanceState.SUCCESS, TaskInstanceState.FAILED) Review Comment: Just curious why we are calling it remote? ########## airflow/triggers/base.py: ########## @@ -137,3 +150,105 @@ def __eq__(self, other): if isinstance(other, TriggerEvent): return other.payload == self.payload return False + + @provide_session + def handle_submit(self, *, task_instance: TaskInstance, session: Session = NEW_SESSION) -> None: + """ + Handle the submit event for a given task instance. + + This function sets the next method and next kwargs of the task instance, + as well as its state to scheduled. It also adds the event's payload + into the kwargs for the task. + + :param task_instance: The task instance to handle the submit event for. + :param session: The session to be used for the database callback sink. + """ + # Get the next kwargs of the task instance, or an empty dictionary if it doesn't exist + next_kwargs = task_instance.next_kwargs or {} + + # Add the event's payload into the kwargs for the task + next_kwargs["event"] = self.payload + + # Update the next kwargs of the task instance + task_instance.next_kwargs = next_kwargs + + # Remove ourselves as its trigger + task_instance.trigger_id = None + + # Set the state of the task instance to scheduled + task_instance.state = TaskInstanceState.SCHEDULED + + +class BaseTaskEndEvent(TriggerEvent): + """Base event class to end the task without resuming on worker.""" + + task_instance_state: TaskInstanceState + + def __init__(self, *, xcoms: dict[str, Any] | None = None, **kwargs) -> None: + """ + Initialize the class with the specified parameters. + + :param xcoms: A dictionary of XComs or None. + :param kwargs: Additional keyword arguments. + """ + if "payload" in kwargs: + raise ValueError("Param 'payload' not supported for this class.") + super().__init__(payload=self.task_instance_state) + self.xcoms = xcoms + + @provide_session + def handle_submit(self, *, task_instance: TaskInstance, session: Session = NEW_SESSION) -> None: + """ + Submit event for the given task instance. + + Marks the task with the state `task_instance_state` and optionally pushes xcom if applicable. + + :param task_instance: The task instance to be submitted. + :param session: The session to be used for the database callback sink. + """ + # Mark the task with terminal state and prevent it from resuming on worker + task_instance.trigger_id = None + task_instance.state = self.task_instance_state + + self._submit_callback_if_necessary(task_instance=task_instance, session=session) + self._push_xcoms_if_necessary(task_instance=task_instance) + + def _submit_callback_if_necessary(self, *, task_instance: TaskInstance, session) -> None: + """Submit a callback request if the task state is SUCCESS or FAILED.""" + is_failure = self.task_instance_state == TaskInstanceState.FAILED + if self.task_instance_state in [TaskInstanceState.SUCCESS, TaskInstanceState.FAILED]: + request = TaskCallbackRequest( + full_filepath=task_instance.dag_model.fileloc, + simple_task_instance=SimpleTaskInstance.from_ti(task_instance), + is_failure_callback=is_failure, + task_callback_type=self.task_instance_state, + ) + log.info("Sending callback: %s", request) + try: + DatabaseCallbackSink().send(callback=request, session=session) + except Exception as e: + log.error("Failed to send callback: %s", e) + + def _push_xcoms_if_necessary(self, *, task_instance: TaskInstance) -> None: + """Pushes XComs to the database if they are provided.""" + if self.xcoms: Review Comment: Do you have any examples to help me understand when we need to push values to XComs and where we set them? ########## airflow/sensors/date_time.py: ########## @@ -90,13 +91,8 @@ class DateTimeSensorAsync(DateTimeSensor): def __init__(self, **kwargs) -> None: super().__init__(**kwargs) - def execute(self, context: Context) -> NoReturn: - trigger = DateTimeTrigger(moment=timezone.parse(self.target_time)) + def execute(self, context: Context) -> None: Review Comment: why we are changing `NoReturn` to None? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org