This is an automated email from the ASF dual-hosted git repository.

jhtimmins pushed a commit to branch v2-1-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit 84df5ffd8870cb2a315f3e5007e6c392ced0c96b
Author: Tzu-ping Chung <t...@astronomer.io>
AuthorDate: Fri Aug 13 07:44:21 2021 +0800

    Rescue if a DagRun's DAG was removed from db (#17544)
    
    Fix #17442.
    
    The exception happens when a DAG is removed from the database (via the
    web UI or something else) but there are still unfinished runs associated
    with it. This catches the scenario and uses the existing fallback,
    setting `max_active_runs` to zero.
    
    (cherry picked from commit 60ddcd10bbe5c2375b14307456b8e5f76c1d0dcd)
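
For context, the fix follows a simple look-up-with-fallback pattern: treat a
DAG whose serialized form is missing the same as a DAG that does not exist at
all. A minimal standalone sketch of that pattern (the `get_max_active_runs`
helper below is hypothetical, for illustration only, and is not part of this
commit):

    from airflow.exceptions import SerializedDagNotFound

    def get_max_active_runs(dag_bag, dag_id):
        """Hypothetical helper mirroring the view logic in this commit."""
        try:
            dag = dag_bag.get_dag(dag_id)
        except SerializedDagNotFound:
            # The DagModel row may still exist (e.g. a subdag whose parent
            # was deleted), but there is nothing left to deserialize.
            dag = None
        if dag is None:
            return 0  # the view's existing fallback value
        return dag.max_active_runs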
---
 airflow/www/views.py                  |  7 ++-
 tests/www/views/test_views_blocked.py | 84 +++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/airflow/www/views.py b/airflow/www/views.py
index 09d27e0..150ecc9 100644
--- a/airflow/www/views.py
+++ b/airflow/www/views.py
@@ -89,7 +89,7 @@ from airflow.api.common.experimental.mark_tasks import (
     set_dag_run_state_to_success,
 )
 from airflow.configuration import AIRFLOW_CONFIG, conf
-from airflow.exceptions import AirflowException
+from airflow.exceptions import AirflowException, SerializedDagNotFound
 from airflow.executors.executor_loader import ExecutorLoader
 from airflow.jobs.base_job import BaseJob
 from airflow.jobs.scheduler_job import SchedulerJob
@@ -1677,7 +1677,10 @@ class Airflow(AirflowBaseView):
         payload = []
         for dag_id, active_dag_runs in dags:
             max_active_runs = 0
-            dag = current_app.dag_bag.get_dag(dag_id)
+            try:
+                dag = current_app.dag_bag.get_dag(dag_id)
+            except SerializedDagNotFound:
+                dag = None
             if dag:
                 # TODO: Make max_active_runs a column so we can query for it directly
                 max_active_runs = dag.max_active_runs
diff --git a/tests/www/views/test_views_blocked.py b/tests/www/views/test_views_blocked.py
new file mode 100644
index 0000000..26ad7bd
--- /dev/null
+++ b/tests/www/views/test_views_blocked.py
@@ -0,0 +1,84 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import pytest
+
+from airflow.models import DagModel
+from airflow.models.dagbag import DagBag
+from airflow.models.serialized_dag import SerializedDagModel
+from airflow.operators.dummy import DummyOperator
+from airflow.operators.subdag import SubDagOperator
+from airflow.utils import timezone
+from airflow.utils.session import create_session
+from airflow.utils.state import State
+from tests.test_utils.db import clear_db_runs
+
+
+@pytest.fixture()
+def running_subdag(admin_client, dag_maker):
+    with dag_maker(dag_id="running_dag.subdag") as subdag:
+        DummyOperator(task_id="dummy")
+
+    with dag_maker(dag_id="running_dag") as dag:
+        SubDagOperator(task_id="subdag", subdag=subdag)
+
+    dag_bag = DagBag(include_examples=False, include_smart_sensor=False)
+    dag_bag.bag_dag(dag, root_dag=dag)
+
+    with create_session() as session:
+        # This writes both DAGs to DagModel, but only serializes the parent DAG.
+        dag_bag.sync_to_db(session=session)
+
+        # Simulate triggering the SubDagOperator to run the subdag.
+        subdag.create_dagrun(
+            run_id="blocked_run_example_bash_operator",
+            state=State.RUNNING,
+            execution_date=timezone.datetime(2016, 1, 1),
+            start_date=timezone.datetime(2016, 1, 1),
+            session=session,
+        )
+
+        # Now delete the parent DAG but leave the subdag.
+        session.query(DagModel).filter(DagModel.dag_id == dag.dag_id).delete()
+        session.query(SerializedDagModel).filter(SerializedDagModel.dag_id == dag.dag_id).delete()
+
+    yield subdag
+
+    with create_session() as session:
+        session.query(DagModel).filter(DagModel.dag_id == subdag.dag_id).delete()
+    clear_db_runs()
+
+
+def test_blocked_subdag_success(admin_client, running_subdag):
+    """Test the /blocked endpoint works when a DAG is deleted.
+
+    When a DAG is bagged, it is written to both DagModel and SerializedDagModel,
+    but its subdags are only written to DagModel (without serialization). Thus,
+    ``DagBag.get_dag(subdag_id)`` would raise ``SerializedDagNotFound`` if the
+    subdag was not previously bagged in the dagbag (perhaps due to its root DAG
+    being deleted). ``DagBag.get_dag()`` calls should catch the exception and
+    properly handle this situation.
+    """
+    resp = admin_client.post("/blocked", data={"dag_ids": [running_subdag.dag_id]})
+    assert resp.status_code == 200
+    assert resp.json == [
+        {
+            "dag_id": running_subdag.dag_id,
+            "active_dag_run": 1,
+            "max_active_runs": 0,  # Default value for an unserialized DAG.
+        },
+    ]
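
The new test's docstring points at the root cause: ``DagBag.sync_to_db``
writes subdags to DagModel but never serializes them, so a DB-backed DagBag
cannot load them. A rough interactive check of that behaviour, assuming an
Airflow 2.1 environment in which the fixture's DAGs were already synced (the
dag_id is the one used in the test above; this sketch is not part of the
commit):

    from airflow.exceptions import SerializedDagNotFound
    from airflow.models.dagbag import DagBag

    # Read DAGs from the serialized_dag table, as the webserver does.
    dag_bag = DagBag(read_dags_from_db=True)
    try:
        dag_bag.get_dag("running_dag.subdag")
    except SerializedDagNotFound:
        # Before this fix, the /blocked view let this propagate as a 500;
        # with the fix, the view treats it as "no DAG" and reports
        # max_active_runs == 0 instead.
        print("subdag has no serialized representation")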
