[AIRFLOW-1074] Don't count queued tasks for concurrency limits There may be orphaned tasks queued but not in a running dag run that will not be cleared. We should not count these as they will interfere.
I hate to do this, but I changed my mind on counting queued tasks. 1. Queued tasks that are actually queued generally get set to running pretty quickly. 2. Because of the worker-side check, we won't actually pass concurrency. I don't think the queued thing is a big deal because of this; I'm more worried about orphaned tasks that are in QUEUED state but not in a running dag_run (so they won't get reset) interfering with concurrency. There may be orphaned tasks queued but not in a running dag run that will not be cleared. We should not count these as they will interfere. Closes #2221 from saguziel/aguziel-concurrency-2 Project: http://git-wip-us.apache.org/repos/asf/incubator-airflow/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-airflow/commit/70024935 Tree: http://git-wip-us.apache.org/repos/asf/incubator-airflow/tree/70024935 Diff: http://git-wip-us.apache.org/repos/asf/incubator-airflow/diff/70024935 Branch: refs/heads/v1-8-test Commit: 70024935f24e0ff3d2861c0ccfa69cdd38084b9d Parents: 708e8ad Author: Alex Guziel <alex.guz...@airbnb.com> Authored: Wed Apr 12 11:56:03 2017 -0700 Committer: Maxime Beauchemin <maximebeauche...@gmail.com> Committed: Thu Jun 8 08:36:20 2017 -0700 ---------------------------------------------------------------------- airflow/jobs.py | 3 ++- tests/jobs.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/70024935/airflow/jobs.py ---------------------------------------------------------------------- diff --git a/airflow/jobs.py b/airflow/jobs.py index edc7458..457966f 100644 --- a/airflow/jobs.py +++ b/airflow/jobs.py @@ -1062,11 +1062,12 @@ class SchedulerJob(BaseJob): dag_id = task_instance.dag_id if dag_id not in dag_id_to_possibly_running_task_count: + # TODO(saguziel): also check against QUEUED state, see AIRFLOW-1104 dag_id_to_possibly_running_task_count[dag_id] = \ 
DAG.get_num_task_instances( dag_id, simple_dag_bag.get_dag(dag_id).task_ids, - states=[State.RUNNING, State.QUEUED], + states=[State.RUNNING], session=session) current_task_concurrency = dag_id_to_possibly_running_task_count[dag_id] http://git-wip-us.apache.org/repos/asf/incubator-airflow/blob/70024935/tests/jobs.py ---------------------------------------------------------------------- diff --git a/tests/jobs.py b/tests/jobs.py index 428d3ac..21102e6 100644 --- a/tests/jobs.py +++ b/tests/jobs.py @@ -573,14 +573,14 @@ class SchedulerJobTest(unittest.TestCase): ti1.refresh_from_db() ti2.refresh_from_db() ti1.state = State.RUNNING - ti2.state = State.QUEUED + ti2.state = State.RUNNING session.merge(ti1) session.merge(ti2) session.commit() self.assertEqual(State.RUNNING, dr1.state) self.assertEqual(2, DAG.get_num_task_instances(dag_id, dag.task_ids, - states=[State.RUNNING, State.QUEUED], session=session)) + states=[State.RUNNING], session=session)) # create second dag run dr2 = scheduler.create_dag_run(dag) @@ -607,7 +607,7 @@ class SchedulerJobTest(unittest.TestCase): self.assertEqual(3, DAG.get_num_task_instances(dag_id, dag.task_ids, states=[State.RUNNING, State.QUEUED], session=session)) self.assertEqual(State.RUNNING, ti1.state) - self.assertEqual(State.QUEUED, ti2.state) + self.assertEqual(State.RUNNING, ti2.state) six.assertCountEqual(self, [State.QUEUED, State.SCHEDULED], [ti3.state, ti4.state]) session.close()