uranusjr commented on code in PR #32122:
URL: https://github.com/apache/airflow/pull/32122#discussion_r1262141663


##########
airflow/jobs/scheduler_job_runner.py:
##########
@@ -277,9 +284,44 @@ def __get_concurrency_maps(self, states: 
Iterable[TaskInstanceState], session: S
             .where(TI.state.in_(states))
             .group_by(TI.task_id, TI.run_id, TI.dag_id)
         )
-        return ConcurrencyMap.from_concurrency_map(
-            {(dag_id, run_id, task_id): count for task_id, run_id, dag_id, 
count in ti_concurrency_query}
+        ti_concurrency = {
+            (dag_id, run_id, task_id): count for task_id, run_id, dag_id, 
count in ti_concurrency_query
+        }
+
+        tg_concurrency_query: list[tuple[str, str, str, str, int]] = (
+            session.query(TI.task_id, TI.run_id, TI.dag_id, TI.map_index)
+            .filter(
+                TI.state.in_(
+                    [
+                        State.SCHEDULED,
+                        State.QUEUED,
+                        State.RUNNING,
+                        State.UP_FOR_RESCHEDULE,
+                        State.UP_FOR_RETRY,
+                    ]
+                )
+            )
+            .filter(TI.map_index >= 0)
+            .order_by(TI.dag_id, TI.run_id, TI.map_index)
         )
+        tg_concurrency: dict[tuple[str, str, str], tuple[int, set[int]]] = 
dict()
+        for (task_id, run_id, dag_id, map_index) in tg_concurrency_query:
+            dag = self.dagbag.get_dag(dag_id, session)
+            task = dag.get_task(task_id) if dag else None
+            if task and task.task_group and 
task.task_group.max_active_groups_per_dagrun is not None:
+                group_id = task.task_group.group_id
+                key = (dag_id, run_id, group_id)
+                if key not in tg_concurrency:
+                    tg_concurrency[key] = 
(task.task_group.max_active_groups_per_dagrun, set())

Review Comment:
   Maybe `tg_concurrency` can be a `defaultdict(set)` instead, to avoid this explicit `if key not in tg_concurrency` initialization check



##########
airflow/jobs/scheduler_job_runner.py:
##########
@@ -277,9 +284,44 @@ def __get_concurrency_maps(self, states: 
Iterable[TaskInstanceState], session: S
             .where(TI.state.in_(states))
             .group_by(TI.task_id, TI.run_id, TI.dag_id)
         )
-        return ConcurrencyMap.from_concurrency_map(
-            {(dag_id, run_id, task_id): count for task_id, run_id, dag_id, 
count in ti_concurrency_query}
+        ti_concurrency = {
+            (dag_id, run_id, task_id): count for task_id, run_id, dag_id, 
count in ti_concurrency_query
+        }
+
+        tg_concurrency_query: list[tuple[str, str, str, str, int]] = (
+            session.query(TI.task_id, TI.run_id, TI.dag_id, TI.map_index)
+            .filter(
+                TI.state.in_(
+                    [
+                        State.SCHEDULED,
+                        State.QUEUED,
+                        State.RUNNING,
+                        State.UP_FOR_RESCHEDULE,
+                        State.UP_FOR_RETRY,
+                    ]
+                )
+            )
+            .filter(TI.map_index >= 0)
+            .order_by(TI.dag_id, TI.run_id, TI.map_index)
         )
+        tg_concurrency: dict[tuple[str, str, str], tuple[int, set[int]]] = 
dict()
+        for (task_id, run_id, dag_id, map_index) in tg_concurrency_query:
+            dag = self.dagbag.get_dag(dag_id, session)
+            task = dag.get_task(task_id) if dag else None
+            if task and task.task_group and 
task.task_group.max_active_groups_per_dagrun is not None:
+                group_id = task.task_group.group_id
+                key = (dag_id, run_id, group_id)
+                if key not in tg_concurrency:
+                    tg_concurrency[key] = 
(task.task_group.max_active_groups_per_dagrun, set())

Review Comment:
   Maybe `tg_concurrency` can be a `defaultdict` instead, to avoid this explicit `if key not in tg_concurrency` initialization check



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@airflow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

Reply via email to