bugraoz93 commented on code in PR #44332: URL: https://github.com/apache/airflow/pull/44332#discussion_r1859660647
########## airflow/api_fastapi/core_api/routes/ui/grid.py: ########## @@ -0,0 +1,292 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import collections +import itertools +import operator +from functools import cache +from typing import Annotated + +from fastapi import Depends, HTTPException, Request, status +from sqlalchemy import select +from sqlalchemy.orm import Session +from typing_extensions import Any + +from airflow import DAG +from airflow.api_fastapi.common.db.common import get_session, paginated_select +from airflow.api_fastapi.common.parameters import ( + DateTimeQuery, + QueryDagRunRunStatesFilter, + QueryDagRunRunTypesFilter, + QueryLimit, + QueryOffset, + SortParam, +) +from airflow.api_fastapi.common.router import AirflowRouter +from airflow.api_fastapi.core_api.datamodels.ui.grid import ( + GridDAGRun, + GridDAGRunwithTIs, + GridResponse, + GridTaskInstance, + GridTaskInstanceSummary, +) +from airflow.api_fastapi.core_api.openapi.exceptions import create_openapi_http_exception_doc +from airflow.configuration import conf +from airflow.exceptions import AirflowConfigException +from airflow.models import DagRun, TaskInstance +from airflow.models.baseoperator import 
BaseOperator +from airflow.utils.state import TaskInstanceState +from airflow.utils.task_group import TaskGroup + +grid_router = AirflowRouter(prefix="/grid", tags=["Grid"]) + + +@grid_router.get( + "/", + include_in_schema=False, + responses=create_openapi_http_exception_doc([status.HTTP_400_BAD_REQUEST, status.HTTP_404_NOT_FOUND]), +) +def grid_data( + dag_id: str, + base_date: DateTimeQuery, + run_types: QueryDagRunRunTypesFilter, + run_states: QueryDagRunRunStatesFilter, + session: Annotated[Session, Depends(get_session)], + offset: QueryOffset, + request: Request, + num_runs: QueryLimit, + root: str | None = None, + filter_upstream: bool = False, + filter_downstream: bool = False, +) -> GridResponse: + """Return grid data.""" + ## Database calls to retrieve the DAG Runs and Task Instances and validate the data + dag: DAG = request.app.state.dag_bag.get_dag(dag_id) + if not dag: + raise HTTPException(status.HTTP_404_NOT_FOUND, f"Dag with id {dag_id} was not found") + + if root: + dag = dag.partial_subset( + task_ids_or_regex=root, include_upstream=filter_upstream, include_downstream=filter_downstream + ) + + if num_runs is None: + num_runs = QueryLimit(conf.getint("webserver", "default_dag_run_display_number")) + + # Retrieve, sort and encode the previous DAG Runs + base_query = ( + select( + DagRun.run_id, + DagRun.queued_at, + DagRun.start_date, + DagRun.end_date, + DagRun.state, + DagRun.run_type, + DagRun.data_interval_start, + DagRun.data_interval_end, + DagRun.dag_version_id.label("version_number"), + ) + .select_from(DagRun) + .where(DagRun.dag_id == dag.dag_id, DagRun.logical_date <= base_date) + ) + + dag_runs_select_filter, _ = paginated_select( + select=base_query, + filters=[ + run_types, + run_states, + ], + order_by=SortParam(allowed_attrs=["id"], model=DagRun), + offset=offset, + limit=num_runs, + ) + + dag_runs = session.execute(dag_runs_select_filter) + # Validate the DAG Runs to have consistent data + dag_runs = [GridDAGRun(**dag_run) for 
dag_run in dag_runs.all()] + + # Check if there are any DAG Runs with given criteria to eliminate unnecessary queries/errors + if not dag_runs: + raise HTTPException( + status.HTTP_404_NOT_FOUND, + f"No DAG Runs found for DAG {dag.dag_id} with given criteria, please check the filters", + ) + + # Retrieve, sort and encode the Task Instances + tis_of_dag_runs, _ = paginated_select( + select=select( + TaskInstance.run_id, + TaskInstance.task_id, + TaskInstance.try_number, + TaskInstance.state, + TaskInstance.start_date, + TaskInstance.end_date, + TaskInstance.queued_dttm.label("queued_dttm"), + ) + .join(TaskInstance.task_instance_note, isouter=True) + .where(TaskInstance.dag_id == dag.dag_id), + filters=[], + order_by=SortParam(allowed_attrs=["task_id", "run_id"], model=TaskInstance), + offset=offset, + limit=None, + ) + + task_instances = session.execute(tis_of_dag_runs) + + # Validate the task instances to have consistent data + task_instances = [GridTaskInstance(**ti) for ti in task_instances] Review Comment: I used this as a data class while implementing, not for validation. Deleted :) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
