jason810496 commented on code in PR #49470: URL: https://github.com/apache/airflow/pull/49470#discussion_r2236194519
########## airflow-core/src/airflow/utils/log/file_task_handler.py: ########## @@ -19,51 +19,114 @@ from __future__ import annotations -import itertools +import heapq +import io import logging import os -from collections.abc import Callable, Iterable +from collections.abc import Callable, Generator, Iterator from contextlib import suppress from datetime import datetime from enum import Enum +from itertools import chain, islice from pathlib import Path -from typing import TYPE_CHECKING, Any +from types import GeneratorType +from typing import IO, TYPE_CHECKING, TypedDict, cast from urllib.parse import urljoin import pendulum from pydantic import BaseModel, ConfigDict, ValidationError +from typing_extensions import NotRequired from airflow.configuration import conf from airflow.executors.executor_loader import ExecutorLoader from airflow.utils.helpers import parse_template_string, render_template +from airflow.utils.log.log_stream_accumulator import LogStreamAccumulator from airflow.utils.log.logging_mixin import SetContextPropagate from airflow.utils.log.non_caching_file_handler import NonCachingRotatingFileHandler from airflow.utils.session import NEW_SESSION, provide_session from airflow.utils.state import State, TaskInstanceState if TYPE_CHECKING: + from requests import Response + from airflow.executors.base_executor import BaseExecutor from airflow.models.taskinstance import TaskInstance from airflow.models.taskinstancehistory import TaskInstanceHistory from airflow.typing_compat import TypeAlias +CHUNK_SIZE = 1024 * 1024 * 5 # 5MB +DEFAULT_SORT_DATETIME = pendulum.datetime(2000, 1, 1) +DEFAULT_SORT_TIMESTAMP = int(DEFAULT_SORT_DATETIME.timestamp() * 1000) +SORT_KEY_OFFSET = 10000000 +"""An offset used by the _create_sort_key utility. + +Assuming 50 characters per line, an offset of 10,000,000 can represent approximately 500 MB of file data, which is sufficient for use as a constant. 
+""" +HEAP_DUMP_SIZE = 5000 +HALF_HEAP_DUMP_SIZE = HEAP_DUMP_SIZE // 2 # These types are similar, but have distinct names to make processing them less error prone -LogMessages: TypeAlias = list["StructuredLogMessage"] | list[str] -"""The log messages themselves, either in already sturcutured form, or a single string blob to be parsed later""" +LogMessages: TypeAlias = list[str] +"""The legacy format of log messages before 3.0.2""" Review Comment: Yes, this patch will be included in the `3.0.4` release. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@airflow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org