This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-2.1-lakehouse in repository https://gitbox.apache.org/repos/asf/doris.git
commit 1b18bee7ddb4f90b4042936927dd52229055bcaf Author: Mingyu Chen (Rayner) <[email protected]> AuthorDate: Thu Jan 16 13:58:49 2025 +0800 [opt](parquet) change parquet init footer read size to 48KB (#46904) ### What problem does this PR solve? Change the initial footer read size from 128KB to 48KB, to slightly reduce the read size. This is the same as Presto/Trino, because a 1GB Parquet file typically has a footer of 30~40KB. A user case shows that with 30 thousand Parquet files, the footer parse time can be reduced from: ``` ParseFooterTime: avg 2s28ms, max 3s707ms, min 905.866ms ``` to ``` ParseFooterTime: avg 886.364ms, max 1s734ms, min 391.846ms ``` --- be/src/vec/exec/format/parquet/parquet_thrift_util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/vec/exec/format/parquet/parquet_thrift_util.h b/be/src/vec/exec/format/parquet/parquet_thrift_util.h index 64ccda6fe2e..c7d9b807419 100644 --- a/be/src/vec/exec/format/parquet/parquet_thrift_util.h +++ b/be/src/vec/exec/format/parquet/parquet_thrift_util.h @@ -34,7 +34,7 @@ namespace doris::vectorized { constexpr uint8_t PARQUET_VERSION_NUMBER[4] = {'P', 'A', 'R', '1'}; constexpr uint32_t PARQUET_FOOTER_SIZE = 8; -constexpr size_t INIT_META_SIZE = 128 * 1024; // 128k +constexpr size_t INIT_META_SIZE = 48 * 1024; // 48k static Status parse_thrift_footer(io::FileReaderSPtr file, FileMetaData** file_metadata, size_t* meta_size, io::IOContext* io_ctx) { --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
