This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 74d66e96502d825df50b2b45ad4c9de4f9032920 Author: Qi Chen <kaka11.c...@gmail.com> AuthorDate: Tue May 21 10:57:58 2024 +0800 [Fix](parquet-reader) Fix incorrect Timestamp INT96 min-max statistics written by some old Parquet writers by disabling them. (#35041) Parquet INT96 timestamp values were compared incorrectly for the purposes of producing statistics by older parquet writers, so PARQUET-1065 deprecated them. The result is that any writer that produced stats was producing unusable incorrect values, except the special case where min == max and an incorrect ordering would not be material to the result. PARQUET-1026 made binary stats available and valid in that special case. --- be/src/vec/exec/format/parquet/parquet_pred_cmp.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h index 1f80aa7ecd5..916f3f64ee6 100644 --- a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h +++ b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h @@ -257,6 +257,17 @@ private: ParquetInt96 datetime96_max = *reinterpret_cast<const ParquetInt96*>(encoded_max.data()); int64_t micros_max = datetime96_max.to_timestamp_micros(); + + // From Trino: Parquet INT96 timestamp values were compared incorrectly + // for the purposes of producing statistics by older parquet writers, + // so PARQUET-1065 deprecated them. The result is that any writer that produced stats + // was producing unusable incorrect values, except the special case where min == max + // and an incorrect ordering would not be material to the result. + // PARQUET-1026 made binary stats available and valid in that special case. 
+ if (micros_min != micros_max) { + return false; + } + if constexpr (std::is_same_v<CppType, VecDateTimeValue> || std::is_same_v<CppType, DateV2Value<DateTimeV2ValueType>>) { min_value.from_unixtime(micros_min / 1000000, ctz); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org