pitrou commented on code in PR #46992: URL: https://github.com/apache/arrow/pull/46992#discussion_r2192901723
##########
cpp/src/parquet/statistics_test.cc:
##########
@@ -1598,31 +1648,108 @@ TEST(TestStatisticsSortOrderMinMax, Unsigned) {
   ASSERT_EQ(12, stats->num_values());
   ASSERT_EQ(0x00, stats->EncodeMin()[0]);
   ASSERT_EQ(0x0b, stats->EncodeMax()[0]);
+  std::shared_ptr<EncodedStatistics> enc_stats = column_chunk->encoded_statistics();
+  ASSERT_FALSE(enc_stats->is_max_value_exact.has_value());
+  ASSERT_FALSE(enc_stats->is_min_value_exact.has_value());
+}
+
+// Test statistics for binary column with truncated max and min values
+TEST(TestStatisticsTruncatedMinMax, Unsigned) {
+  std::string dir_string(test::get_data_dir());
+  std::stringstream ss;
+  ss << dir_string << "/binary_truncated_min_max.parquet";
+  auto path = ss.str();
+
+  // The file is generated by parquet-rs 55.1.0. It
+  // contains six columns of utf-8 and binary type. statistics_truncate_length
+  // is set to 2. Columns 0 and 1 will have truncation of min and max value,
+  // columns 2 and 3 will have truncation of min value only.

Review Comment:
   That's not what I see in the comments below.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org