wgtmac commented on code in PR #1073: URL: https://github.com/apache/orc/pull/1073#discussion_r846901942
########## c++/test/TestSargsApplier.cc: ########## @@ -120,4 +120,75 @@ namespace orc { EXPECT_EQ(true, rowgroups[3]); } + TEST(TestSargsApplier, testStripeAndFileStats) { + auto type = std::unique_ptr<Type>( + Type::buildTypeFromString("struct<x:int,y:int>")); + auto sarg = SearchArgumentFactory::newBuilder() + ->startAnd() + .equals( + "x", + PredicateDataType::LONG, + Literal(static_cast<int64_t>(20))) + .equals( + "y", + PredicateDataType::LONG, + Literal(static_cast<int64_t>(40))) + .end() + .build(); + // Test stripe stats 0 <= x <= 10 and 0 <= y <= 50 + { + orc::proto::StripeStatistics stripeStats; + proto::ColumnStatistics structStatistics; + structStatistics.set_hasnull(false); + *stripeStats.add_colstats() = structStatistics; + *stripeStats.add_colstats() = createIntStats(0L, 10L); + *stripeStats.add_colstats() = createIntStats(0L, 50L); + SargsApplier applier(*type, sarg.get(), 1000, WriterVersion_ORC_135); + EXPECT_FALSE(applier.evaluateStripeStatistics(3000, stripeStats)); + } + // Test stripe stats 0 <= x <= 50 and 0 <= y <= 50 + { + orc::proto::StripeStatistics stripeStats; + proto::ColumnStatistics structStatistics; + structStatistics.set_hasnull(false); + *stripeStats.add_colstats() = structStatistics; + *stripeStats.add_colstats() = createIntStats(0L, 50L); + *stripeStats.add_colstats() = createIntStats(0L, 50L); + SargsApplier applier(*type, sarg.get(), 1000, WriterVersion_ORC_135); + EXPECT_TRUE(applier.evaluateStripeStatistics(3000, stripeStats)); + } + // Test file stats 0 <= x <= 10 and 0 <= y <= 50 + { + orc::proto::Footer footer; + proto::ColumnStatistics structStatistics; + structStatistics.set_hasnull(false); + *footer.add_statistics() = structStatistics; + *footer.add_statistics() = createIntStats(0L, 10L); + *footer.add_statistics() = createIntStats(0L, 50L); + SargsApplier applier(*type, sarg.get(), 1000, WriterVersion_ORC_135); + EXPECT_FALSE(applier.evaluateFileStatistics(footer)); + } + // Test file stats 0 <= x <= 50 and 0 <= y <= 30 + { + orc::proto::Footer footer; + proto::ColumnStatistics structStatistics; + structStatistics.set_hasnull(false); + *footer.add_statistics() = structStatistics; + *footer.add_statistics() = createIntStats(0L, 50L); + *footer.add_statistics() = createIntStats(0L, 30L); + SargsApplier applier(*type, sarg.get(), 1000, WriterVersion_ORC_135); + EXPECT_FALSE(applier.evaluateFileStatistics(footer)); + } + // Test file stats 0 <= x <= 50 and 0 <= y <= 50 + { + orc::proto::Footer footer; + proto::ColumnStatistics structStatistics; + structStatistics.set_hasnull(false); + *footer.add_statistics() = structStatistics; + *footer.add_statistics() = createIntStats(0L, 50L); + *footer.add_statistics() = createIntStats(0L, 50L); + SargsApplier applier(*type, sarg.get(), 1000, WriterVersion_ORC_135); + EXPECT_TRUE(applier.evaluateFileStatistics(footer)); + } + } Review Comment: I don't think the C++ code path has considered NaN and Infinity of floating types. The comparison is templated here: https://github.com/apache/orc/blob/main/c%2B%2B/src/sargs/PredicateLeaf.cc#L308. It may work well for infinity. We may add an extra check whether literal or min/max value is NaN and skip evaluation if true. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@orc.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org