pitrou commented on code in PR #38147: URL: https://github.com/apache/arrow/pull/38147#discussion_r1356409631
########## cpp/src/arrow/acero/hash_join_node_test.cc: ##########
@@ -2124,5 +2124,266 @@ TEST(HashJoin, ChainedIntegerHashJoins) {
   }
 }
 
+// This test case is related to GH-38147
+// To verify that the issue with offset handling has been fixed, the number of matching
+// records needs to be larger than the mini-batch size (1024).
+constexpr uint64_t NUM_MATCH_RECORDS = 1234;
+constexpr uint64_t NUM_LEFT_RECORDS = 2000;
+constexpr uint64_t NUM_RIGHT_RECORDS = 1500;
+
+std::string GenerateTimestamp() {

Review Comment:
Are the custom generation functions required to surface the bug, or can we just build on existing generation functions? We already have random generation functions, for example.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at: us...@infra.apache.org