pitrou commented on code in PR #42188: URL: https://github.com/apache/arrow/pull/42188#discussion_r1651017028
########## cpp/src/arrow/compute/row/compare_internal_avx2.cc: ########## @@ -251,6 +253,35 @@ uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2( } } +namespace { + +/// Intrinsics `_mm256_i32gather_epi32/64` treat the `vindex` as signed integer, and we +/// are using `uint32_t` to represent the offset, in range of [0, 4G), within the row +/// table. When the offset is larger than `0x80000000` (2GB), those intrinsics will treat +/// it as negative offset and gather the data from undesired address. To avoid this issue, +/// we normalize the addresses by translating `base` `0x80000000` higher, and `offset` +/// `0x80000000` lower. This way, the offset is always in range of [-2G, 2G) and those +/// intrinsics are safe. + +constexpr auto two_gb = 0x80000000ull; + +template <int scale> +inline __m256i UnsignedOffsetSafeGather32(int const* base, __m256i offset) { + auto normalized_base = base + two_gb / sizeof(int); + __m256i normalized_offset = _mm256_sub_epi32(offset, _mm256_set1_epi32(two_gb / scale)); + return _mm256_i32gather_epi32(normalized_base, normalized_offset, scale); +} + +template <int scale> +inline __m256i UnsignedOffsetSafeGather64(arrow::util::int64_for_gather_t const* base, Review Comment: What is the use of `int64_for_gather_t` exactly? ########## cpp/src/arrow/compute/row/compare_internal_avx2.cc: ########## @@ -251,6 +253,35 @@ uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2( } } +namespace { + +/// Intrinsics `_mm256_i32gather_epi32/64` treat the `vindex` as signed integer, and we +/// are using `uint32_t` to represent the offset, in range of [0, 4G), within the row +/// table. When the offset is larger than `0x80000000` (2GB), those intrinsics will treat +/// it as negative offset and gather the data from undesired address. To avoid this issue, +/// we normalize the addresses by translating `base` `0x80000000` higher, and `offset` +/// `0x80000000` lower. 
This way, the offset is always in range of [-2G, 2G) and those +/// intrinsics are safe. + +constexpr auto two_gb = 0x80000000ull; + +template <int scale> Review Comment: Two things: 1) if we're using unsigned arithmetic below, the scale type should probably be unsigned for readability and sanity? 2) naming convention: can we make this `kScale`? ########## cpp/src/arrow/compute/row/compare_test.cc: ########## @@ -164,5 +166,128 @@ TEST(KeyCompare, CompareColumnsToRowsTempStackUsage) { } } +// Compare columns to rows at offsets over 2GB within a row table. +// Certain AVX2 instructions may behave unexpectedly causing troubles like GH-41813. +TEST(KeyCompare, CompareColumnsToRowsLarge) { Review Comment: What is the runtime of this test? Perhaps we need to disable it on Valgrind builds. ########## cpp/src/arrow/compute/row/compare_test.cc: ########## @@ -164,5 +166,128 @@ TEST(KeyCompare, CompareColumnsToRowsTempStackUsage) { } } +// Compare columns to rows at offsets over 2GB within a row table. +// Certain AVX2 instructions may behave unexpectedly causing troubles like GH-41813. +TEST(KeyCompare, CompareColumnsToRowsLarge) { + if constexpr (sizeof(void*) == 4) { + GTEST_SKIP() << "Test only works on 64-bit platforms"; + } + + // The idea of this case is to create a row table using several fixed length columns and + // one var length column (so the row is hence var length and has offset buffer), with + // the overall data size exceeding 2GB. Then compare each row with itself. + constexpr int64_t two_gb = 2ll * 1024ll * 1024ll * 1024ll; + // The compare function requires the row id of the left column to be uint16_t, hence the + // number of rows. + constexpr int64_t num_rows = std::numeric_limits<uint16_t>::max() + 1; + const std::vector<std::shared_ptr<DataType>> fixed_length_types{uint64(), uint32()}; + // The var length column should be a little smaller than 2GB to WAR the capacity Review Comment: "WAR"? 
########## cpp/src/arrow/compute/row/compare_internal_avx2.cc: ########## @@ -236,6 +236,8 @@ uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2( irow_right = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(left_to_right_map) + i); } + // TODO: Need to test if this gather is OK when irow_right is larger than Review Comment: When you say "in the future", is it in this PR or another one? ########## cpp/src/arrow/compute/row/compare_internal_avx2.cc: ########## @@ -251,6 +253,35 @@ uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2( } } +namespace { + +/// Intrinsics `_mm256_i32gather_epi32/64` treat the `vindex` as signed integer, and we +/// are using `uint32_t` to represent the offset, in range of [0, 4G), within the row +/// table. When the offset is larger than `0x80000000` (2GB), those intrinsics will treat +/// it as negative offset and gather the data from undesired address. To avoid this issue, +/// we normalize the addresses by translating `base` `0x80000000` higher, and `offset` +/// `0x80000000` lower. This way, the offset is always in range of [-2G, 2G) and those +/// intrinsics are safe. + +constexpr auto two_gb = 0x80000000ull; Review Comment: Can we make sure we use an explicit width type here? I'm not even sure what it is expected to be for correctness of the code using this constant (`uint32_t` or `uint64_t`?) ########## cpp/src/arrow/compute/row/compare_internal_avx2.cc: ########## @@ -251,6 +253,35 @@ uint32_t KeyCompare::CompareBinaryColumnToRowHelper_avx2( } } +namespace { + +/// Intrinsics `_mm256_i32gather_epi32/64` treat the `vindex` as signed integer, and we Review Comment: Can you use regular comments (`//`)? This isn't a docstring so shouldn't use the docstring-specific prefix (`///`) ########## cpp/src/arrow/compute/row/compare_test.cc: ########## @@ -164,5 +166,128 @@ TEST(KeyCompare, CompareColumnsToRowsTempStackUsage) { } } +// Compare columns to rows at offsets over 2GB within a row table. 
+// Certain AVX2 instructions may behave unexpectedly causing troubles like GH-41813. +TEST(KeyCompare, CompareColumnsToRowsLarge) { + if constexpr (sizeof(void*) == 4) { + GTEST_SKIP() << "Test only works on 64-bit platforms"; + } + + // The idea of this case is to create a row table using several fixed length columns and + // one var length column (so the row is hence var length and has offset buffer), with + // the overall data size exceeding 2GB. Then compare each row with itself. + constexpr int64_t two_gb = 2ll * 1024ll * 1024ll * 1024ll; + // The compare function requires the row id of the left column to be uint16_t, hence the + // number of rows. + constexpr int64_t num_rows = std::numeric_limits<uint16_t>::max() + 1; + const std::vector<std::shared_ptr<DataType>> fixed_length_types{uint64(), uint32()}; + // The var length column should be a little smaller than 2GB to WAR the capacity + // limitation in the var length builder. + constexpr int32_t var_length = two_gb / num_rows - 1; + auto row_size = std::accumulate(fixed_length_types.begin(), fixed_length_types.end(), + static_cast<int64_t>(var_length), + [](int64_t acc, const std::shared_ptr<DataType>& type) { + return acc + type->byte_width(); + }); + // The overall size should be larger than 2GB. + ASSERT_GT(row_size * num_rows, two_gb); + + MemoryPool* pool = default_memory_pool(); + TempVectorStack stack; + ASSERT_OK(stack.Init(pool, KeyCompare::CompareColumnsToRowsTempStackUsage(num_rows))); + + std::vector<Datum> columns; + { + // Several fixed length arrays containing random content. + for (const auto& type : fixed_length_types) { + ASSERT_OK_AND_ASSIGN(auto column, ::arrow::gen::Random(type)->Generate(num_rows)); + columns.push_back(std::move(column)); + } + // A var length array containing 'X' repeated var_length times. 
+ ASSERT_OK_AND_ASSIGN(auto column_var_length, + ::arrow::gen::Constant( + std::make_shared<BinaryScalar>(std::string(var_length, 'X'))) + ->Generate(num_rows)); + columns.push_back(std::move(column_var_length)); + } + ExecBatch batch(std::move(columns), num_rows); + + std::vector<KeyColumnMetadata> column_metadatas; + ASSERT_OK(ColumnMetadatasFromExecBatch(batch, &column_metadatas)); + std::vector<KeyColumnArray> column_arrays; + ASSERT_OK(ColumnArraysFromExecBatch(batch, &column_arrays)); + + // The row table (right side). + RowTableMetadata table_metadata_right; + table_metadata_right.FromColumnMetadataVector(column_metadatas, sizeof(uint64_t), + sizeof(uint64_t)); + RowTableImpl row_table; + ASSERT_OK(row_table.Init(pool, table_metadata_right)); + std::vector<uint16_t> row_ids_right(num_rows); + std::iota(row_ids_right.begin(), row_ids_right.end(), 0); + RowTableEncoder row_encoder; + row_encoder.Init(column_metadatas, sizeof(uint64_t), sizeof(uint64_t)); + row_encoder.PrepareEncodeSelected(0, num_rows, column_arrays); + ASSERT_OK(row_encoder.EncodeSelected(&row_table, static_cast<uint32_t>(num_rows), + row_ids_right.data())); + + ASSERT_TRUE(row_table.offsets()); Review Comment: I'm not sure what that's supposed to check (offsets being "true"?). Do we want to make the test a bit more self-documenting, or perhaps add a comment? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org