This is an automated email from the ASF dual-hosted git repository. raulcd pushed a commit to branch maint-12.0.0 in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 2c81132edfa33638312d8692dd2f5a12c7c207b2 Author: Will Jones <[email protected]> AuthorDate: Thu Apr 13 13:39:37 2023 -0700 GH-35008: [C++] Add printers for REETestData and PageIndexReaderParam to placate Valgrind (#35011) ### Rationale for this change It seems this is necessary to make Valgrind happy with parameterized GTest cases. See: https://stackoverflow.com/questions/33747056/c-using-gtest-value-parameterized-tests-with-structs-causes-valgrind-errors ### What changes are included in this PR? <!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. --> ### Are these changes tested? * [x] Run the crossbow job for valgrind ### Are there any user-facing changes? * Closes: #35008 Authored-by: Will Jones <[email protected]> Signed-off-by: Will Jones <[email protected]> --- .../arrow/compute/kernels/vector_run_end_encode_test.cc | 13 +++++++++++++ cpp/src/arrow/dataset/file_json_test.cc | 13 ++++++++++--- cpp/src/arrow/dataset/test_util_internal.h | 8 +++++++- cpp/src/arrow/util/utf8_util_test.cc | 4 ++++ cpp/src/parquet/page_index.cc | 6 ++++++ cpp/src/parquet/page_index.h | 3 +++ cpp/src/parquet/reader_test.cc | 16 ++++++++++++++++ 7 files changed, 59 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc b/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc index d415f9d14c..f718d82774 100644 --- a/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc @@ -131,6 +131,19 @@ struct REETestData { std::string description; }; +// For valgrind +std::ostream& operator<<(std::ostream& out, const REETestData& test_data) { + out << "REETestData{description = " << test_data.description + << ", input = " << test_data.input->ToString() << ", expected_values = "; + + for (const auto& expected_value : test_data.expected_values) { + out << expected_value->ToString() << ", "; + } + + out << ", chunked = " << test_data.chunked << "}"; + return out; +} + } // namespace class TestRunEndEncodeDecode : public ::testing::TestWithParam< diff --git a/cpp/src/arrow/dataset/file_json_test.cc b/cpp/src/arrow/dataset/file_json_test.cc index b34b5cddf1..2b4fcdd82f 100644 --- a/cpp/src/arrow/dataset/file_json_test.cc +++ b/cpp/src/arrow/dataset/file_json_test.cc @@ -243,11 +243,18 @@ class JsonScanMixin { T* const this_ = static_cast<T*>(this); }; -class TestJsonFormat - : public FileFormatFixtureMixin<JsonFormatHelper, json::kMaxParserNumRows> {}; +// Use a reduced number of rows in valgrind to avoid timeouts. +#ifndef ARROW_VALGRIND +constexpr static int64_t kTestMaxNumRows = json::kMaxParserNumRows; +#else +constexpr static int64_t kTestMaxNumRows = 1024; +#endif + +class TestJsonFormat : public FileFormatFixtureMixin<JsonFormatHelper, kTestMaxNumRows> { +}; class TestJsonFormatV2 - : public FileFormatFixtureMixinV2<JsonFormatHelper, json::kMaxParserNumRows> {}; + : public FileFormatFixtureMixinV2<JsonFormatHelper, kTestMaxNumRows> {}; class TestJsonScan : public FileFormatScanMixin<JsonFormatHelper>, public JsonScanMixin<TestJsonScan> { diff --git a/cpp/src/arrow/dataset/test_util_internal.h b/cpp/src/arrow/dataset/test_util_internal.h index 765e22c3fd..80dd4adbf9 100644 --- a/cpp/src/arrow/dataset/test_util_internal.h +++ b/cpp/src/arrow/dataset/test_util_internal.h @@ -432,8 +432,14 @@ struct TestFormatParams { static std::vector<TestFormatParams> Values() { std::vector<TestFormatParams> values; + // Use a reduced number of batches in valgrind to avoid timeouts. +#ifndef ARROW_VALGRIND + int num_batches = 16; +#else + int num_batches = 4; +#endif for (const bool use_threads : std::vector<bool>{true, false}) { - values.push_back(TestFormatParams{use_threads, 16, 1024}); + values.push_back(TestFormatParams{use_threads, num_batches, 1024}); } return values; } diff --git a/cpp/src/arrow/util/utf8_util_test.cc b/cpp/src/arrow/util/utf8_util_test.cc index 2af5ac954b..3c8059d904 100644 --- a/cpp/src/arrow/util/utf8_util_test.cc +++ b/cpp/src/arrow/util/utf8_util_test.cc @@ -346,6 +346,9 @@ TEST(SkipUTF8BOM, Basics) { CheckTruncated("\xef\xbb"); } +// Currently a known issue with Valgrind and wide string AVX2 instructions +// https://sourceware.org/bugzilla/show_bug.cgi?id=22954 +#ifndef ARROW_VALGRIND TEST(UTF8ToWideString, Basics) { auto CheckOk = [](const std::string& s, const std::wstring& expected) -> void { ASSERT_OK_AND_ASSIGN(std::wstring ws, UTF8ToWideString(s)); @@ -366,6 +369,7 @@ TEST(UTF8ToWideString, Basics) { CheckInvalid("\xff"); CheckInvalid("h\xc3"); } +#endif TEST(WideStringToUTF8, Basics) { auto CheckOk = [](const std::wstring& ws, const std::string& expected) -> void { diff --git a/cpp/src/parquet/page_index.cc b/cpp/src/parquet/page_index.cc index c2a42547ee..d29cc33eb5 100644 --- a/cpp/src/parquet/page_index.cc +++ b/cpp/src/parquet/page_index.cc @@ -914,4 +914,10 @@ std::unique_ptr<PageIndexBuilder> PageIndexBuilder::Make(const SchemaDescriptor* return std::make_unique<PageIndexBuilderImpl>(schema); } +std::ostream& operator<<(std::ostream& out, const PageIndexSelection& selection) { + out << "PageIndexSelection{column_index = " << selection.column_index + << ", offset_index = " << selection.offset_index << "}"; + return out; +} + } // namespace parquet diff --git a/cpp/src/parquet/page_index.h b/cpp/src/parquet/page_index.h index b6d27cfc41..b6ea5fd6ab 100644 --- a/cpp/src/parquet/page_index.h +++ b/cpp/src/parquet/page_index.h @@ -161,6 +161,9 @@ struct PageIndexSelection { bool offset_index = false; }; +PARQUET_EXPORT +std::ostream& operator<<(std::ostream& out, const PageIndexSelection& params); + struct RowGroupIndexReadRange { /// Base start and total size of column index of all column chunks in a row group. /// If none of the column chunks have column index, it is set to std::nullopt. diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index d12e2f9bff..0a73002846 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -1291,6 +1291,22 @@ struct PageIndexReaderParam { PageIndexSelection index_selection; }; +// For valgrind +std::ostream& operator<<(std::ostream& out, const PageIndexReaderParam& params) { + out << "PageIndexReaderParam{row_group_indices = "; + for (const auto& i : params.row_group_indices) { + out << i << ", "; + } + out << "column_indices = "; + for (const auto& i : params.column_indices) { + out << i << ", "; + } + + out << "index_selection = " << params.index_selection << "}"; + + return out; +} + class ParameterizedPageIndexReaderTest : public ::testing::TestWithParam<PageIndexReaderParam> {};
