This is an automated email from the ASF dual-hosted git repository.

raulcd pushed a commit to branch maint-12.0.0
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 2c81132edfa33638312d8692dd2f5a12c7c207b2
Author: Will Jones <[email protected]>
AuthorDate: Thu Apr 13 13:39:37 2023 -0700

    GH-35008: [C++] Add printers for REETestData and PageIndexReaderParam to 
placate Valgrind (#35011)
    
    ### Rationale for this change
    
    It seems this is necessary to make Valgrind happy with parameterized GTest 
cases. See: 
https://stackoverflow.com/questions/33747056/c-using-gtest-value-parameterized-tests-with-structs-causes-valgrind-errors
    
    ### What changes are included in this PR?
    
    <!--
    There is no need to duplicate the description in the issue here but it is 
sometimes worth providing a summary of the individual changes in this PR.
    -->
    
    ### Are these changes tested?
    
    * [x] Run the crossbow job for valgrind
    
    ### Are there any user-facing changes?
    
    
    * Closes: #35008
    
    Authored-by: Will Jones <[email protected]>
    Signed-off-by: Will Jones <[email protected]>
---
 .../arrow/compute/kernels/vector_run_end_encode_test.cc  | 13 +++++++++++++
 cpp/src/arrow/dataset/file_json_test.cc                  | 13 ++++++++++---
 cpp/src/arrow/dataset/test_util_internal.h               |  8 +++++++-
 cpp/src/arrow/util/utf8_util_test.cc                     |  4 ++++
 cpp/src/parquet/page_index.cc                            |  6 ++++++
 cpp/src/parquet/page_index.h                             |  3 +++
 cpp/src/parquet/reader_test.cc                           | 16 ++++++++++++++++
 7 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc 
b/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc
index d415f9d14c..f718d82774 100644
--- a/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc
+++ b/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc
@@ -131,6 +131,19 @@ struct REETestData {
   std::string description;
 };
 
+// For valgrind
+std::ostream& operator<<(std::ostream& out, const REETestData& test_data) {
+  out << "REETestData{description = " << test_data.description
+      << ", input = " << test_data.input->ToString() << ", expected_values = ";
+
+  for (const auto& expected_value : test_data.expected_values) {
+    out << expected_value->ToString() << ", ";
+  }
+
+  out << ", chunked = " << test_data.chunked << "}";
+  return out;
+}
+
 }  // namespace
 
 class TestRunEndEncodeDecode : public ::testing::TestWithParam<
diff --git a/cpp/src/arrow/dataset/file_json_test.cc 
b/cpp/src/arrow/dataset/file_json_test.cc
index b34b5cddf1..2b4fcdd82f 100644
--- a/cpp/src/arrow/dataset/file_json_test.cc
+++ b/cpp/src/arrow/dataset/file_json_test.cc
@@ -243,11 +243,18 @@ class JsonScanMixin {
   T* const this_ = static_cast<T*>(this);
 };
 
-class TestJsonFormat
-    : public FileFormatFixtureMixin<JsonFormatHelper, json::kMaxParserNumRows> 
{};
+// Use a reduced number of rows in valgrind to avoid timeouts.
+#ifndef ARROW_VALGRIND
+constexpr static int64_t kTestMaxNumRows = json::kMaxParserNumRows;
+#else
+constexpr static int64_t kTestMaxNumRows = 1024;
+#endif
+
+class TestJsonFormat : public FileFormatFixtureMixin<JsonFormatHelper, 
kTestMaxNumRows> {
+};
 
 class TestJsonFormatV2
-    : public FileFormatFixtureMixinV2<JsonFormatHelper, 
json::kMaxParserNumRows> {};
+    : public FileFormatFixtureMixinV2<JsonFormatHelper, kTestMaxNumRows> {};
 
 class TestJsonScan : public FileFormatScanMixin<JsonFormatHelper>,
                      public JsonScanMixin<TestJsonScan> {
diff --git a/cpp/src/arrow/dataset/test_util_internal.h 
b/cpp/src/arrow/dataset/test_util_internal.h
index 765e22c3fd..80dd4adbf9 100644
--- a/cpp/src/arrow/dataset/test_util_internal.h
+++ b/cpp/src/arrow/dataset/test_util_internal.h
@@ -432,8 +432,14 @@ struct TestFormatParams {
 
   static std::vector<TestFormatParams> Values() {
     std::vector<TestFormatParams> values;
+    // Use a reduced number of batches in valgrind to avoid timeouts.
+#ifndef ARROW_VALGRIND
+    int num_batches = 16;
+#else
+    int num_batches = 4;
+#endif
     for (const bool use_threads : std::vector<bool>{true, false}) {
-      values.push_back(TestFormatParams{use_threads, 16, 1024});
+      values.push_back(TestFormatParams{use_threads, num_batches, 1024});
     }
     return values;
   }
diff --git a/cpp/src/arrow/util/utf8_util_test.cc 
b/cpp/src/arrow/util/utf8_util_test.cc
index 2af5ac954b..3c8059d904 100644
--- a/cpp/src/arrow/util/utf8_util_test.cc
+++ b/cpp/src/arrow/util/utf8_util_test.cc
@@ -346,6 +346,9 @@ TEST(SkipUTF8BOM, Basics) {
   CheckTruncated("\xef\xbb");
 }
 
+// Currently a known issue with Valgrind and wide string AVX2 instructions
+// https://sourceware.org/bugzilla/show_bug.cgi?id=22954
+#ifndef ARROW_VALGRIND
 TEST(UTF8ToWideString, Basics) {
   auto CheckOk = [](const std::string& s, const std::wstring& expected) -> 
void {
     ASSERT_OK_AND_ASSIGN(std::wstring ws, UTF8ToWideString(s));
@@ -366,6 +369,7 @@ TEST(UTF8ToWideString, Basics) {
   CheckInvalid("\xff");
   CheckInvalid("h\xc3");
 }
+#endif
 
 TEST(WideStringToUTF8, Basics) {
   auto CheckOk = [](const std::wstring& ws, const std::string& expected) -> 
void {
diff --git a/cpp/src/parquet/page_index.cc b/cpp/src/parquet/page_index.cc
index c2a42547ee..d29cc33eb5 100644
--- a/cpp/src/parquet/page_index.cc
+++ b/cpp/src/parquet/page_index.cc
@@ -914,4 +914,10 @@ std::unique_ptr<PageIndexBuilder> 
PageIndexBuilder::Make(const SchemaDescriptor*
   return std::make_unique<PageIndexBuilderImpl>(schema);
 }
 
+std::ostream& operator<<(std::ostream& out, const PageIndexSelection& 
selection) {
+  out << "PageIndexSelection{column_index = " << selection.column_index
+      << ", offset_index = " << selection.offset_index << "}";
+  return out;
+}
+
 }  // namespace parquet
diff --git a/cpp/src/parquet/page_index.h b/cpp/src/parquet/page_index.h
index b6d27cfc41..b6ea5fd6ab 100644
--- a/cpp/src/parquet/page_index.h
+++ b/cpp/src/parquet/page_index.h
@@ -161,6 +161,9 @@ struct PageIndexSelection {
   bool offset_index = false;
 };
 
+PARQUET_EXPORT
+std::ostream& operator<<(std::ostream& out, const PageIndexSelection& params);
+
 struct RowGroupIndexReadRange {
   /// Base start and total size of column index of all column chunks in a row 
group.
   /// If none of the column chunks have column index, it is set to 
std::nullopt.
diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc
index d12e2f9bff..0a73002846 100644
--- a/cpp/src/parquet/reader_test.cc
+++ b/cpp/src/parquet/reader_test.cc
@@ -1291,6 +1291,22 @@ struct PageIndexReaderParam {
   PageIndexSelection index_selection;
 };
 
+// For valgrind
+std::ostream& operator<<(std::ostream& out, const PageIndexReaderParam& 
params) {
+  out << "PageIndexReaderParam{row_group_indices = ";
+  for (const auto& i : params.row_group_indices) {
+    out << i << ", ";
+  }
+  out << "column_indices = ";
+  for (const auto& i : params.column_indices) {
+    out << i << ", ";
+  }
+
+  out << "index_selection = " << params.index_selection << "}";
+
+  return out;
+}
+
 class ParameterizedPageIndexReaderTest
     : public ::testing::TestWithParam<PageIndexReaderParam> {};
 

Reply via email to