pitrou commented on code in PR #47294:
URL: https://github.com/apache/arrow/pull/47294#discussion_r2355146603


##########
cpp/src/arrow/util/rle_encoding_test.cc:
##########
@@ -602,20 +1038,126 @@ struct GetBatchSpacedTestCase {
   int bit_width;
 };
 
-TEST(RleDecoder, GetBatchSpaced) {
-  uint32_t kSeed = 1337;
-  ::arrow::random::RandomArrayGenerator rand(kSeed);
-
-  std::vector<GetBatchSpacedTestCase<int32_t>> int32_cases{
-      {1, 100000, 0.01, 1}, {1, 100000, 0.1, 1},    {1, 100000, 0.5, 1},
-      {4, 100000, 0.05, 3}, {100, 100000, 0.05, 7},
+template <typename T>
+void DoTestGetBatchSpacedRoundtrip() {
+  using Data = DataTestRleBitPacked<T>;
+  using ArrowType = typename Data::ArrowType;
+  using RandomPart = typename Data::RandomPart;
+  using NullPart = typename Data::NullPart;
+  using RepeatPart = typename Data::RepeatPart;
+
+  std::vector<Data> test_cases = {
+      {
+          {RandomPart{/* max=*/1, /* size=*/400, /* null_proba= */ 0.1}},
+          /* bit_width= */ 1,
+      },
+      {
+          {
+              RandomPart{/* max=*/7, /* size=*/10037, /* null_proba= */ 0.0},
+              NullPart{/* size= */ 1153},
+              RandomPart{/* max=*/7, /* size=*/800, /* null_proba= */ 0.5},
+          },
+          /* bit_width= */ 3,
+      },
+      {
+          {
+              NullPart{/* size= */ 80},
+              RandomPart{/* max=*/static_cast<T>(1023), /* size=*/800,
+                         /* null_proba= */ 0.01},
+              NullPart{/* size= */ 1023},
+          },
+          /* bit_width= */ 11,
+      },
+      {
+          {RepeatPart{/* value=*/13, /* size=*/100000, /* null_proba= */ 
0.01}},
+          /* bit_width= */ 10,
+      },
+      {
+          {
+              NullPart{/* size= */ 1024},
+              RepeatPart{/* value=*/static_cast<T>(10000), /* size=*/100000,
+                         /* null_proba= */ 0.1},
+              NullPart{/* size= */ 77},
+          },
+          /* bit_width= */ 23,
+      },
+      {
+          {
+              RepeatPart{/* value=*/13, /* size=*/100000, /* null_proba= */ 
0.0},
+              NullPart{/* size= */ 1153},
+              RepeatPart{/* value=*/72, /* size=*/100799, /* null_proba= */ 
0.5},
+          },
+          /* bit_width= */ 10,
+      },
+      {
+          {
+              RandomPart{/* max=*/1, /* size=*/1013, /* null_proba= */ 0.01},
+              NullPart{/* size=*/8},
+              RepeatPart{1, /* size= */ 256, /* null_proba= */ 0.1},
+              NullPart{/* size=*/128},
+              RepeatPart{0, /* size= */ 256, /* null_proba= */ 0.0},
+              NullPart{/* size=*/15},
+              RandomPart{/* max=*/1, /* size=*/8 * 1024, /* null_proba= */ 
0.01},
+          },
+          /* bit_width= */ 1,
+      },
   };
-  for (auto case_ : int32_cases) {
-    auto arr = rand.Int32(case_.size, /*min=*/0, case_.max_value, 
case_.null_probability);
-    CheckRoundTripSpaced<Int32Type>(*arr, case_.bit_width);
-    CheckRoundTripSpaced<Int32Type>(*arr->Slice(1), case_.bit_width);
+
+  ::arrow::random::RandomArrayGenerator rand(/* seed= */ 12);
+  // FRAGILE: we create a dictionary large enough so that any encoded value from the
+  // previous test cases can be used as an index in the dictionary.
+  // Its size must be increased accordingly if larger values are encoded in the test
+  // cases.
+  auto dict = std::static_pointer_cast<arrow::FloatArray>(rand.Float32(20000, 
-1.0, 1.0));
+
+  // Number of bits available in T to write a positive integer.
+  constexpr int kBitsAvailable = 8 * sizeof(T) - (std::is_signed_v<T> ? 1 : 0);

Review Comment:
   I mean that we always seem to test with the largest possible bit width. Am I mistaken?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to