pitrou commented on code in PR #43389:
URL: https://github.com/apache/arrow/pull/43389#discussion_r1718430137


##########
cpp/src/arrow/compute/row/compare_test.cc:
##########
@@ -166,138 +171,306 @@ TEST(KeyCompare, CompareColumnsToRowsTempStackUsage) {
   }
 }
 
+namespace {
+
+Result<RowTableImpl> MakeRowTableFromExecBatch(const ExecBatch& batch) {
+  RowTableImpl row_table;
+
+  std::vector<KeyColumnMetadata> column_metadatas;
+  RETURN_NOT_OK(ColumnMetadatasFromExecBatch(batch, &column_metadatas));
+  RowTableMetadata table_metadata;
+  table_metadata.FromColumnMetadataVector(column_metadatas, sizeof(uint64_t),
+                                          sizeof(uint64_t));
+  RETURN_NOT_OK(row_table.Init(default_memory_pool(), table_metadata));
+  std::vector<uint16_t> row_ids(batch.length);
+  std::iota(row_ids.begin(), row_ids.end(), 0);
+  RowTableEncoder row_encoder;
+  row_encoder.Init(column_metadatas, sizeof(uint64_t), sizeof(uint64_t));
+  std::vector<KeyColumnArray> column_arrays;
+  RETURN_NOT_OK(ColumnArraysFromExecBatch(batch, &column_arrays));
+  row_encoder.PrepareEncodeSelected(0, batch.length, column_arrays);
+  RETURN_NOT_OK(row_encoder.EncodeSelected(
+      &row_table, static_cast<uint32_t>(batch.length), row_ids.data()));
+
+  return row_table;
+}
+
+Result<RowTableImpl> RepeatRowTableUntil(const RowTableImpl& seed, int64_t 
num_rows) {
+  RowTableImpl row_table;
+
+  RETURN_NOT_OK(row_table.Init(default_memory_pool(), seed.metadata()));
+  // Append the seed row table repeatedly to grow the row table to big enough.
+  while (row_table.length() < num_rows) {
+    RETURN_NOT_OK(row_table.AppendSelectionFrom(seed,
+                                                
static_cast<uint32_t>(seed.length()),
+                                                /*source_row_ids=*/NULLPTR));
+  }
+
+  return row_table;
+}
+
+void AssertCompareColumnsToRowsAllMatch(const std::vector<KeyColumnArray>& 
columns,
+                                        const RowTableImpl& row_table,
+                                        const std::vector<uint32_t>& 
row_ids_to_compare) {
+  uint32_t num_rows_to_compare = 
static_cast<uint32_t>(row_ids_to_compare.size());
+
+  TempVectorStack stack;
+  ASSERT_OK(
+      stack.Init(default_memory_pool(),
+                 
KeyCompare::CompareColumnsToRowsTempStackUsage(num_rows_to_compare)));
+  LightContext ctx{CpuInfo::GetInstance()->hardware_flags(), &stack};
+
+  {
+    // No selection, output no match row ids.
+    uint32_t num_rows_no_match;
+    std::vector<uint16_t> row_ids_out(num_rows_to_compare);
+    KeyCompare::CompareColumnsToRows(num_rows_to_compare, 
/*sel_left_maybe_null=*/NULLPTR,
+                                     row_ids_to_compare.data(), &ctx, 
&num_rows_no_match,
+                                     row_ids_out.data(), columns, row_table,
+                                     /*are_cols_in_encoding_order=*/true,
+                                     
/*out_match_bitvector_maybe_null=*/NULLPTR);
+    ASSERT_EQ(num_rows_no_match, 0);
+  }
+
+  {
+    // No selection, output match bit vector.
+    std::vector<uint8_t> match_bitvector(BytesForBits(num_rows_to_compare));
+    KeyCompare::CompareColumnsToRows(
+        num_rows_to_compare, /*sel_left_maybe_null=*/NULLPTR, 
row_ids_to_compare.data(),
+        &ctx,
+        /*out_num_rows=*/NULLPTR, /*out_sel_left_maybe_same=*/NULLPTR, 
columns, row_table,
+        /*are_cols_in_encoding_order=*/true, match_bitvector.data());
+    ASSERT_EQ(CountSetBits(match_bitvector.data(), 0, num_rows_to_compare),
+              num_rows_to_compare);

Review Comment:
   Hmm, the docstring says that `CompareColumnsToRows` returns "a single 16-bit 
selection vector of rows that failed comparison". If rows are equal to columns, 
then surely all bits should be 0, not 1? Am I misunderstanding something?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to