Copilot commented on code in PR #12395:
URL: https://github.com/apache/gluten/pull/12395#discussion_r3492542786


##########
cpp/velox/benchmarks/DeltaBitmapBenchmark.cc:
##########
@@ -276,4 +280,78 @@ BENCHMARK_CAPTURE(
     ->Args({1 << 18, 64})
     ->Unit(benchmark::kMillisecond);
 
+// Benchmark for applyDeletionFilter: measures the hot path where a batch of
+// rows is checked against the deletion vector bitmap.
+// deletionPercent: fraction of rows in the total file that are deleted.
+// batchSize: number of rows per batch (typical Velox batch size).
+void BM_ApplyDeletionFilter(benchmark::State& state, double deletionPercent) {
+  const auto batchSize = static_cast<uint64_t>(state.range(0));
+  const uint64_t totalFileRows = 1000000; // 1M row file
+  const auto numDeleted =
+      static_cast<uint64_t>(totalFileRows * deletionPercent / 100.0);
+
+  // Build a DV with deletions spread across the file.
+  RoaringBitmapArray bitmap;
+  const uint64_t stride = numDeleted > 0 ? totalFileRows / numDeleted : 0;
+  for (uint64_t i = 0; i < numDeleted; ++i) {
+    bitmap.addSafe(i * stride);
+  }
+  const auto payload = bitmap.serializeToString(true);
+
+  // Load the DV reader.
+  DeltaDeletionVectorReader reader;
+  reader.loadSerializedDeletionVector(
+      std::string_view(payload.data(), payload.size()));
+
+  // Allocate the output bitmap buffer.
+  memory::MemoryManager::testingSetInstance(memory::MemoryManager::Options{});
+  auto pool = memory::memoryManager()->addLeafPool();
+  auto deleteBitmap = AlignedBuffer::allocate<uint64_t>(
+      bits::nwords(batchSize), pool.get());
+
+  // Simulate scanning through the file in batches.
+  const uint64_t numBatches = totalFileRows / batchSize;
+  uint64_t totalDeletedFound = 0;
+
+  for (auto _ : state) {
+    totalDeletedFound = 0;
+    for (uint64_t batch = 0; batch < numBatches; ++batch) {
+      reader.applyDeletionFilter(batch * batchSize, batchSize, deleteBitmap);
+      // Count bits set to prevent dead-code elimination.
+      auto* raw = deleteBitmap->as<uint64_t>();
+      for (uint64_t w = 0; w < bits::nwords(batchSize); ++w) {
+        totalDeletedFound += __builtin_popcountll(raw[w]);
+      }
+    }
+    benchmark::DoNotOptimize(totalDeletedFound);
+  }
+
+  state.SetItemsProcessed(state.iterations() * totalFileRows);
+  state.counters["batch_size"] = benchmark::Counter(batchSize);
+  state.counters["deletion_pct"] = benchmark::Counter(deletionPercent);
+  state.counters["deleted_found"] = benchmark::Counter(totalDeletedFound);
+  state.counters["total_batches"] = benchmark::Counter(numBatches);

Review Comment:
   BM_ApplyDeletionFilter currently computes `numBatches` via integer division 
and then iterates `batch < numBatches`, which skips the tail when 
`totalFileRows` is not divisible by `batchSize` (e.g. 1,000,000 / 4096 leaves 
576 rows). This also makes `state.SetItemsProcessed(state.iterations() * 
totalFileRows)` and the `total_batches` counter inconsistent with the actual 
work performed. Additionally, counting popcounts over `bits::nwords(batchSize)` 
reads padding bits that are not guaranteed to be zero when `batchSize` is not a 
multiple of 64 (e.g. 10,000), which can skew `deleted_found`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to