pitrou commented on code in PR #45217:
URL: https://github.com/apache/arrow/pull/45217#discussion_r1913145931
##########
cpp/src/arrow/compute/kernels/vector_rank.cc:
##########
@@ -28,114 +28,97 @@ namespace {
// ----------------------------------------------------------------------
// Rank implementation
-template <typename ValueSelector,
- typename T = std::decay_t<std::invoke_result_t<ValueSelector,
int64_t>>>
+// A bit that is set in the sort indices when the value at the current sort
index
+// is the same as the value at the previous sort index.
+constexpr uint64_t kDuplicateMask = 1ULL << 63;
+
+constexpr bool NeedsDuplicates(RankOptions::Tiebreaker tiebreaker) {
+ return tiebreaker != RankOptions::First;
+}
+
+template <typename ValueSelector>
+void MarkDuplicates(const NullPartitionResult& sorted, ValueSelector&&
value_selector) {
+ using T = decltype(value_selector(int64_t{}));
+
+ // Process non-nulls
+ if (sorted.non_nulls_end != sorted.non_nulls_begin) {
+ auto it = sorted.non_nulls_begin;
+ T prev_value = value_selector(*it);
+ while (++it < sorted.non_nulls_end) {
+ T curr_value = value_selector(*it);
+ if (curr_value == prev_value) {
+ *it |= kDuplicateMask;
Review Comment:
The "minor change" was to make `MarkDuplicates` generic on the index type.
But then I realize it couldn't, since `*it |= kDuplicateMask` requires the
index to be a 64-bit-int. So I reverted to the specific version.
##########
cpp/src/arrow/compute/kernels/vector_rank.cc:
##########
@@ -28,114 +28,97 @@ namespace {
// ----------------------------------------------------------------------
// Rank implementation
-template <typename ValueSelector,
- typename T = std::decay_t<std::invoke_result_t<ValueSelector,
int64_t>>>
+// A bit that is set in the sort indices when the value at the current sort
index
+// is the same as the value at the previous sort index.
+constexpr uint64_t kDuplicateMask = 1ULL << 63;
+
+constexpr bool NeedsDuplicates(RankOptions::Tiebreaker tiebreaker) {
+ return tiebreaker != RankOptions::First;
+}
+
+template <typename ValueSelector>
+void MarkDuplicates(const NullPartitionResult& sorted, ValueSelector&&
value_selector) {
+ using T = decltype(value_selector(int64_t{}));
+
+ // Process non-nulls
+ if (sorted.non_nulls_end != sorted.non_nulls_begin) {
+ auto it = sorted.non_nulls_begin;
+ T prev_value = value_selector(*it);
+ while (++it < sorted.non_nulls_end) {
+ T curr_value = value_selector(*it);
+ if (curr_value == prev_value) {
+ *it |= kDuplicateMask;
Review Comment:
The "minor change" was to make `MarkDuplicates` generic on the index type.
But then I realized it couldn't, since `*it |= kDuplicateMask` requires the
index to be a 64-bit-int. So I reverted to the specific version.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]