tadeja commented on code in PR #50248:
URL: https://github.com/apache/arrow/pull/50248#discussion_r3491727004
##########
cpp/src/arrow/compute/kernels/vector_sort.cc:
##########
@@ -160,47 +155,49 @@ class ChunkedArraySorter : public TypeVisitor {
// Reverse everything
sorted.resize(1);
- sorted[0] = chunk_sorted[0].TranslateTo(chunked_indices_begin,
indices_begin_);
+ sorted[0] = chunk_sorted[0].TranslateTo(chunked_indices.data(),
indices_.data());
RETURN_NOT_OK(chunked_mapper.PhysicalToLogical());
}
DCHECK_EQ(sorted.size(), 1);
- DCHECK_EQ(sorted[0].overall_begin(), indices_begin_);
- DCHECK_EQ(sorted[0].overall_end(), indices_end_);
- // Note that "nulls" can also include NaNs, hence the >= check
- DCHECK_GE(sorted[0].null_count(), null_count);
+ DCHECK_EQ(sorted[0].overall_begin(), indices_.data());
+ DCHECK_EQ(sorted[0].overall_end(), indices_.data() + indices_.size());
+ DCHECK_EQ(static_cast<int64_t>(sorted[0].non_null_like_range.size()),
+ non_null_like_count);
*output_ = sorted[0];
return Status::OK();
}
template <typename ArrayType>
- void MergeNonNulls(CompressedChunkLocation* range_begin,
- CompressedChunkLocation* range_middle,
- CompressedChunkLocation* range_end,
+ void MergeNonNulls(std::span<CompressedChunkLocation> left,
+ std::span<CompressedChunkLocation> right,
std::span<const Array* const> arrays,
- CompressedChunkLocation* temp_indices) {
+ std::span<CompressedChunkLocation> temp_indices) {
using ArrowType = typename ArrayType::TypeClass;
if (order_ == SortOrder::Ascending) {
- std::merge(range_begin, range_middle, range_middle, range_end,
temp_indices,
- [&](CompressedChunkLocation left, CompressedChunkLocation
right) {
- return ChunkValue<ArrowType>(arrays, left) <
- ChunkValue<ArrowType>(arrays, right);
- });
+ std::ranges::merge(
+ left, right, temp_indices.begin(),
+ [&](CompressedChunkLocation left, CompressedChunkLocation right) {
+ return ChunkValue<ArrowType>(arrays, left) <
+ ChunkValue<ArrowType>(arrays, right);
+ });
} else {
- std::merge(range_begin, range_middle, range_middle, range_end,
temp_indices,
- [&](CompressedChunkLocation left, CompressedChunkLocation
right) {
- // We don't use 'left > right' here to reduce required
- // operator. If we use 'right < left' here, '<' is only
- // required.
- return ChunkValue<ArrowType>(arrays, right) <
- ChunkValue<ArrowType>(arrays, left);
- });
+ std::ranges::merge(
+ left, right, temp_indices.begin(),
+ [&](CompressedChunkLocation left, CompressedChunkLocation right) {
+ // We don't use 'left > right' here to reduce required
+ // operator. If we use 'right < left' here, '<' is only
+ // required.
+ return ChunkValue<ArrowType>(arrays, right) <
+ ChunkValue<ArrowType>(arrays, left);
+ });
}
// Copy back temp area into main buffer
- std::copy(temp_indices, temp_indices + (range_end - range_begin),
range_begin);
+ std::ranges::copy(temp_indices.begin(),
+ temp_indices.begin() + left.size() + right.size(),
left.begin());
Review Comment:
@taepper I've checked only the failing job [Windows 2022
AVX2](https://github.com/apache/arrow/actions/runs/28353445441/job/83990956695?pr=50248#step:12:175)
where several tests failed with `0xc0000409` but that's probably a false
positive - https://github.com/microsoft/STL/issues/1435
```bash
C:\Program Files\Microsoft Visual
Studio\2022\Enterprise\VC\Tools\MSVC\14.44.35207\include\span(160) : Assertion
failed: cannot compare incompatible span iterators
...
Test #49: arrow-compute-vector-sort-test ...............Exit code
0xc0000409
***Exception: 0.19 sec
...
```
Perhaps replacing `left.begin()` with `left.data()` is a good approach here?
(or `range.data()` but if that's not in scope more changes in function
required)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]