wjones127 commented on PR #13857:
URL: https://github.com/apache/arrow/pull/13857#issuecomment-1252891290
After some more testing, it seems like concatenating buffers and indexing
into the result always wins over using `ChunkResolver`, in both the primitive
and string cases.
From my quick test of Take on chunked string arrays:
* Concatenating: items_per_second=50.3014M/s
* ChunkResolver: items_per_second=6.9115M/s
Unless there is a more performant way, we might only have Take kernels
specialized for ChunkedArrays for String / Binary / List (and also Struct, since
it will then need to handle rechunking of child arrays).
<details>
<summary>String ChunkedArray Take Benchmark Code</summary>
```cpp
void BenchStringTest() {
// Create chunked string array
int32_t string_min_length = 0, string_max_length = 32;
const int64_t n_chunks = 10;
const int64_t array_size = args.size / n_chunks;
ArrayVector chunks;
for (int64_t i = 0; i < n_chunks; ++i) {
auto chunk = std::static_pointer_cast<StringArray>(
rand.String(args.size, string_min_length, string_max_length, 0));
chunks.push_back(chunk);
}
auto values = ChunkedArray(chunks);
// Create indices
auto indices =
rand.Int32(values.length(), 0, static_cast<int32_t>(values.length()
- 1), 0);
for (auto _ : state) {
TypedBufferBuilder<int32_t> offset_builder;
TypedBufferBuilder<uint8_t> data_builder;
const int32_t* indices_values = indices->data()->GetValues<int32_t>(1);
if (concat_chunks) {
// Concat the chunks
ASSIGN_OR_ABORT(std::shared_ptr<Array> values_combined,
Concatenate(values.chunks()));
const uint8_t* values_data =
values_combined->data()->GetValues<uint8_t>(1);
const int32_t* values_offsets =
values_combined->data()->GetValues<int32_t>(2);
// for each value
for (int i = 0; i < indices->length(); ++i) {
int32_t index = indices_values[i];
// get the offset and size
int32_t offset = values_offsets[index];
int64_t length = values_offsets[index + 1] - offset;
// throw them on the builder
data_builder.UnsafeAppend(values_data + offset, length);
}
} else {
using arrow::internal::ChunkLocation;
using arrow::internal::ChunkResolver;
ChunkResolver resolver(values.chunks());
std::vector<const uint8_t*> values_data(values.num_chunks());
std::vector<const int32_t*> values_offsets(values.num_chunks());
for (int i = 0; i < values.num_chunks(); ++i) {
values_data[i] = values.chunks()[i]->data()->GetValues<uint8_t>(1);
values_offsets[i] =
values.chunks()[i]->data()->GetValues<int32_t>(2);
}
// for each index
for (int i = 0; i < indices->length(); ++i) {
// Resolve the location
ChunkLocation location = resolver.Resolve(indices_values[i]);
// Get the offset and size
int32_t offset =
values_offsets[location.chunk_index][location.index_in_chunk];
int32_t length =
values_offsets[location.chunk_index][location.index_in_chunk +
1] - offset;
// throw them on the builder
data_builder.UnsafeAppend(values_data[location.chunk_index] +
offset, length);
}
}
}
}
```
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]