zanmato1984 commented on PR #48180:
URL: https://github.com/apache/arrow/pull/48180#issuecomment-3611660521
> Does this work?
>
> ```diff
> diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc
> index b90b3a6405..163a80d9d4 100644
> --- a/cpp/src/arrow/compute/util.cc
> +++ b/cpp/src/arrow/compute/util.cc
> @@ -30,33 +30,41 @@ namespace util {
> namespace bit_util {
>
> inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) {
> - // This will not be correct on big-endian architectures.
> -#if !ARROW_LITTLE_ENDIAN
> - ARROW_DCHECK(false);
> -#endif
> ARROW_DCHECK(num_bytes >= 0 && num_bytes <= 8);
> if (num_bytes == 8) {
> - return util::SafeLoad(reinterpret_cast<const uint64_t*>(bytes));
> + auto word = util::SafeLoad(reinterpret_cast<const uint64_t*>(bytes));
> +#if !ARROW_LITTLE_ENDIAN
> + word = bit_util::ByteSwap(word);
> +#endif
> + return word;
> } else {
> uint64_t word = 0;
> for (int i = 0; i < num_bytes; ++i) {
> +#if ARROW_LITTLE_ENDIAN
> word |= static_cast<uint64_t>(bytes[i]) << (8 * i);
> +#else
> + word |= static_cast<uint64_t>(bytes[num_bytes - 1 - i]) << (8 * i);
> +#endif
> }
> return word;
> }
> }
>
> inline void SafeStoreUpTo8Bytes(uint8_t* bytes, int num_bytes, uint64_t
value) {
> - // This will not be correct on big-endian architectures.
> -#if !ARROW_LITTLE_ENDIAN
> - ARROW_DCHECK(false);
> -#endif
> ARROW_DCHECK(num_bytes >= 0 && num_bytes <= 8);
> if (num_bytes == 8) {
> +#if ARROW_LITTLE_ENDIAN
> util::SafeStore(reinterpret_cast<uint64_t*>(bytes), value);
> +#else
> + util::SafeStore(reinterpret_cast<uint64_t*>(bytes),
bit_util::ByteSwap(value));
> +#endif
> } else {
> for (int i = 0; i < num_bytes; ++i) {
> +#if ARROW_LITTLE_ENDIAN
> bytes[i] = static_cast<uint8_t>(value >> (8 * i));
> +#else
> + bytes[i] = static_cast<uint8_t>(value >> (8 * (num_bytes - 1 - i)));
> +#endif
> }
> }
> }
> ```
Hi @kou, I have a question: why do we need to swap the bytes for big-endian
when `num_bytes == 8`? The underlying `util::SafeLoad/Store` are just `memcpy`
so the byte orders between `value` and the `bytes` should be the same right?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]