Copilot commented on code in PR #47427: URL: https://github.com/apache/arrow/pull/47427#discussion_r2300661443
########## cpp/src/parquet/properties.h: ########## @@ -1126,6 +1127,15 @@ class PARQUET_EXPORT ArrowReaderProperties { /// Return whether loading statistics as much as possible. bool should_load_statistics() const { return should_load_statistics_; } + /// \brief Set whether infer Decimal32/64 from parquet. + /// + /// Default is false. Review Comment: There's an extra space in the comment at line 132. Remove the trailing space after the `///`. ```suggestion /// ///Default is false. ``` ########## cpp/src/parquet/column_writer.cc: ########## @@ -2115,33 +2115,28 @@ struct SerializeFunctor< ArrowWriteContext* ctx, value_type* out) { if (array.null_count() == 0) { for (int64_t i = 0; i < array.length(); i++) { - out[i] = TransferValue<ArrowType::kByteWidth>(array.Value(i)); + out[i] = TransferValue(array.Value(i)); } } else { for (int64_t i = 0; i < array.length(); i++) { - out[i] = - array.IsValid(i) ? TransferValue<ArrowType::kByteWidth>(array.Value(i)) : 0; + out[i] = array.IsValid(i) ? TransferValue(array.Value(i)) : 0; } } return Status::OK(); } - template <int byte_width> + private: value_type TransferValue(const uint8_t* in) const { - static_assert(byte_width == 16 || byte_width == 32, - "only 16 and 32 byte Decimals supported"); - value_type value = 0; - if constexpr (byte_width == 16) { - ::arrow::Decimal128 decimal_value(in); - PARQUET_THROW_NOT_OK(decimal_value.ToInteger(&value)); + using DecimalValue = typename ::arrow::TypeTraits<ArrowType>::CType; + DecimalValue decimal_value(in); + if constexpr (std::is_same_v<ArrowType, ::arrow::Decimal256Type>) { + return static_cast<value_type>(decimal_value.low_bits()); } else { - ::arrow::Decimal256 decimal_value(in); - // Decimal256 does not provide ToInteger, but we are sure it fits in the target - // integer type. - value = static_cast<value_type>(decimal_value.low_bits()); + value_type value = 0; + PARQUET_THROW_NOT_OK(decimal_value.ToInteger(&value)); + return value; Review Comment: The logic should handle all four decimal types (Decimal32, Decimal64, Decimal128, Decimal256) explicitly. Currently, only Decimal256 is handled specifically while others fall through to the else branch. Consider adding explicit handling for Decimal32 and Decimal64 to make the intent clearer and more maintainable. ########## cpp/src/parquet/arrow/test_util.h: ########## @@ -47,25 +47,27 @@ using ::arrow::Array; using ::arrow::ChunkedArray; using ::arrow::Status; -template <int32_t PRECISION> -struct Decimal128WithPrecisionAndScale { - static_assert(PRECISION >= 1 && PRECISION <= 38, "Invalid precision value"); - - using type = ::arrow::Decimal128Type; - static constexpr ::arrow::Type::type type_id = ::arrow::Decimal128Type::type_id; +template <typename T, int32_t PRECISION, typename = ::arrow::enable_if_decimal<T>> +struct DecimalWithPrecisionAndScale { + using type = T; + static_assert(PRECISION >= T::kMinPrecision && PRECISION <= T::kMaxPrecision, + "Invalid precision value"); + static constexpr ::arrow::Type::type type_id = T::type_id; static constexpr int32_t precision = PRECISION; static constexpr int32_t scale = PRECISION - 1; }; - template <int32_t PRECISION> -struct Decimal256WithPrecisionAndScale { - static_assert(PRECISION >= 1 && PRECISION <= 76, "Invalid precision value"); - - using type = ::arrow::Decimal256Type; - static constexpr ::arrow::Type::type type_id = ::arrow::Decimal256Type::type_id; - static constexpr int32_t precision = PRECISION; - static constexpr int32_t scale = PRECISION - 1; -}; +using Decimal32WithPrecisionAndScale = + DecimalWithPrecisionAndScale<::arrow::Decimal32Type, PRECISION>; Review Comment: The template parameter `PRECISION` is not defined in this scope. This line should be wrapped in a template declaration like the other decimal type aliases below it. ########## cpp/src/parquet/column_writer.cc: ########## @@ -2491,21 +2490,21 @@ struct SerializeFunctor< if (array.null_count() == 0) { for (int64_t i = 0; i < array.length(); i++) { - out[i] = FixDecimalEndianness<ArrowType::kByteWidth>(array.GetValue(i), offset); + out[i] = FixDecimalEndianness(array.GetValue(i), offset); } } else { for (int64_t i = 0; i < array.length(); i++) { - out[i] = array.IsValid(i) ? FixDecimalEndianness<ArrowType::kByteWidth>( - array.GetValue(i), offset) + out[i] = array.IsValid(i) ? FixDecimalEndianness(array.GetValue(i), offset) : FixedLenByteArray(); } } return Status::OK(); } + private: // Parquet's Decimal are stored with FixedLength values where the length is - // proportional to the precision. Arrow's Decimal are always stored with 16/32 + // proportional to the precision. Arrow's Decimal are always stored with 4/8/16/32 // bytes. Thus the internal FLBA pointer must be adjusted by the offset calculated // here. Review Comment: The comment should clarify that the 4/8/16/32 bytes correspond to Decimal32/64/128/256 respectively, as this mapping is not immediately obvious from the comment alone. ```suggestion // bytes, corresponding to Decimal32/64/128/256 respectively. Thus the internal FLBA // pointer must be adjusted by the offset calculated here. ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org