write parquet [arrow]

via GitHub Tue, 26 Aug 2025 04:22:12 -0700


Copilot commented on code in PR #47427:
URL: https://github.com/apache/arrow/pull/47427#discussion_r2300661443



##########
cpp/src/parquet/properties.h:
##########
@@ -1126,6 +1127,15 @@ class PARQUET_EXPORT ArrowReaderProperties {
   /// Return whether loading statistics as much as possible.
   bool should_load_statistics() const { return should_load_statistics_; }
 
+  /// \brief Set whether infer Decimal32/64 from parquet.
+  /// 
+  /// Default is false.

Review Comment:
   There's an extra space in the comment at line 132. Remove the trailing space 
after the `///`.
   ```suggestion
     ///
     ///Default is false.
   ```



##########
cpp/src/parquet/column_writer.cc:
##########
@@ -2115,33 +2115,28 @@ struct SerializeFunctor<
                    ArrowWriteContext* ctx, value_type* out) {
     if (array.null_count() == 0) {
       for (int64_t i = 0; i < array.length(); i++) {
-        out[i] = TransferValue<ArrowType::kByteWidth>(array.Value(i));
+        out[i] = TransferValue(array.Value(i));
       }
     } else {
       for (int64_t i = 0; i < array.length(); i++) {
-        out[i] =
-            array.IsValid(i) ? 
TransferValue<ArrowType::kByteWidth>(array.Value(i)) : 0;
+        out[i] = array.IsValid(i) ? TransferValue(array.Value(i)) : 0;
       }
     }
 
     return Status::OK();
   }
 
-  template <int byte_width>
+ private:
   value_type TransferValue(const uint8_t* in) const {
-    static_assert(byte_width == 16 || byte_width == 32,
-                  "only 16 and 32 byte Decimals supported");
-    value_type value = 0;
-    if constexpr (byte_width == 16) {
-      ::arrow::Decimal128 decimal_value(in);
-      PARQUET_THROW_NOT_OK(decimal_value.ToInteger(&value));
+    using DecimalValue = typename ::arrow::TypeTraits<ArrowType>::CType;
+    DecimalValue decimal_value(in);
+    if constexpr (std::is_same_v<ArrowType, ::arrow::Decimal256Type>) {
+      return static_cast<value_type>(decimal_value.low_bits());
     } else {
-      ::arrow::Decimal256 decimal_value(in);
-      // Decimal256 does not provide ToInteger, but we are sure it fits in the 
target
-      // integer type.
-      value = static_cast<value_type>(decimal_value.low_bits());
+      value_type value = 0;
+      PARQUET_THROW_NOT_OK(decimal_value.ToInteger(&value));
+      return value;

Review Comment:
   The logic should handle all four decimal types (Decimal32, Decimal64, 
Decimal128, Decimal256) explicitly. Currently, only Decimal256 is handled 
specifically while others fall through to the else branch. Consider adding 
explicit handling for Decimal32 and Decimal64 to make the intent clearer and 
more maintainable.



##########
cpp/src/parquet/arrow/test_util.h:
##########
@@ -47,25 +47,27 @@ using ::arrow::Array;
 using ::arrow::ChunkedArray;
 using ::arrow::Status;
 
-template <int32_t PRECISION>
-struct Decimal128WithPrecisionAndScale {
-  static_assert(PRECISION >= 1 && PRECISION <= 38, "Invalid precision value");
-
-  using type = ::arrow::Decimal128Type;
-  static constexpr ::arrow::Type::type type_id = 
::arrow::Decimal128Type::type_id;
+template <typename T, int32_t PRECISION, typename = 
::arrow::enable_if_decimal<T>>
+struct DecimalWithPrecisionAndScale {
+  using type = T;
+  static_assert(PRECISION >= T::kMinPrecision && PRECISION <= T::kMaxPrecision,
+                "Invalid precision value");
+  static constexpr ::arrow::Type::type type_id = T::type_id;
   static constexpr int32_t precision = PRECISION;
   static constexpr int32_t scale = PRECISION - 1;
 };
-
 template <int32_t PRECISION>
-struct Decimal256WithPrecisionAndScale {
-  static_assert(PRECISION >= 1 && PRECISION <= 76, "Invalid precision value");
-
-  using type = ::arrow::Decimal256Type;
-  static constexpr ::arrow::Type::type type_id = 
::arrow::Decimal256Type::type_id;
-  static constexpr int32_t precision = PRECISION;
-  static constexpr int32_t scale = PRECISION - 1;
-};
+using Decimal32WithPrecisionAndScale =
+    DecimalWithPrecisionAndScale<::arrow::Decimal32Type, PRECISION>;

Review Comment:
   The template parameter `PRECISION` is not defined in this scope. This line 
should be wrapped in a template declaration like the other decimal type aliases 
below it.



##########
cpp/src/parquet/column_writer.cc:
##########
@@ -2491,21 +2490,21 @@ struct SerializeFunctor<
 
     if (array.null_count() == 0) {
       for (int64_t i = 0; i < array.length(); i++) {
-        out[i] = 
FixDecimalEndianness<ArrowType::kByteWidth>(array.GetValue(i), offset);
+        out[i] = FixDecimalEndianness(array.GetValue(i), offset);
       }
     } else {
       for (int64_t i = 0; i < array.length(); i++) {
-        out[i] = array.IsValid(i) ? 
FixDecimalEndianness<ArrowType::kByteWidth>(
-                                        array.GetValue(i), offset)
+        out[i] = array.IsValid(i) ? FixDecimalEndianness(array.GetValue(i), 
offset)
                                   : FixedLenByteArray();
       }
     }
 
     return Status::OK();
   }
 
+ private:
   // Parquet's Decimal are stored with FixedLength values where the length is
-  // proportional to the precision. Arrow's Decimal are always stored with 
16/32
+  // proportional to the precision. Arrow's Decimal are always stored with 
4/8/16/32
   // bytes. Thus the internal FLBA pointer must be adjusted by the offset 
calculated
   // here.

Review Comment:
   The comment should clarify that the 4/8/16/32 bytes correspond to 
Decimal32/64/128/256 respectively, as this mapping is not immediately obvious 
from the comment alone.
   ```suggestion
     // bytes, corresponding to Decimal32/64/128/256 respectively. Thus the 
internal FLBA
     // pointer must be adjusted by the offset calculated here.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Re: [PR] GH-44345: [C++] arrow Decimal32/64 read/write parquet [arrow]

Reply via email to