wgtmac commented on code in PR #45351:
URL: https://github.com/apache/arrow/pull/45351#discussion_r2015929488


##########
cpp/src/parquet/arrow/schema_internal.h:
##########
@@ -29,12 +29,16 @@ namespace parquet::arrow {
 
 using ::arrow::Result;
 
-Result<std::shared_ptr<::arrow::DataType>> FromByteArray(const LogicalType& 
logical_type,
-                                                         bool 
use_known_arrow_extensions);
-Result<std::shared_ptr<::arrow::DataType>> FromFLBA(const LogicalType& 
logical_type,
-                                                    int32_t physical_length);
-Result<std::shared_ptr<::arrow::DataType>> FromInt32(const LogicalType& 
logical_type);
-Result<std::shared_ptr<::arrow::DataType>> FromInt64(const LogicalType& 
logical_type);
+Result<std::shared_ptr<::arrow::DataType>> FromByteArray(
+    const LogicalType& logical_type, bool arrow_extensions_enabled = false,
+    bool smallest_decimal_enabled = false);

Review Comment:
   Can we avoid these default parameters?



##########
cpp/src/parquet/arrow/schema_internal.cc:
##########
@@ -110,19 +115,20 @@ Result<std::shared_ptr<ArrowType>> 
MakeArrowTimestamp(const LogicalType& logical
   }
 }
 
-Result<std::shared_ptr<ArrowType>> FromByteArray(
-    const LogicalType& logical_type, const ArrowReaderProperties& 
reader_properties) {
+Result<std::shared_ptr<ArrowType>> FromByteArray(const LogicalType& 
logical_type,

Review Comment:
   I'm just wondering why not simply passing `const ArrowReaderProperties& 
reader_properties` to all these functions? If we will add a third parameter 
which also comes from the properties, then we should directly use it.



##########
cpp/src/parquet/arrow/reader.h:
##########
@@ -303,6 +303,9 @@ class PARQUET_EXPORT FileReader {
   /// Set number of records to read per batch for the RecordBatchReader.
   virtual void set_batch_size(int64_t batch_size) = 0;
 
+  /// Set whether to enable smallest decimal arrow type
+  virtual void set_smallest_decimal_enabled(bool smallest_decimal_enabled) = 0;

Review Comment:
   This looks weird. We cannot change it after the reader has been created. 
Isn't it accessible via the ArrowReaderProperties?



##########
cpp/src/parquet/arrow/test_util.h:
##########
@@ -47,24 +47,58 @@ using ::arrow::Array;
 using ::arrow::ChunkedArray;
 using ::arrow::Status;
 
-template <int32_t PRECISION>
-struct Decimal128WithPrecisionAndScale {
-  static_assert(PRECISION >= 1 && PRECISION <= 38, "Invalid precision value");
+struct BaseDecimalWithPrecisionAndScale {};
+
+template <int32_t PRECISION, bool SMALLEST_DECIMAL_ENABLED = false>
+struct Decimal32WithPrecisionAndScale : BaseDecimalWithPrecisionAndScale {
+  static_assert(PRECISION >= ::arrow::Decimal32Type::kMinPrecision &&
+                    PRECISION <= ::arrow::Decimal32Type::kMaxPrecision,
+                "Invalid precision value");
+
+  using type = ::arrow::Decimal32Type;
+  static constexpr ::arrow::Type::type type_id = 
::arrow::Decimal32Type::type_id;
+  static constexpr int32_t precision = PRECISION;
+  static constexpr int32_t scale = PRECISION - 1;
+  static constexpr bool smallest_decimal_enabled = SMALLEST_DECIMAL_ENABLED;

Review Comment:
   Why do we need to add `smallest_decimal_enabled` to it?



##########
cpp/src/parquet/arrow/reader.h:
##########
@@ -403,7 +406,8 @@ ::arrow::Status 
OpenFile(std::shared_ptr<::arrow::io::RandomAccessFile>,
 /// Advanced settings are supported through the FileReaderBuilder class.
 PARQUET_EXPORT
 ::arrow::Result<std::unique_ptr<FileReader>> OpenFile(
-    std::shared_ptr<::arrow::io::RandomAccessFile>, ::arrow::MemoryPool* 
allocator);
+    std::shared_ptr<::arrow::io::RandomAccessFile>, ::arrow::MemoryPool* pool,

Review Comment:
   Please also revert this change.



##########
cpp/src/parquet/column_writer.cc:
##########
@@ -2383,29 +2391,45 @@ struct SerializeFunctor<
     int64_t non_null_count = array.length() - array.null_count();
     int64_t size = non_null_count * ArrowType::kByteWidth;
     scratch_buffer = AllocateBuffer(ctx->memory_pool, size);
-    scratch = reinterpret_cast<int64_t*>(scratch_buffer->mutable_data());
+    scratch_i32 = reinterpret_cast<int32_t*>(scratch_buffer->mutable_data());
+    scratch_i64 = reinterpret_cast<int64_t*>(scratch_buffer->mutable_data());

Review Comment:
   Perhaps we can remove `scratch_i32` and `scratch_i64` and delay the casting 
until we use the scratch buffer?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to