wgtmac commented on code in PR #13901: URL: https://github.com/apache/arrow/pull/13901#discussion_r1749043634
########## cpp/src/parquet/properties.h: ########## @@ -941,6 +942,18 @@ class PARQUET_EXPORT ArrowReaderProperties { return coerce_int96_timestamp_unit_; } + /// Enable Parquet supported Arrow ExtensionTypes. + /// + /// When enabled, Parquet will use supported Arrow ExtensionTypes by mapping correctly + /// mapping them to Arrow types at read time. Currently only arrow::extension::json() + /// extension type is supported. Columns whose LogicalType is JSON will be interpreted + /// as arrow::extension::json() ExtensionType with storage type utf8, large_utf8 or Review Comment: Is it possible for users to pick one from `utf8, large_utf8 or utf8_view` on read? ########## cpp/src/parquet/arrow/schema_internal.cc: ########## @@ -107,17 +110,24 @@ Result<std::shared_ptr<ArrowType>> MakeArrowTimestamp(const LogicalType& logical } } -Result<std::shared_ptr<ArrowType>> FromByteArray(const LogicalType& logical_type) { +Result<std::shared_ptr<ArrowType>> FromByteArray( + const LogicalType& logical_type, const ArrowReaderProperties& reader_properties) { switch (logical_type.type()) { case LogicalType::Type::STRING: return ::arrow::utf8(); case LogicalType::Type::DECIMAL: return MakeArrowDecimal(logical_type); case LogicalType::Type::NONE: case LogicalType::Type::ENUM: - case LogicalType::Type::JSON: case LogicalType::Type::BSON: return ::arrow::binary(); + case LogicalType::Type::JSON: + if (reader_properties.get_arrow_extensions_enabled()) { Review Comment: Sorry that I'm late on the review. Is this property too broad? Should we restrict it to json specifically? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org