wgtmac commented on code in PR #41765: URL: https://github.com/apache/arrow/pull/41765#discussion_r2014293068
########## cpp/src/parquet/schema_test.cc: ########## @@ -1688,6 +1688,24 @@ TEST(TestSchemaNodeCreation, FactoryEquivalence) { ConfirmGroupNodeFactoryEquivalence("list", LogicalType::List(), ConvertedType::LIST); } +TEST(TestSchemaNodeCreation, FactoryUnknownLogicalType) { Review Comment: ```suggestion TEST(TestSchemaNodeCreation, FactoryUndefinedLogicalType) { ``` ########## cpp/src/parquet/properties.h: ########## @@ -1003,9 +1004,21 @@ class PARQUET_EXPORT ArrowReaderProperties { void set_should_load_statistics(bool should_load_statistics) { should_load_statistics_ = should_load_statistics; } + /// Return whether loading statistics as much as possible. bool should_load_statistics() const { return should_load_statistics_; } + /// Convert unknown logical types as their underlying physical type + /// + /// When enabled, the Arrow reader will use the underlying physical type + /// of a logical type that it does not recognize (e.g., one that was added + /// to the spec but not implemented in Parquet C++). + void set_convert_unknown_logical_types(bool convert_unknown_logical_types) { Review Comment: As suggested by @mapleFU, should we rename it to `set_convert_undefined_logical_types`? `Unknown` means null logical type in parquet. ########## cpp/src/parquet/arrow/schema_internal.cc: ########## @@ -191,10 +191,18 @@ Result<std::shared_ptr<ArrowType>> FromInt64(const LogicalType& logical_type) { Result<std::shared_ptr<ArrowType>> GetArrowType( Type::type physical_type, const LogicalType& logical_type, int type_length, const ArrowReaderProperties& reader_properties) { - if (logical_type.is_invalid() || logical_type.is_null()) { + if (logical_type.is_null()) { return ::arrow::null(); } + if (logical_type.is_invalid() && reader_properties.convert_unknown_logical_types()) { + return GetArrowType(physical_type, *NoLogicalType::Make(), type_length, + reader_properties); + } else if (logical_type.is_invalid()) { + return Status::NotImplemented( Review Comment: Ah, you're right. Before this change, it throws `ParquetException("Metadata contains Thrift LogicalType that is not recognized")`; -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org