pitrou commented on a change in pull request #8366:
URL: https://github.com/apache/arrow/pull/8366#discussion_r500862082



##########
File path: cpp/src/parquet/arrow/schema.cc
##########
@@ -689,10 +686,62 @@ Status GetOriginSchema(const std::shared_ptr<const 
KeyValueMetadata>& metadata,
 // but that is not necessarily present in the field reconstituted from Parquet 
data
 // (for example, Parquet timestamp types don't carry timezone information).
 
-Status ApplyOriginalStorageMetadata(const Field& origin_field, SchemaField* 
inferred) {
+Result<bool> ApplyOriginalMetadata(const Field& origin_field, SchemaField* 
inferred);
+
+std::function<std::shared_ptr<::arrow::DataType>(FieldVector)> 
GetNestedFactory(
+    const ArrowType& origin_type, const ArrowType& inferred_type) {
+  switch (inferred_type.id()) {
+    case ::arrow::Type::STRUCT:
+      if (origin_type.id() == ::arrow::Type::STRUCT) {
+        return ::arrow::struct_;
+      }
+      break;
+    case ::arrow::Type::LIST:
+      // TODO also allow LARGE_LIST and FIXED_SIZE_LIST
+      if (origin_type.id() == ::arrow::Type::LIST) {
+        return [](FieldVector fields) {
+          DCHECK_EQ(fields.size(), 1);
+          return ::arrow::list(std::move(fields[0]));
+        };
+      }
+      break;
+    default:
+      break;
+  }
+  return {};
+}
+
+Result<bool> ApplyOriginalStorageMetadata(const Field& origin_field,
+                                          SchemaField* inferred) {
+  bool modified = false;
+
   auto origin_type = origin_field.type();
   auto inferred_type = inferred->field->type();
 
+  const int num_children = inferred_type->num_fields();
+
+  if (num_children > 0 && origin_type->num_fields() == num_children) {
+    DCHECK_EQ(static_cast<int>(inferred->children.size()), num_children);
+    if (auto factory = GetNestedFactory(*origin_type, *inferred_type)) {

Review comment:
       What would you prefer? Avoid combining `if` and assignment?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to