wgtmac commented on code in PR #49125:
URL: https://github.com/apache/arrow/pull/49125#discussion_r2783197621
##########
cpp/src/parquet/arrow/schema.cc:
##########
@@ -620,7 +621,8 @@ Status MapToSchemaField(const GroupNode& group, LevelInfo
current_levels,
if (group.field_count() != 1) {
return Status::Invalid("MAP-annotated groups must have a single child.");
}
- if (group.is_repeated()) {
+ if (group.is_repeated() &&
Review Comment:
Let's add a comment like `MAP-annotated groups cannot be repeated unless in
a legacy two-level LIST-annotated group. In this case, the MAP-annotated group
is implicitly to be required.`
##########
cpp/src/parquet/arrow/schema.cc:
##########
@@ -694,17 +698,76 @@ Status MapToSchemaField(const GroupNode& group, LevelInfo
current_levels,
return Status::OK();
}
+Status ResolveList(const GroupNode& group, const Node& list_node,
+ LevelInfo current_levels, SchemaTreeContext* ctx,
+ const SchemaField* out, SchemaField* child_field) {
Review Comment:
It looks strange that `out` is const.
##########
cpp/src/parquet/arrow/schema.cc:
##########
@@ -694,17 +698,76 @@ Status MapToSchemaField(const GroupNode& group, LevelInfo
current_levels,
return Status::OK();
}
+Status ResolveList(const GroupNode& group, const Node& list_node,
+ LevelInfo current_levels, SchemaTreeContext* ctx,
+ const SchemaField* out, SchemaField* child_field) {
+ auto check_two_level_list_repeated = [](const GroupNode& group) -> Status {
+ // When it is repeated, the LIST-annotated 2-level structure can only
serve as an
+ // element within another LIST-annotated 2-level structure.
+ if (group.is_repeated() &&
+ (group.parent() == nullptr ||
!group.parent()->logical_type()->is_list())) {
+ return Status::Invalid("LIST-annotated groups must not be repeated.");
+ }
+ return {};
+ };
+
+ if (list_node.is_group()) {
+ const auto& list_group = static_cast<const GroupNode&>(list_node);
+ if (list_group.field_count() > 1) {
+ // The inner type of the list should be a struct when there are multiple
fields
+ // in the repeated group
+ RETURN_NOT_OK(check_two_level_list_repeated(group));
+ return GroupToStruct(list_group, current_levels, ctx, out, child_field);
+ }
+ if (list_group.field_count() == 0) {
+ return Status::Invalid("Group must have at least one child.");
+ }
+
+ if (list_group.logical_type()->is_none() && HasListElementName(list_group,
group)) {
+ // Backward-compatibility rule 4
Review Comment:
It would be better to explicitly call out the example because even Parquet
developers may not know what is the rule 4.
##########
cpp/src/parquet/arrow/schema.cc:
##########
@@ -720,88 +783,8 @@ Status ListToSchemaField(const GroupNode& group, LevelInfo
current_levels,
}
int16_t repeated_ancestor_def_level = current_levels.IncrementRepeated();
- if (list_node.is_group()) {
- const auto& list_group = static_cast<const GroupNode&>(list_node);
- if (list_group.field_count() > 1) {
- // The inner type of the list should be a struct when there are multiple
fields
- // in the repeated group
- RETURN_NOT_OK(GroupToStruct(list_group, current_levels, ctx, out,
child_field));
- } else if (list_group.field_count() == 1) {
- const auto& repeated_field = list_group.field(0);
- if (repeated_field->is_repeated()) {
- // Special case where the inner type might be a list with two-level
encoding
Review Comment:
Can we try to preserve these examples in the comment to help understand them?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]