pitrou commented on a change in pull request #7973:
URL: https://github.com/apache/arrow/pull/7973#discussion_r477352114



##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), 
repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t 
repitition_level,
+            int32_t repeated_ancestor_definition_level)
+      : null_slot_usage(null_slots),
+        def_level(definition_level),
+        rep_level(repitition_level),
+        repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+
+  bool operator==(const LevelInfo& b) const {
+    return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
+           rep_level == b.rep_level &&
+           repeated_ancestor_def_level == b.repeated_ancestor_def_level;
+  }
+
+  // How many slots a null element consumes.
+  // This is only ever >1 for descendents of
+  // FixedSizeList.
+  int32_t null_slot_usage = 1;
+
+  // The definition level at which the value for the field
+  // is considered not null (definition levels greater than
+  // or equal to indicate this value indicate a not-null
+  // value for the field). For list fields definition levels
+  // greater then or equal to this field indicate a present
+  // , possibly null, element.
+  int16_t def_level = 0;
+
+  // The repetition level corresponding to this element
+  // or the closest repeated ancestor.  Any repetition
+  // level less than this indicates either a new list OR
+  // an empty list (which is determined in conjunction
+  // definition_level).
+  int16_t rep_level = 0;
+
+  // The definition level indicating the level at which the closest
+  // repeated ancestor was not empty.  This is used to discriminate
+  // between a value less than |definition_level|
+  // being null or excluded entirely.
+  // For instance if we have an arrow schema like:
+  // list(struct(f0: int)).  Then then there are the following
+  // definition levels:
+  // 0 = null list
+  // 1 = present but empty list.
+  // 2 = a null value in the list
+  // 3 = a non null struct but null integer.
+  // 4 = a present integer.

Review comment:
       +1, thanks for this example!

##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), 
repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t 
repitition_level,
+            int32_t repeated_ancestor_definition_level)
+      : null_slot_usage(null_slots),
+        def_level(definition_level),
+        rep_level(repitition_level),
+        repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+
+  bool operator==(const LevelInfo& b) const {
+    return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
+           rep_level == b.rep_level &&
+           repeated_ancestor_def_level == b.repeated_ancestor_def_level;
+  }
+
+  // How many slots a null element consumes.
+  // This is only ever >1 for descendents of
+  // FixedSizeList.
+  int32_t null_slot_usage = 1;
+
+  // The definition level at which the value for the field
+  // is considered not null (definition levels greater than
+  // or equal to indicate this value indicate a not-null
+  // value for the field). For list fields definition levels
+  // greater then or equal to this field indicate a present
+  // , possibly null, element.
+  int16_t def_level = 0;
+
+  // The repetition level corresponding to this element
+  // or the closest repeated ancestor.  Any repetition
+  // level less than this indicates either a new list OR
+  // an empty list (which is determined in conjunction
+  // definition_level).
+  int16_t rep_level = 0;
+
+  // The definition level indicating the level at which the closest
+  // repeated ancestor was not empty.  This is used to discriminate

Review comment:
       Do you mean logical ancestor (in Arrow terms)? Or physical ancestor (in 
Parquet nesting)?

##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), 
repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t 
repitition_level,
+            int32_t repeated_ancestor_definition_level)
+      : null_slot_usage(null_slots),
+        def_level(definition_level),
+        rep_level(repitition_level),
+        repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+
+  bool operator==(const LevelInfo& b) const {
+    return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
+           rep_level == b.rep_level &&
+           repeated_ancestor_def_level == b.repeated_ancestor_def_level;
+  }
+
+  // How many slots a null element consumes.
+  // This is only ever >1 for descendents of
+  // FixedSizeList.
+  int32_t null_slot_usage = 1;
+
+  // The definition level at which the value for the field
+  // is considered not null (definition levels greater than
+  // or equal to indicate this value indicate a not-null

Review comment:
       "or equal to this value"?

##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), 
repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t 
repitition_level,
+            int32_t repeated_ancestor_definition_level)
+      : null_slot_usage(null_slots),
+        def_level(definition_level),
+        rep_level(repitition_level),
+        repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+
+  bool operator==(const LevelInfo& b) const {
+    return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
+           rep_level == b.rep_level &&
+           repeated_ancestor_def_level == b.repeated_ancestor_def_level;
+  }
+
+  // How many slots a null element consumes.
+  // This is only ever >1 for descendents of
+  // FixedSizeList.
+  int32_t null_slot_usage = 1;
+
+  // The definition level at which the value for the field
+  // is considered not null (definition levels greater than
+  // or equal to indicate this value indicate a not-null
+  // value for the field). For list fields definition levels
+  // greater then or equal to this field indicate a present
+  // , possibly null, element.
+  int16_t def_level = 0;
+
+  // The repetition level corresponding to this element
+  // or the closest repeated ancestor.  Any repetition
+  // level less than this indicates either a new list OR
+  // an empty list (which is determined in conjunction
+  // definition_level).

Review comment:
       Do you mean "def_level"? Or "repeated_ancestor_def_level" perhaps?

##########
File path: cpp/src/parquet/arrow/schema.cc
##########
@@ -477,12 +484,11 @@ Status ListToSchemaField(const GroupNode& group, int16_t 
current_def_level,
   const Node& list_node = *group.field(0);
 
   if (!list_node.is_repeated()) {
-    return Status::NotImplemented(
+    return Status::Invalid(
         "Non-repeated nodes in a LIST-annotated group are not supported.");
   }
 
-  ++current_def_level;
-  ++current_rep_level;
+  int16_t repeated_ancesor_def_level = current_levels.IncrementRepeated();

Review comment:
       "ancestor"?

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, 
/*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive 
field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  // Arrow schema: list(struct(child: struct(list(bool not null) not null)) 
non null) not
+  // null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},  // optional child 
struct
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, 
/*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // repeated field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, 
/*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // innter field

Review comment:
       "inner"

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, 
/*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive 
field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  // Arrow schema: list(struct(child: struct(list(bool not null) not null)) 
non null) not
+  // null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},  // optional child 
struct
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, 
/*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // repeated field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, 
/*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // innter field
+
+  // Arrow schema: list(struct(child_list: list(struct(f0: bool f1: bool 
no-required )))
+  // not null) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, 
ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          LogicalType::List())}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  // Def_level=2 is handled together with def_level=3
+                  // When decoding.  Def_level=2 indicate present but empty
+                  // list.  def_level=3 indicates a present element in the
+                  // list.
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, 
/*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // list field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/4, 
/*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3},  // inner struct 
field
+
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/5, 
/*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // f0 bool field

Review comment:
       Hmm, ok, now I don't understand why only 4 LevelInfo entries are returned. I would 
expect 5:
   * for outer `list`
   * for outer `list(struct)`
   * for `list(struct(child: list))`
   * for `list(struct(child: list(struct)))`
   * for `list(struct(child: list(struct: f0)))`
   

##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), 
repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t 
repitition_level,

Review comment:
       "repetition_level"

##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), 
repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t 
repitition_level,
+            int32_t repeated_ancestor_definition_level)
+      : null_slot_usage(null_slots),
+        def_level(definition_level),
+        rep_level(repitition_level),
+        repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+
+  bool operator==(const LevelInfo& b) const {
+    return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
+           rep_level == b.rep_level &&
+           repeated_ancestor_def_level == b.repeated_ancestor_def_level;
+  }
+
+  // How many slots a null element consumes.
+  // This is only ever >1 for descendents of
+  // FixedSizeList.
+  int32_t null_slot_usage = 1;
+
+  // The definition level at which the value for the field
+  // is considered not null (definition levels greater than
+  // or equal to indicate this value indicate a not-null
+  // value for the field). For list fields definition levels
+  // greater then or equal to this field indicate a present
+  // , possibly null, element.
+  int16_t def_level = 0;
+
+  // The repetition level corresponding to this element
+  // or the closest repeated ancestor.  Any repetition
+  // level less than this indicates either a new list OR
+  // an empty list (which is determined in conjunction
+  // definition_level).
+  int16_t rep_level = 0;
+
+  // The definition level indicating the level at which the closest
+  // repeated ancestor was not empty.  This is used to discriminate
+  // between a value less than |definition_level|
+  // being null or excluded entirely.
+  // For instance if we have an arrow schema like:
+  // list(struct(f0: int)).  Then then there are the following
+  // definition levels:
+  // 0 = null list
+  // 1 = present but empty list.
+  // 2 = a null value in the list
+  // 3 = a non null struct but null integer.
+  // 4 = a present integer.
+  // When reconstructing the struct and integer Array's
+  // repeated_ancestor_def_level would be 2.  Any

Review comment:
       The sentence is not clear here; could you rephrase?
   Do you mean something like:
   > When reconstructing the struct and integer, their 
`repeated_ancestor_def_level` would be 2.

##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), 
repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t 
repitition_level,
+            int32_t repeated_ancestor_definition_level)
+      : null_slot_usage(null_slots),
+        def_level(definition_level),
+        rep_level(repitition_level),
+        repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+
+  bool operator==(const LevelInfo& b) const {
+    return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
+           rep_level == b.rep_level &&
+           repeated_ancestor_def_level == b.repeated_ancestor_def_level;
+  }
+
+  // How many slots a null element consumes.

Review comment:
       In other words, is it the number of definition levels per null element?
   (I assume "slot" doesn't mean "value slot" because nulls are not encoded in 
values)

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, 
/*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive 
field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  // Arrow schema: list(struct(child: struct(list(bool not null) not null)) 
non null) not

Review comment:
       The outer struct is nullable and there seem to be too many nesting 
levels, so I'd say:
   ```
   list(struct(child: list(bool not null) not null)) not null
   ```

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, 
/*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive 
field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {

Review comment:
       Sorry to ask more, but can we also have simpler tests with a single 
top-level list field?
   (e.g. `list(bool not null)`, `list(bool)`, `list(bool) not null`...)
   
   Once we have that, it's easier to reason about the more complicated ones.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, 
/*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive 
field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  // Arrow schema: list(struct(child: struct(list(bool not null) not null)) 
non null) not
+  // null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},  // optional child 
struct
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, 
/*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // repeated field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, 
/*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // innter field
+
+  // Arrow schema: list(struct(child_list: list(struct(f0: bool f1: bool 
no-required )))
+  // not null) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, 
ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          LogicalType::List())}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  // Def_level=2 is handled together with def_level=3
+                  // When decoding.  Def_level=2 indicate present but empty
+                  // list.  def_level=3 indicates a present element in the
+                  // list.
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, 
/*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // list field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/4, 
/*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3},  // inner struct 
field
+
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/5, 
/*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/1));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  // Def_level=2 is handled together with def_level=3
+                  // When decoding.  Def_level=2 indicate present but empty
+                  // list.  def_level=3 indicates a present element in the
+                  // list.
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, 
/*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // list field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/4, 
/*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3},  // inner struct 
field
+
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/4, 
/*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Arrow schema: list(struct(child_list: list(bool not null)) not null) not 
null
+  // Legacy 2-level necoding (required for backwards compatibility.  See

Review comment:
       "encoding"

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, 
/*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive 
field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  // Arrow schema: list(struct(child: struct(list(bool not null) not null)) 
non null) not
+  // null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, 
/*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},  // optional child 
struct
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, 
/*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // repeated field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, 
/*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // innter field
+
+  // Arrow schema: list(struct(child_list: list(struct(f0: bool f1: bool 
no-required )))
+  // not null) not null

Review comment:
       The outer struct seems nullable. Also, can we use "not null" everywhere 
instead of "no-required"?

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& 
manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  
primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional 
child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated 
field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter 
field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, 
ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool 
field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool 
field
+
+  // Legacy 2-level necoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, 
ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner 
struct field
+}
+
+TEST_F(TestLevels, ListErrors) {
+  {
+    ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make(
+        "child_list", Repetition::REPEATED,
+        {PrimitiveNode::Make("bool", Repetition::REPEATED, 
ParquetType::BOOLEAN)},
+        ConvertedType::LIST));
+    EXPECT_TRUE(error.IsInvalid());
+    std::string expected("LIST-annotated groups must not be repeated.");
+    EXPECT_EQ(error.message().substr(0, expected.size()), expected);
+  }

Review comment:
       However, I see this kind of comment in `path_internal_test.cc`:
   ```
     // Translates to parquet schema:
     // optional group bag {
     //   repeated group [unseen] (List) {
     //       required int64 Entries;
     //   }
     // }
   ```
   Should the List annotation be on the top-level "bag" node, rather than on the 
middle "[unseen]" node?
   (Also, what does "[unseen]" mean? Shouldn't it be named "list" as per the 
Parquet spec?)

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& 
manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  
primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional 
child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated 
field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter 
field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, 
ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool 
field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool 
field
+
+  // Legacy 2-level necoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, 
ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner 
struct field
+}
+
+TEST_F(TestLevels, ListErrors) {
+  {
+    ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make(
+        "child_list", Repetition::REPEATED,
+        {PrimitiveNode::Make("bool", Repetition::REPEATED, 
ParquetType::BOOLEAN)},
+        ConvertedType::LIST));
+    EXPECT_TRUE(error.IsInvalid());
+    std::string expected("LIST-annotated groups must not be repeated.");
+    EXPECT_EQ(error.message().substr(0, expected.size()), expected);
+  }

Review comment:
       Ok, I read the spec now, I understand better :-)

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, 
&arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& 
manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const 
::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, 
ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  
primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, 
ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional 
child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated 
field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter 
field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, 
ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool 
field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner 
struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool 
field
+
+  // Legacy 2-level necoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, 
ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, 
/*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a 
null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner 
struct field
+}
+
+TEST_F(TestLevels, ListErrors) {
+  {
+    ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make(
+        "child_list", Repetition::REPEATED,
+        {PrimitiveNode::Make("bool", Repetition::REPEATED, 
ParquetType::BOOLEAN)},
+        ConvertedType::LIST));
+    EXPECT_TRUE(error.IsInvalid());
+    std::string expected("LIST-annotated groups must not be repeated.");
+    EXPECT_EQ(error.message().substr(0, expected.size()), expected);

Review comment:
       I see, thank you.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to