stiga-huang commented on a change in pull request #990:
URL: https://github.com/apache/orc/pull/990#discussion_r782798933



##########
File path: c++/test/TestReader.cc
##########
@@ -137,4 +139,89 @@ namespace orc {
     CheckFileWithSargs("bad_bloom_filter_1.6.11.orc", "ORC C++ 1.6.11");
     CheckFileWithSargs("bad_bloom_filter_1.6.0.orc", "ORC C++");
   }
+
+  /**
+   * Read complextypes_iceberg.orc and verify the resolved selections.
+   *
+   * The ORC file has the following schema:
+   *   struct<
+   *     id:bigint,
+   *     int_array:array<int>,
+   *     int_array_array:array<array<int>>,
+   *     int_map:map<string,int>,
+   *     int_map_array:array<map<string,int>>,
+   *     nested_struct:struct<
+   *       a:int,
+   *       b:array<int>,
+   *       c:struct<
+   *         d:array<array<struct<
+   *           e:int,
+   *           f:string
+   *         >>>
+   *       >,
+   *       g:map<string,struct<
+   *         h:struct<
+   *           i:array<double>
+   *         >
+   *       >>
+   *     >
+   *   >
+   *
+   * @param readIntents TypeReadIntents describing the section.

Review comment:
       typo? "section" => "selection" ?

##########
File path: c++/test/TestReader.cc
##########
@@ -137,4 +139,89 @@ namespace orc {
     CheckFileWithSargs("bad_bloom_filter_1.6.11.orc", "ORC C++ 1.6.11");
     CheckFileWithSargs("bad_bloom_filter_1.6.0.orc", "ORC C++");
   }
+
+  /**
+   * Read complextypes_iceberg.orc and verify the resolved selections.
+   *
+   * The ORC file has the following schema:
+   *   struct<
+   *     id:bigint,
+   *     int_array:array<int>,
+   *     int_array_array:array<array<int>>,
+   *     int_map:map<string,int>,
+   *     int_map_array:array<map<string,int>>,
+   *     nested_struct:struct<
+   *       a:int,
+   *       b:array<int>,
+   *       c:struct<
+   *         d:array<array<struct<
+   *           e:int,
+   *           f:string
+   *         >>>
+   *       >,
+   *       g:map<string,struct<
+   *         h:struct<
+   *           i:array<double>
+   *         >
+   *       >>
+   *     >
+   *   >
+   *
+   * @param readIntents TypeReadIntents describing the section.
+   * @param expectedSelection expected TypeIds that will be selected from given
+   * readIntents.
+   */
+  void verifySelection(const RowReaderOptions::TypeReadIntents &readIntents,
+                       const std::vector<uint32_t> &expectedSelection) {
+    std::string fileName = "complextypes_iceberg.orc";
+    std::stringstream ss;
+    if (const char* example_dir = std::getenv("ORC_EXAMPLE_DIR")) {
+      ss << example_dir;
+    } else {
+      ss << "../../../examples";
+    }
+    ss << "/" << fileName;
+    ReaderOptions readerOpts;
+    std::unique_ptr<Reader> reader =
+        createReader(readLocalFile(ss.str().c_str()), readerOpts);
+
+    RowReaderOptions rowReaderOpts;
+    rowReaderOpts.includeTypesWithIntents(readIntents);
+    std::unique_ptr<RowReader> rowReader =
+        reader->createRowReader(rowReaderOpts);
+    std::vector<bool> expected(reader->getType().getMaximumColumnId() + 1,
+                               false);
+    for (auto id : expectedSelection) {
+      expected[id] = true;
+    }
+    ASSERT_THAT(rowReader->getSelectedColumns(), ElementsAreArray(expected));
+  }
+
+  TEST(TestReadIntent, testListAll) {
+    // select all of int_array_array.
+    verifySelection({{4, ReadIntent_ALL}}, {0, 4, 5, 6});
+  }
+
+  TEST(TestReadIntent, testListOffsets) {
+    // select only the offsets of int_array_array.
+    verifySelection({{4, ReadIntent_OFFSETS}}, {0, 4});

Review comment:
       Can we also verify that the output batches contain only offsets (i.e.
they don't have elements)? Or add another simple test for verifying the batches?

##########
File path: c++/test/TestReader.cc
##########
@@ -137,4 +139,89 @@ namespace orc {
     CheckFileWithSargs("bad_bloom_filter_1.6.11.orc", "ORC C++ 1.6.11");
     CheckFileWithSargs("bad_bloom_filter_1.6.0.orc", "ORC C++");
   }
+
+  /**
+   * Read complextypes_iceberg.orc and verify the resolved selections.
+   *
+   * The ORC file has the following schema:
+   *   struct<
+   *     id:bigint,
+   *     int_array:array<int>,
+   *     int_array_array:array<array<int>>,
+   *     int_map:map<string,int>,
+   *     int_map_array:array<map<string,int>>,
+   *     nested_struct:struct<
+   *       a:int,
+   *       b:array<int>,
+   *       c:struct<
+   *         d:array<array<struct<
+   *           e:int,
+   *           f:string
+   *         >>>
+   *       >,
+   *       g:map<string,struct<
+   *         h:struct<
+   *           i:array<double>
+   *         >
+   *       >>
+   *     >
+   *   >
+   *
+   * @param readIntents TypeReadIntents describing the section.
+   * @param expectedSelection expected TypeIds that will be selected from given
+   * readIntents.
+   */
+  void verifySelection(const RowReaderOptions::TypeReadIntents &readIntents,
+                       const std::vector<uint32_t> &expectedSelection) {
+    std::string fileName = "complextypes_iceberg.orc";
+    std::stringstream ss;
+    if (const char* example_dir = std::getenv("ORC_EXAMPLE_DIR")) {
+      ss << example_dir;
+    } else {
+      ss << "../../../examples";
+    }
+    ss << "/" << fileName;
+    ReaderOptions readerOpts;
+    std::unique_ptr<Reader> reader =
+        createReader(readLocalFile(ss.str().c_str()), readerOpts);
+
+    RowReaderOptions rowReaderOpts;
+    rowReaderOpts.includeTypesWithIntents(readIntents);
+    std::unique_ptr<RowReader> rowReader =
+        reader->createRowReader(rowReaderOpts);
+    std::vector<bool> expected(reader->getType().getMaximumColumnId() + 1,
+                               false);
+    for (auto id : expectedSelection) {
+      expected[id] = true;
+    }
+    ASSERT_THAT(rowReader->getSelectedColumns(), ElementsAreArray(expected));
+  }
+
+  TEST(TestReadIntent, testListAll) {
+    // select all of int_array_array.
+    verifySelection({{4, ReadIntent_ALL}}, {0, 4, 5, 6});
+  }
+
+  TEST(TestReadIntent, testListOffsets) {
+    // select only the offsets of int_array_array.
+    verifySelection({{4, ReadIntent_OFFSETS}}, {0, 4});
+
+    // select only the offsets of int_array_array.item.
+    verifySelection({{4, ReadIntent_OFFSETS}, {5, ReadIntent_OFFSETS}},
+                    {0, 4, 5});

Review comment:
       Can we add a test for a column of type array<array<array<int>>> that
selects the outermost and innermost offsets together?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to