rizaon commented on a change in pull request #1024:
URL: https://github.com/apache/orc/pull/1024#discussion_r796240798
##########
File path: c++/test/TestReader.cc
##########
@@ -250,4 +252,206 @@ namespace orc {
EXPECT_EQ(3, intArrayArrayArrayBatch.offsets.data()[1]);
EXPECT_EQ(nullptr, intArrayArrayArrayBatch.elements.get());
}
+
+ /**
+ * Read TestOrcFile.nestedMap.orc and verify the resolved selections.
+ *
+ * The ORC file has the following schema:
Review comment:
Removed this method in
https://github.com/apache/orc/pull/1024/commits/9d4565d9ec7907ad20dcaab3f105bab265e164a0
##########
File path: c++/test/TestReader.cc
##########
@@ -250,4 +252,206 @@ namespace orc {
EXPECT_EQ(3, intArrayArrayArrayBatch.offsets.data()[1]);
EXPECT_EQ(nullptr, intArrayArrayArrayBatch.elements.get());
}
+
+ /**
+ * Read TestOrcFile.nestedMap.orc and verify the resolved selections.
+ *
+ * The ORC file has the following schema:
+ * struct<
+ * id:int,
+ * single_map:map<string,string>,
+ * nested_map:map<string,map<string,map<string,string>>>
+ * >
+ */
+ void verifyMapSelection(const RowReaderOptions::IdReadIntentMap&
idReadIntentMap,
Review comment:
Done
##########
File path: c++/test/TestReader.cc
##########
@@ -250,4 +252,206 @@ namespace orc {
EXPECT_EQ(3, intArrayArrayArrayBatch.offsets.data()[1]);
EXPECT_EQ(nullptr, intArrayArrayArrayBatch.elements.get());
}
+
+ /**
+ * Read TestOrcFile.nestedMap.orc and verify the resolved selections.
+ *
+ * The ORC file has the following schema:
+ * struct<
+ * id:int,
+ * single_map:map<string,string>,
+ * nested_map:map<string,map<string,map<string,string>>>
+ * >
+ */
+ void verifyMapSelection(const RowReaderOptions::IdReadIntentMap&
idReadIntentMap,
+ const std::vector<uint32_t>& expectedSelection) {
+ std::string fileName = "TestOrcFile.nestedMap.orc";
+ std::unique_ptr<Reader> reader = createExampleReader(fileName);
+
+ RowReaderOptions rowReaderOpts;
+ rowReaderOpts.includeTypesWithIntents(idReadIntentMap);
+ std::unique_ptr<RowReader> rowReader =
reader->createRowReader(rowReaderOpts);
+ std::vector<bool> expected(reader->getType().getMaximumColumnId() + 1,
false);
+ for (auto id : expectedSelection) {
+ expected[id] = true;
+ }
+ ASSERT_THAT(rowReader->getSelectedColumns(), ElementsAreArray(expected));
+ }
+
+ TEST(TestReadIntent, testMapAll) {
+ // select all of single_map.
+ verifyMapSelection({{2, ReadIntent_ALL}}, {0, 2, 3, 4});
+ }
+
+ TEST(TestReadIntent, testMapOffsets) {
+ // select only the offsets of single_map.
+ verifyMapSelection({{2, ReadIntent_OFFSETS}}, {0, 2});
+
+ // select only the offsets of single_map and the outermost offsets of
nested_map.
+ verifyMapSelection({{2, ReadIntent_OFFSETS}, {5, ReadIntent_OFFSETS}}, {0,
2, 5});
+
+ // select the entire offsets of nested_map without the map items of the
innermost map.
+ verifyMapSelection({{5, ReadIntent_OFFSETS}, {9, ReadIntent_OFFSETS}}, {0,
5, 7, 9});
+ }
+
+ TEST(TestReadIntent, testMapAllAndOffsets) {
+ // select all of single_map and only the outermost offsets of nested_map.
+ verifyMapSelection({{2, ReadIntent_ALL}, {5, ReadIntent_OFFSETS}}, {0, 2,
3, 4, 5});
+ }
+
+ TEST(TestReadIntent, testMapConflictingIntent) {
+ // test conflicting ReadIntent on nested_map.
+ verifyMapSelection({{5, ReadIntent_OFFSETS}, {9, ReadIntent_ALL}}, {0, 5,
7, 9, 10, 11});
+ verifyMapSelection({{5, ReadIntent_ALL}, {9, ReadIntent_OFFSETS}}, {0, 5,
6, 7, 8, 9, 10, 11});
+ verifyMapSelection({{5, ReadIntent_OFFSETS}, {7, ReadIntent_ALL}, {9,
ReadIntent_OFFSETS}},
+ {0, 5, 7, 8, 9, 10, 11});
+ }
+
+ TEST(TestReadIntent, testMapRowBatchContent) {
+ std::unique_ptr<Reader> reader =
createExampleReader("TestOrcFile.nestedMap.orc");
+
+ // select all of single_map and only the offsets of nested_map.
+ RowReaderOptions::IdReadIntentMap idReadIntentMap = {{2, ReadIntent_ALL},
+ {5,
ReadIntent_OFFSETS}};
+ RowReaderOptions rowReaderOpts;
+ rowReaderOpts.includeTypesWithIntents(idReadIntentMap);
+ std::unique_ptr<RowReader> rowReader =
reader->createRowReader(rowReaderOpts);
+
+ // Read a row batch.
+ std::unique_ptr<ColumnVectorBatch> batch = rowReader->createRowBatch(1024);
+ EXPECT_TRUE(rowReader->next(*batch));
+ EXPECT_EQ(1, batch->numElements);
+ auto& structBatch = dynamic_cast<StructVectorBatch&>(*batch);
+
+ // verify content of single_map selection.
+ auto& mapBatch = dynamic_cast<MapVectorBatch&>(*structBatch.fields[0]);
+ auto& keyBatch = dynamic_cast<StringVectorBatch&>(*mapBatch.keys);
+ auto& valueBatch = dynamic_cast<StringVectorBatch&>(*mapBatch.elements);
+ EXPECT_EQ(1, mapBatch.numElements);
+ EXPECT_EQ(0, mapBatch.offsets.data()[0]);
+ EXPECT_EQ(1, mapBatch.offsets.data()[1]);
+ // verify key content.
+ EXPECT_EQ(1, keyBatch.numElements);
+ EXPECT_EQ(2, keyBatch.length.data()[0]);
+ EXPECT_EQ(0, strncmp("k0", keyBatch.data.data()[0], 2));
+ // verify value content.
+ EXPECT_EQ(1, valueBatch.numElements);
+ EXPECT_EQ(2, valueBatch.length.data()[0]);
+ EXPECT_EQ(0, strncmp("v0", valueBatch.data.data()[0], 2));
+
+ // verify content of nested_map selection.
+ auto& nestedMapBatch =
dynamic_cast<MapVectorBatch&>(*structBatch.fields[1]);
+ EXPECT_EQ(1, nestedMapBatch.numElements);
+ EXPECT_EQ(0, nestedMapBatch.offsets.data()[0]);
+ EXPECT_EQ(1, nestedMapBatch.offsets.data()[1]);
+ EXPECT_EQ(nullptr, nestedMapBatch.keys.get());
+ EXPECT_EQ(nullptr, nestedMapBatch.elements.get());
+ }
+
+ /**
+ * Read TestOrcFile.nestedUnion.orc and verify the resolved selections.
+ *
+ * The ORC file has the following schema:
+ * struct<
+ * id:int,
+ * single_union:uniontype<int,string>,
+ * nested_union:uniontype<uniontype<int,uniontype<int,string>>,int>
+ * >
+ */
+ void verifyUnionSelection(const RowReaderOptions::IdReadIntentMap&
idReadIntentMap,
Review comment:
Done
##########
File path: c++/test/TestReader.cc
##########
@@ -250,4 +252,206 @@ namespace orc {
EXPECT_EQ(3, intArrayArrayArrayBatch.offsets.data()[1]);
EXPECT_EQ(nullptr, intArrayArrayArrayBatch.elements.get());
}
+
+ /**
+ * Read TestOrcFile.nestedMap.orc and verify the resolved selections.
+ *
+ * The ORC file has the following schema:
+ * struct<
+ * id:int,
+ * single_map:map<string,string>,
+ * nested_map:map<string,map<string,map<string,string>>>
+ * >
+ */
+ void verifyMapSelection(const RowReaderOptions::IdReadIntentMap&
idReadIntentMap,
+ const std::vector<uint32_t>& expectedSelection) {
+ std::string fileName = "TestOrcFile.nestedMap.orc";
+ std::unique_ptr<Reader> reader = createExampleReader(fileName);
+
+ RowReaderOptions rowReaderOpts;
+ rowReaderOpts.includeTypesWithIntents(idReadIntentMap);
+ std::unique_ptr<RowReader> rowReader =
reader->createRowReader(rowReaderOpts);
+ std::vector<bool> expected(reader->getType().getMaximumColumnId() + 1,
false);
+ for (auto id : expectedSelection) {
+ expected[id] = true;
+ }
+ ASSERT_THAT(rowReader->getSelectedColumns(), ElementsAreArray(expected));
+ }
+
+ TEST(TestReadIntent, testMapAll) {
+ // select all of single_map.
+ verifyMapSelection({{2, ReadIntent_ALL}}, {0, 2, 3, 4});
+ }
+
+ TEST(TestReadIntent, testMapOffsets) {
+ // select only the offsets of single_map.
+ verifyMapSelection({{2, ReadIntent_OFFSETS}}, {0, 2});
+
+ // select only the offsets of single_map and the outermost offsets of
nested_map.
+ verifyMapSelection({{2, ReadIntent_OFFSETS}, {5, ReadIntent_OFFSETS}}, {0,
2, 5});
+
+ // select the entire offsets of nested_map without the map items of the
innermost map.
+ verifyMapSelection({{5, ReadIntent_OFFSETS}, {9, ReadIntent_OFFSETS}}, {0,
5, 7, 9});
+ }
+
+ TEST(TestReadIntent, testMapAllAndOffsets) {
+ // select all of single_map and only the outermost offsets of nested_map.
+ verifyMapSelection({{2, ReadIntent_ALL}, {5, ReadIntent_OFFSETS}}, {0, 2,
3, 4, 5});
+ }
+
+ TEST(TestReadIntent, testMapConflictingIntent) {
+ // test conflicting ReadIntent on nested_map.
+ verifyMapSelection({{5, ReadIntent_OFFSETS}, {9, ReadIntent_ALL}}, {0, 5,
7, 9, 10, 11});
+ verifyMapSelection({{5, ReadIntent_ALL}, {9, ReadIntent_OFFSETS}}, {0, 5,
6, 7, 8, 9, 10, 11});
+ verifyMapSelection({{5, ReadIntent_OFFSETS}, {7, ReadIntent_ALL}, {9,
ReadIntent_OFFSETS}},
+ {0, 5, 7, 8, 9, 10, 11});
+ }
+
+ TEST(TestReadIntent, testMapRowBatchContent) {
+ std::unique_ptr<Reader> reader =
createExampleReader("TestOrcFile.nestedMap.orc");
+
+ // select all of single_map and only the offsets of nested_map.
+ RowReaderOptions::IdReadIntentMap idReadIntentMap = {{2, ReadIntent_ALL},
+ {5,
ReadIntent_OFFSETS}};
+ RowReaderOptions rowReaderOpts;
+ rowReaderOpts.includeTypesWithIntents(idReadIntentMap);
+ std::unique_ptr<RowReader> rowReader =
reader->createRowReader(rowReaderOpts);
+
+ // Read a row batch.
+ std::unique_ptr<ColumnVectorBatch> batch = rowReader->createRowBatch(1024);
+ EXPECT_TRUE(rowReader->next(*batch));
+ EXPECT_EQ(1, batch->numElements);
+ auto& structBatch = dynamic_cast<StructVectorBatch&>(*batch);
+
+ // verify content of int_array selection.
Review comment:
Done
##########
File path: c++/test/TestReader.cc
##########
@@ -250,4 +252,206 @@ namespace orc {
EXPECT_EQ(3, intArrayArrayArrayBatch.offsets.data()[1]);
EXPECT_EQ(nullptr, intArrayArrayArrayBatch.elements.get());
}
+
+ /**
+ * Read TestOrcFile.nestedMap.orc and verify the resolved selections.
+ *
+ * The ORC file has the following schema:
+ * struct<
+ * id:int,
+ * single_map:map<string,string>,
+ * nested_map:map<string,map<string,map<string,string>>>
+ * >
+ */
+ void verifyMapSelection(const RowReaderOptions::IdReadIntentMap&
idReadIntentMap,
+ const std::vector<uint32_t>& expectedSelection) {
+ std::string fileName = "TestOrcFile.nestedMap.orc";
+ std::unique_ptr<Reader> reader = createExampleReader(fileName);
+
+ RowReaderOptions rowReaderOpts;
+ rowReaderOpts.includeTypesWithIntents(idReadIntentMap);
+ std::unique_ptr<RowReader> rowReader =
reader->createRowReader(rowReaderOpts);
+ std::vector<bool> expected(reader->getType().getMaximumColumnId() + 1,
false);
+ for (auto id : expectedSelection) {
+ expected[id] = true;
+ }
+ ASSERT_THAT(rowReader->getSelectedColumns(), ElementsAreArray(expected));
+ }
+
+ TEST(TestReadIntent, testMapAll) {
+ // select all of single_map.
+ verifyMapSelection({{2, ReadIntent_ALL}}, {0, 2, 3, 4});
+ }
+
+ TEST(TestReadIntent, testMapOffsets) {
+ // select only the offsets of single_map.
+ verifyMapSelection({{2, ReadIntent_OFFSETS}}, {0, 2});
+
+ // select only the offsets of single_map and the outermost offsets of
nested_map.
+ verifyMapSelection({{2, ReadIntent_OFFSETS}, {5, ReadIntent_OFFSETS}}, {0,
2, 5});
+
+ // select the entire offsets of nested_map without the map items of the
innermost map.
+ verifyMapSelection({{5, ReadIntent_OFFSETS}, {9, ReadIntent_OFFSETS}}, {0,
5, 7, 9});
+ }
+
+ TEST(TestReadIntent, testMapAllAndOffsets) {
+ // select all of single_map and only the outermost offsets of nested_map.
+ verifyMapSelection({{2, ReadIntent_ALL}, {5, ReadIntent_OFFSETS}}, {0, 2,
3, 4, 5});
+ }
+
+ TEST(TestReadIntent, testMapConflictingIntent) {
+ // test conflicting ReadIntent on nested_map.
+ verifyMapSelection({{5, ReadIntent_OFFSETS}, {9, ReadIntent_ALL}}, {0, 5,
7, 9, 10, 11});
+ verifyMapSelection({{5, ReadIntent_ALL}, {9, ReadIntent_OFFSETS}}, {0, 5,
6, 7, 8, 9, 10, 11});
+ verifyMapSelection({{5, ReadIntent_OFFSETS}, {7, ReadIntent_ALL}, {9,
ReadIntent_OFFSETS}},
+ {0, 5, 7, 8, 9, 10, 11});
+ }
+
+ TEST(TestReadIntent, testMapRowBatchContent) {
+ std::unique_ptr<Reader> reader =
createExampleReader("TestOrcFile.nestedMap.orc");
+
+ // select all of single_map and only the offsets of nested_map.
+ RowReaderOptions::IdReadIntentMap idReadIntentMap = {{2, ReadIntent_ALL},
+ {5,
ReadIntent_OFFSETS}};
+ RowReaderOptions rowReaderOpts;
+ rowReaderOpts.includeTypesWithIntents(idReadIntentMap);
+ std::unique_ptr<RowReader> rowReader =
reader->createRowReader(rowReaderOpts);
+
+ // Read a row batch.
+ std::unique_ptr<ColumnVectorBatch> batch = rowReader->createRowBatch(1024);
+ EXPECT_TRUE(rowReader->next(*batch));
+ EXPECT_EQ(1, batch->numElements);
+ auto& structBatch = dynamic_cast<StructVectorBatch&>(*batch);
+
+ // verify content of single_map selection.
+ auto& mapBatch = dynamic_cast<MapVectorBatch&>(*structBatch.fields[0]);
+ auto& keyBatch = dynamic_cast<StringVectorBatch&>(*mapBatch.keys);
+ auto& valueBatch = dynamic_cast<StringVectorBatch&>(*mapBatch.elements);
+ EXPECT_EQ(1, mapBatch.numElements);
+ EXPECT_EQ(0, mapBatch.offsets.data()[0]);
+ EXPECT_EQ(1, mapBatch.offsets.data()[1]);
+ // verify key content.
+ EXPECT_EQ(1, keyBatch.numElements);
+ EXPECT_EQ(2, keyBatch.length.data()[0]);
+ EXPECT_EQ(0, strncmp("k0", keyBatch.data.data()[0], 2));
+ // verify value content.
+ EXPECT_EQ(1, valueBatch.numElements);
+ EXPECT_EQ(2, valueBatch.length.data()[0]);
+ EXPECT_EQ(0, strncmp("v0", valueBatch.data.data()[0], 2));
+
+ // verify content of nested_map selection.
+ auto& nestedMapBatch =
dynamic_cast<MapVectorBatch&>(*structBatch.fields[1]);
+ EXPECT_EQ(1, nestedMapBatch.numElements);
+ EXPECT_EQ(0, nestedMapBatch.offsets.data()[0]);
+ EXPECT_EQ(1, nestedMapBatch.offsets.data()[1]);
+ EXPECT_EQ(nullptr, nestedMapBatch.keys.get());
+ EXPECT_EQ(nullptr, nestedMapBatch.elements.get());
+ }
+
+ /**
+ * Read TestOrcFile.nestedUnion.orc and verify the resolved selections.
+ *
+ * The ORC file has the following schema:
+ * struct<
+ * id:int,
+ * single_union:uniontype<int,string>,
+ * nested_union:uniontype<uniontype<int,uniontype<int,string>>,int>
+ * >
+ */
+ void verifyUnionSelection(const RowReaderOptions::IdReadIntentMap&
idReadIntentMap,
+ const std::vector<uint32_t>& expectedSelection) {
+ std::string fileName = "TestOrcFile.nestedUnion.orc";
+ std::unique_ptr<Reader> reader = createExampleReader(fileName);
+
+ RowReaderOptions rowReaderOpts;
+ rowReaderOpts.includeTypesWithIntents(idReadIntentMap);
+ std::unique_ptr<RowReader> rowReader =
reader->createRowReader(rowReaderOpts);
+ std::vector<bool> expected(reader->getType().getMaximumColumnId() + 1,
false);
+ for (auto id : expectedSelection) {
+ expected[id] = true;
+ }
+ ASSERT_THAT(rowReader->getSelectedColumns(), ElementsAreArray(expected));
+ }
+
+ TEST(TestReadIntent, testUnionAll) {
+ // select all of single_union.
+ verifyUnionSelection({{2, ReadIntent_ALL}}, {0, 2, 3, 4});
+ }
+
+ TEST(TestReadIntent, testUnionOffsets) {
+ // select only the offsets of single_union.
+ verifyUnionSelection({{2, ReadIntent_OFFSETS}}, {0, 2});
+
+ // select only the offsets of single_union and the outermost offsets of
nested_union.
+ verifyUnionSelection({{2, ReadIntent_OFFSETS}, {5, ReadIntent_OFFSETS}},
{0, 2, 5});
+
+ // select only the offsets of single_union and the innermost offsets of
nested_union.
+ verifyUnionSelection({{2, ReadIntent_OFFSETS}, {8, ReadIntent_OFFSETS}},
+ {0, 2, 5, 6, 7, 8, 11});
+ }
+
+ TEST(TestReadIntent, testUnionAllAndOffsets) {
+ // select all of single_union and only the outermost offsets of
nested_union.
+ verifyUnionSelection({{2, ReadIntent_ALL}, {5, ReadIntent_OFFSETS}}, {0,
2, 3, 4, 5});
+ }
+
+ TEST(TestReadIntent, testUnionConflictingIntent) {
+ // test conflicting ReadIntent on nested_union.
+ verifyUnionSelection({{5, ReadIntent_OFFSETS}, {8, ReadIntent_ALL}},
+ {0, 5, 6, 7, 8, 9, 10, 11});
+ verifyUnionSelection({{5, ReadIntent_ALL}, {8, ReadIntent_OFFSETS}},
+ {0, 5, 6, 7, 8, 9, 10, 11});
+ verifyUnionSelection({{5, ReadIntent_OFFSETS}, {6, ReadIntent_ALL}, {8,
ReadIntent_OFFSETS}},
+ {0, 5, 6, 7, 8, 9, 10, 11});
+ }
+
+ TEST(TestReadIntent, testUnionRowBatchContent) {
+ std::unique_ptr<Reader> reader =
createExampleReader("TestOrcFile.nestedUnion.orc");
+
+ // select all of single_union and only the offsets of nested_union.
+ RowReaderOptions::IdReadIntentMap idReadIntentMap = {{2, ReadIntent_ALL},
+ {5,
ReadIntent_OFFSETS}};
+ RowReaderOptions rowReaderOpts;
+ rowReaderOpts.includeTypesWithIntents(idReadIntentMap);
+ std::unique_ptr<RowReader> rowReader =
reader->createRowReader(rowReaderOpts);
+
+ // Read a row batch.
+ std::unique_ptr<ColumnVectorBatch> batch = rowReader->createRowBatch(1024);
+ EXPECT_TRUE(rowReader->next(*batch));
+ EXPECT_EQ(4, batch->numElements);
+ auto& structBatch = dynamic_cast<StructVectorBatch&>(*batch);
+
+ // verify content of int_array selection.
Review comment:
Done
##########
File path: c++/test/TestReader.cc
##########
@@ -250,4 +252,206 @@ namespace orc {
EXPECT_EQ(3, intArrayArrayArrayBatch.offsets.data()[1]);
EXPECT_EQ(nullptr, intArrayArrayArrayBatch.elements.get());
}
+
+ /**
+ * Read TestOrcFile.nestedMap.orc and verify the resolved selections.
+ *
+ * The ORC file has the following schema:
+ * struct<
+ * id:int,
+ * single_map:map<string,string>,
+ * nested_map:map<string,map<string,map<string,string>>>
+ * >
+ */
+ void verifyMapSelection(const RowReaderOptions::IdReadIntentMap&
idReadIntentMap,
+ const std::vector<uint32_t>& expectedSelection) {
+ std::string fileName = "TestOrcFile.nestedMap.orc";
+ std::unique_ptr<Reader> reader = createExampleReader(fileName);
+
+ RowReaderOptions rowReaderOpts;
+ rowReaderOpts.includeTypesWithIntents(idReadIntentMap);
+ std::unique_ptr<RowReader> rowReader =
reader->createRowReader(rowReaderOpts);
+ std::vector<bool> expected(reader->getType().getMaximumColumnId() + 1,
false);
+ for (auto id : expectedSelection) {
+ expected[id] = true;
+ }
+ ASSERT_THAT(rowReader->getSelectedColumns(), ElementsAreArray(expected));
+ }
+
+ TEST(TestReadIntent, testMapAll) {
+ // select all of single_map.
+ verifyMapSelection({{2, ReadIntent_ALL}}, {0, 2, 3, 4});
+ }
+
+ TEST(TestReadIntent, testMapOffsets) {
+ // select only the offsets of single_map.
+ verifyMapSelection({{2, ReadIntent_OFFSETS}}, {0, 2});
+
+ // select only the offsets of single_map and the outermost offsets of
nested_map.
+ verifyMapSelection({{2, ReadIntent_OFFSETS}, {5, ReadIntent_OFFSETS}}, {0,
2, 5});
+
+ // select the entire offsets of nested_map without the map items of the
innermost map.
+ verifyMapSelection({{5, ReadIntent_OFFSETS}, {9, ReadIntent_OFFSETS}}, {0,
5, 7, 9});
+ }
+
+ TEST(TestReadIntent, testMapAllAndOffsets) {
+ // select all of single_map and only the outermost offsets of nested_map.
+ verifyMapSelection({{2, ReadIntent_ALL}, {5, ReadIntent_OFFSETS}}, {0, 2,
3, 4, 5});
+ }
+
+ TEST(TestReadIntent, testMapConflictingIntent) {
+ // test conflicting ReadIntent on nested_map.
+ verifyMapSelection({{5, ReadIntent_OFFSETS}, {9, ReadIntent_ALL}}, {0, 5,
7, 9, 10, 11});
+ verifyMapSelection({{5, ReadIntent_ALL}, {9, ReadIntent_OFFSETS}}, {0, 5,
6, 7, 8, 9, 10, 11});
+ verifyMapSelection({{5, ReadIntent_OFFSETS}, {7, ReadIntent_ALL}, {9,
ReadIntent_OFFSETS}},
+ {0, 5, 7, 8, 9, 10, 11});
+ }
+
+ TEST(TestReadIntent, testMapRowBatchContent) {
+ std::unique_ptr<Reader> reader =
createExampleReader("TestOrcFile.nestedMap.orc");
+
+ // select all of single_map and only the offsets of nested_map.
+ RowReaderOptions::IdReadIntentMap idReadIntentMap = {{2, ReadIntent_ALL},
+ {5,
ReadIntent_OFFSETS}};
+ RowReaderOptions rowReaderOpts;
+ rowReaderOpts.includeTypesWithIntents(idReadIntentMap);
+ std::unique_ptr<RowReader> rowReader =
reader->createRowReader(rowReaderOpts);
+
+ // Read a row batch.
+ std::unique_ptr<ColumnVectorBatch> batch = rowReader->createRowBatch(1024);
+ EXPECT_TRUE(rowReader->next(*batch));
+ EXPECT_EQ(1, batch->numElements);
+ auto& structBatch = dynamic_cast<StructVectorBatch&>(*batch);
+
+ // verify content of single_map selection.
+ auto& mapBatch = dynamic_cast<MapVectorBatch&>(*structBatch.fields[0]);
+ auto& keyBatch = dynamic_cast<StringVectorBatch&>(*mapBatch.keys);
+ auto& valueBatch = dynamic_cast<StringVectorBatch&>(*mapBatch.elements);
+ EXPECT_EQ(1, mapBatch.numElements);
+ EXPECT_EQ(0, mapBatch.offsets.data()[0]);
+ EXPECT_EQ(1, mapBatch.offsets.data()[1]);
+ // verify key content.
+ EXPECT_EQ(1, keyBatch.numElements);
+ EXPECT_EQ(2, keyBatch.length.data()[0]);
+ EXPECT_EQ(0, strncmp("k0", keyBatch.data.data()[0], 2));
+ // verify value content.
+ EXPECT_EQ(1, valueBatch.numElements);
+ EXPECT_EQ(2, valueBatch.length.data()[0]);
+ EXPECT_EQ(0, strncmp("v0", valueBatch.data.data()[0], 2));
+
+ // verify content of nested_map selection.
+ auto& nestedMapBatch =
dynamic_cast<MapVectorBatch&>(*structBatch.fields[1]);
+ EXPECT_EQ(1, nestedMapBatch.numElements);
+ EXPECT_EQ(0, nestedMapBatch.offsets.data()[0]);
+ EXPECT_EQ(1, nestedMapBatch.offsets.data()[1]);
+ EXPECT_EQ(nullptr, nestedMapBatch.keys.get());
+ EXPECT_EQ(nullptr, nestedMapBatch.elements.get());
+ }
+
+ /**
+ * Read TestOrcFile.nestedUnion.orc and verify the resolved selections.
+ *
+ * The ORC file has the following schema:
+ * struct<
+ * id:int,
+ * single_union:uniontype<int,string>,
+ * nested_union:uniontype<uniontype<int,uniontype<int,string>>,int>
+ * >
+ */
+ void verifyUnionSelection(const RowReaderOptions::IdReadIntentMap&
idReadIntentMap,
+ const std::vector<uint32_t>& expectedSelection) {
+ std::string fileName = "TestOrcFile.nestedUnion.orc";
+ std::unique_ptr<Reader> reader = createExampleReader(fileName);
+
+ RowReaderOptions rowReaderOpts;
+ rowReaderOpts.includeTypesWithIntents(idReadIntentMap);
+ std::unique_ptr<RowReader> rowReader =
reader->createRowReader(rowReaderOpts);
+ std::vector<bool> expected(reader->getType().getMaximumColumnId() + 1,
false);
+ for (auto id : expectedSelection) {
+ expected[id] = true;
+ }
+ ASSERT_THAT(rowReader->getSelectedColumns(), ElementsAreArray(expected));
+ }
+
+ TEST(TestReadIntent, testUnionAll) {
+ // select all of single_union.
+ verifyUnionSelection({{2, ReadIntent_ALL}}, {0, 2, 3, 4});
+ }
+
+ TEST(TestReadIntent, testUnionOffsets) {
+ // select only the offsets of single_union.
+ verifyUnionSelection({{2, ReadIntent_OFFSETS}}, {0, 2});
+
+ // select only the offsets of single_union and the outermost offsets of
nested_union.
+ verifyUnionSelection({{2, ReadIntent_OFFSETS}, {5, ReadIntent_OFFSETS}},
{0, 2, 5});
+
+ // select only the offsets of single_union and the innermost offsets of
nested_union.
+ verifyUnionSelection({{2, ReadIntent_OFFSETS}, {8, ReadIntent_OFFSETS}},
+ {0, 2, 5, 6, 7, 8, 11});
+ }
+
+ TEST(TestReadIntent, testUnionAllAndOffsets) {
+ // select all of single_union and only the outermost offsets of
nested_union.
+ verifyUnionSelection({{2, ReadIntent_ALL}, {5, ReadIntent_OFFSETS}}, {0,
2, 3, 4, 5});
+ }
+
+ TEST(TestReadIntent, testUnionConflictingIntent) {
+ // test conflicting ReadIntent on nested_union.
+ verifyUnionSelection({{5, ReadIntent_OFFSETS}, {8, ReadIntent_ALL}},
+ {0, 5, 6, 7, 8, 9, 10, 11});
+ verifyUnionSelection({{5, ReadIntent_ALL}, {8, ReadIntent_OFFSETS}},
+ {0, 5, 6, 7, 8, 9, 10, 11});
+ verifyUnionSelection({{5, ReadIntent_OFFSETS}, {6, ReadIntent_ALL}, {8,
ReadIntent_OFFSETS}},
+ {0, 5, 6, 7, 8, 9, 10, 11});
+ }
+
+ TEST(TestReadIntent, testUnionRowBatchContent) {
+ std::unique_ptr<Reader> reader =
createExampleReader("TestOrcFile.nestedUnion.orc");
+
+ // select all of single_union and only the offsets of nested_union.
+ RowReaderOptions::IdReadIntentMap idReadIntentMap = {{2, ReadIntent_ALL},
+ {5,
ReadIntent_OFFSETS}};
+ RowReaderOptions rowReaderOpts;
+ rowReaderOpts.includeTypesWithIntents(idReadIntentMap);
+ std::unique_ptr<RowReader> rowReader =
reader->createRowReader(rowReaderOpts);
+
+ // Read a row batch.
+ std::unique_ptr<ColumnVectorBatch> batch = rowReader->createRowBatch(1024);
+ EXPECT_TRUE(rowReader->next(*batch));
+ EXPECT_EQ(4, batch->numElements);
+ auto& structBatch = dynamic_cast<StructVectorBatch&>(*batch);
+
+ // verify content of single_union selection.
+ auto& unionBatch = dynamic_cast<UnionVectorBatch&>(*structBatch.fields[0]);
+ EXPECT_EQ(4, unionBatch.numElements);
+ EXPECT_EQ(2, unionBatch.children.size());
+ auto& longBatch = dynamic_cast<LongVectorBatch&>(*unionBatch.children[0]);
+ auto& stringBatch =
dynamic_cast<StringVectorBatch&>(*unionBatch.children[1]);
+ EXPECT_EQ(3, longBatch.numElements);
+ EXPECT_EQ(1, stringBatch.numElements);
+ // verify content of the first row.
+ EXPECT_EQ(0, unionBatch.tags.data()[0]);
+ EXPECT_EQ(0, unionBatch.offsets.data()[0]);
+ EXPECT_EQ(0, longBatch.data.data()[0]);
+ // verify content of the second row.
+ EXPECT_EQ(1, unionBatch.tags.data()[1]);
+ EXPECT_EQ(0, unionBatch.offsets.data()[1]);
+ EXPECT_EQ(2, stringBatch.length.data()[0]);
+ EXPECT_EQ(0, strncmp("s1", stringBatch.data.data()[0], 2));
Review comment:
Done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]