Repository: orc Updated Branches: refs/heads/master b39302f59 -> 3945f0663
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/test/TestColumnReader.cc ---------------------------------------------------------------------- diff --git a/c++/test/TestColumnReader.cc b/c++/test/TestColumnReader.cc index 4b1b4b1..075a069 100644 --- a/c++/test/TestColumnReader.cc +++ b/c++/test/TestColumnReader.cc @@ -37,14 +37,14 @@ namespace orc { class MockStripeStreams: public StripeStreams { public: ~MockStripeStreams(); - std::unique_ptr<SeekableInputStream> getStream(int64_t columnId, + std::unique_ptr<SeekableInputStream> getStream(uint64_t columnId, proto::Stream_Kind kind, bool stream) const override; MOCK_CONST_METHOD0(getReaderOptions, const ReaderOptions&()); MOCK_CONST_METHOD0(getSelectedColumns, const std::vector<bool>()); - MOCK_CONST_METHOD1(getEncoding, proto::ColumnEncoding (int64_t)); + MOCK_CONST_METHOD1(getEncoding, proto::ColumnEncoding (uint64_t)); MOCK_CONST_METHOD3(getStreamProxy, SeekableInputStream* - (int64_t, proto::Stream_Kind, bool)); + (uint64_t, proto::Stream_Kind, bool)); MemoryPool& getMemoryPool() const { return *getDefaultPool(); } @@ -60,7 +60,7 @@ MockStripeStreams::~MockStripeStreams() { } std::unique_ptr<SeekableInputStream> -MockStripeStreams::getStream(int64_t columnId, +MockStripeStreams::getStream(uint64_t columnId, proto::Stream_Kind kind, bool shouldStream) const { return std::unique_ptr < SeekableInputStream > @@ -98,8 +98,7 @@ TEST(TestColumnReader, testBooleanWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(BOOLEAN), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createPrimitiveType(BOOLEAN)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); LongVectorBatch *longBatch = new LongVectorBatch(1024, *getDefaultPool()); @@ -152,8 +151,7 @@ TEST(TestColumnReader, testBooleanSkipsWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - 
rowType->addStructField(createPrimitiveType(BOOLEAN), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createPrimitiveType(BOOLEAN)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); LongVectorBatch *longBatch = new LongVectorBatch(1024, *getDefaultPool()); @@ -217,8 +215,7 @@ TEST(TestColumnReader, testByteWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(BYTE), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createPrimitiveType(BYTE)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -281,8 +278,7 @@ TEST(TestColumnReader, testByteSkipsWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(BYTE), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createPrimitiveType(BYTE)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -313,7 +309,7 @@ TEST(TestColumnReader, testIntegerWithNulls) { // set getSelectedColumns() std::vector<bool> selectedColumns(2, true); - + EXPECT_CALL(streams, getSelectedColumns()) .WillRepeatedly(testing::Return(selectedColumns)); @@ -337,8 +333,7 @@ TEST(TestColumnReader, testIntegerWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(INT), "myInt" ); - rowType->assignIds(0); + rowType->addStructField("myInt", createPrimitiveType(INT)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -403,8 +398,7 @@ TEST(TestColumnReader, testDictionaryWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(STRING), "myString"); - rowType->assignIds(0); + rowType->addStructField("myString", createPrimitiveType(STRING)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); 
@@ -503,10 +497,9 @@ TEST(TestColumnReader, testVarcharDictionaryWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(VARCHAR), "col0"); - rowType->addStructField(createPrimitiveType(CHAR), "col1"); - rowType->addStructField(createPrimitiveType(STRING), "col2"); - rowType->assignIds(0); + rowType->addStructField("col0", createPrimitiveType(VARCHAR)) + ->addStructField("col1", createPrimitiveType(CHAR)) + ->addStructField("col2", createPrimitiveType(STRING)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -576,11 +569,14 @@ TEST(TestColumnReader, testSubstructsWithNulls) { (buffer4, ARRAY_SIZE(buffer4)))); // create the row type + std::unique_ptr<Type> innerType = createStructType(); + innerType->addStructField("col2", createPrimitiveType(LONG)); + + std::unique_ptr<Type> middleType = createStructType(); + middleType->addStructField("col1", std::move(innerType)); + std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createStructType(), "col0") - .addStructField(createStructType(), "col1") - .addStructField(createPrimitiveType(LONG), "col2"); - rowType->assignIds(0); + rowType->addStructField("col0", std::move(middleType)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -684,9 +680,8 @@ TEST(TestColumnReader, testSkipWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(INT), "myInt"); - rowType->addStructField(createPrimitiveType(STRING), "myString"); - rowType->assignIds(0); + rowType->addStructField("myInt", createPrimitiveType(INT)); + rowType->addStructField("myString", createPrimitiveType(STRING)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -766,8 +761,7 @@ TEST(TestColumnReader, testBinaryDirect) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - 
rowType->addStructField(createPrimitiveType(BINARY), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createPrimitiveType(BINARY)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -829,8 +823,7 @@ TEST(TestColumnReader, testBinaryDirectWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(BINARY), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createPrimitiveType(BINARY)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -881,7 +874,7 @@ TEST(TestColumnReader, testShortBlobError) { EXPECT_CALL(streams, getStreamProxy(1, proto::Stream_Kind_DATA, true)) .WillRepeatedly(testing::Return(new SeekableArrayInputStream (blob, ARRAY_SIZE(blob)))); - + const unsigned char buffer1[] = {0x61, 0x00, 0x02}; EXPECT_CALL(streams, getStreamProxy(1, proto::Stream_Kind_LENGTH, true)) .WillRepeatedly(testing::Return(new SeekableArrayInputStream @@ -889,8 +882,7 @@ TEST(TestColumnReader, testShortBlobError) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(STRING), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createPrimitiveType(STRING)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -939,8 +931,7 @@ TEST(TestColumnReader, testStringDirectShortBuffer) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(STRING), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createPrimitiveType(STRING)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -1002,8 +993,7 @@ TEST(TestColumnReader, testStringDirectShortBufferWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(STRING), "col0"); - rowType->assignIds(0); + 
rowType->addStructField("col0", createPrimitiveType(STRING)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -1073,8 +1063,7 @@ TEST(TestColumnReader, testStringDirectNullAcrossWindow) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(STRING), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createPrimitiveType(STRING)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -1132,7 +1121,7 @@ TEST(TestColumnReader, testStringDirectSkip) { (blob, BLOB_SIZE, 200))); // the stream of 0 to 1199 - const unsigned char buffer1[] = + const unsigned char buffer1[] = { 0x7f, 0x01, 0x00, 0x7f, 0x01, 0x82, 0x01, 0x7f, 0x01, 0x84, 0x02, @@ -1149,8 +1138,7 @@ TEST(TestColumnReader, testStringDirectSkip) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(STRING), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createPrimitiveType(STRING)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -1250,8 +1238,7 @@ TEST(TestColumnReader, testStringDirectSkipWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(STRING), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createPrimitiveType(STRING)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -1338,8 +1325,7 @@ TEST(TestColumnReader, testList) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createListType(createPrimitiveType(LONG)), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createListType(createPrimitiveType(LONG))); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -1371,10 +1357,11 @@ TEST(TestColumnReader, testListPropagateNulls) { EXPECT_CALL(streams, 
getSelectedColumns()) .WillRepeatedly(testing::Return(selectedColumns)); + std::unique_ptr<Type> innerType = createStructType(); + innerType->addStructField("col0_0", + createListType(createPrimitiveType(LONG))); std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createStructType(), "col0") - .addStructField(createListType(createPrimitiveType(LONG)), "col0_0"); - rowType->assignIds(0); + rowType->addStructField("col0", std::move(innerType)); // set getEncoding proto::ColumnEncoding directEncoding; @@ -1495,8 +1482,7 @@ TEST(TestColumnReader, testListWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createListType(createPrimitiveType(LONG)), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createListType(createPrimitiveType(LONG))); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -1658,8 +1644,7 @@ TEST(TestColumnReader, testListSkipWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createListType(createPrimitiveType(LONG)), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createListType(createPrimitiveType(LONG))); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -1759,8 +1744,7 @@ TEST(TestColumnReader, testListSkipWithNullsNoData) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createListType(createPrimitiveType(LONG)), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createListType(createPrimitiveType(LONG))); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -1857,10 +1841,8 @@ TEST(TestColumnReader, testMap) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createMapType(createPrimitiveType(LONG), - createPrimitiveType(LONG)), - "col0"); - rowType->assignIds(0); + 
rowType->addStructField("col0", createMapType(createPrimitiveType(LONG), + createPrimitiveType(LONG))); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -1976,10 +1958,8 @@ TEST(TestColumnReader, testMapWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createMapType(createPrimitiveType(LONG), - createPrimitiveType(LONG)), - "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createMapType(createPrimitiveType(LONG), + createPrimitiveType(LONG))); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -2186,10 +2166,8 @@ TEST(TestColumnReader, testMapSkipWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createMapType(createPrimitiveType(LONG), - createPrimitiveType(LONG)), - "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createMapType(createPrimitiveType(LONG), + createPrimitiveType(LONG))); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -2296,10 +2274,8 @@ TEST(TestColumnReader, testMapSkipWithNullsNoData) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createMapType(createPrimitiveType(LONG), - createPrimitiveType(LONG)), - "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createMapType(createPrimitiveType(LONG), + createPrimitiveType(LONG))); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -2384,8 +2360,7 @@ TEST(TestColumnReader, testFloatWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(FLOAT), "myFloat"); - rowType->assignIds(0); + rowType->addStructField("myFloat", createPrimitiveType(FLOAT)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -2448,8 +2423,7 @@ TEST(TestColumnReader, testFloatSkipWithNulls) { // create the 
row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(FLOAT), "myFloat"); - rowType->assignIds(0); + rowType->addStructField("myFloat", createPrimitiveType(FLOAT)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -2550,8 +2524,7 @@ TEST(TestColumnReader, testDoubleWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(DOUBLE), "myDouble"); - rowType->assignIds(0); + rowType->addStructField("myDouble", createPrimitiveType(DOUBLE)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -2615,8 +2588,7 @@ TEST(TestColumnReader, testDoubleSkipWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(DOUBLE), "myDouble"); - rowType->assignIds(0); + rowType->addStructField("myDouble", createPrimitiveType(DOUBLE)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -2687,7 +2659,7 @@ TEST(TestColumnReader, testTimestampSkipWithNulls) { .WillRepeatedly(testing::Return(new SeekableArrayInputStream (buffer1, ARRAY_SIZE(buffer1)))); - const unsigned char buffer2[] = { 0xfc, 0xbb, 0xb5, 0xbe, 0x31, 0xa1, 0xee, + const unsigned char buffer2[] = { 0xfc, 0xbb, 0xb5, 0xbe, 0x31, 0xa1, 0xee, 0xe2, 0x10, 0xf8, 0x92, 0xee, 0xf, 0x92, 0xa0, 0xd4, 0x30 }; EXPECT_CALL(streams, getStreamProxy(1, proto::Stream_Kind_DATA, true)) @@ -2701,8 +2673,7 @@ TEST(TestColumnReader, testTimestampSkipWithNulls) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(TIMESTAMP), "myTimestamp"); - rowType->assignIds(0); + rowType->addStructField("myTimestamp", createPrimitiveType(TIMESTAMP)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -2812,8 +2783,7 @@ TEST(TestColumnReader, testTimestamp) { // create the row type 
std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createPrimitiveType(TIMESTAMP), "myTimestamp"); - rowType->assignIds(0); + rowType->addStructField("myTimestamp", createPrimitiveType(TIMESTAMP)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -2902,8 +2872,7 @@ TEST(DecimalColumnReader, testDecimal64) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createDecimalType(12, 2), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createDecimalType(12, 2)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -2983,8 +2952,7 @@ TEST(DecimalColumnReader, testDecimal64Skip) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createDecimalType(12, 10), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createDecimalType(12, 10)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -3059,8 +3027,7 @@ TEST(DecimalColumnReader, testDecimal128) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createDecimalType(32, 2), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createDecimalType(32, 2)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -3153,8 +3120,7 @@ TEST(DecimalColumnReader, testDecimal128Skip) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createDecimalType(38, 37), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createDecimalType(38, 37)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -3241,8 +3207,7 @@ TEST(DecimalColumnReader, testDecimalHive11) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createDecimalType(0, 0), "col0"); - rowType->assignIds(0); + 
rowType->addStructField("col0", createDecimalType(0, 0)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -3341,8 +3306,7 @@ TEST(DecimalColumnReader, testDecimalHive11Skip) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createDecimalType(0, 0), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createDecimalType(0, 0)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -3427,8 +3391,7 @@ TEST(DecimalColumnReader, testDecimalHive11ScaleUp) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createDecimalType(0, 0), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createDecimalType(0, 0)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -3516,8 +3479,7 @@ TEST(DecimalColumnReader, testDecimalHive11ScaleDown) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createDecimalType(0, 0), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createDecimalType(0, 0)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -3587,8 +3549,7 @@ TEST(DecimalColumnReader, testDecimalHive11OverflowException) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createDecimalType(0, 0), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createDecimalType(0, 0)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -3646,8 +3607,7 @@ TEST(DecimalColumnReader, testDecimalHive11OverflowExceptionNull) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createDecimalType(0, 0), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createDecimalType(0, 0)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -3712,8 
+3672,7 @@ TEST(DecimalColumnReader, testDecimalHive11OverflowNull) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createDecimalType(0, 0), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createDecimalType(0, 0)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -3795,8 +3754,7 @@ TEST(DecimalColumnReader, testDecimalHive11BigBatches) { // create the row type std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createDecimalType(0, 0), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", createDecimalType(0, 0)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -3892,12 +3850,11 @@ TEST(TestColumnReader, testUnion) { (buffer3, ARRAY_SIZE(buffer3)))); // create the row type - std::vector<Type*> childrenTypes; - childrenTypes.push_back(createPrimitiveType(LONG).release()); - childrenTypes.push_back(createPrimitiveType(INT).release()); + std::unique_ptr<Type> unionType = createUnionType(); + unionType->addUnionChild(createPrimitiveType(LONG)); + unionType->addUnionChild(createPrimitiveType(INT)); std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createUnionType(childrenTypes), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", std::move(unionType)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -4043,12 +4000,11 @@ TEST(TestColumnReader, testUnionWithNulls) { (buffer4, ARRAY_SIZE(buffer4)))); // create the row type - std::vector<Type*> childrenTypes; - childrenTypes.push_back(createPrimitiveType(LONG).release()); - childrenTypes.push_back(createPrimitiveType(INT).release()); + std::unique_ptr<Type> unionType = createUnionType(); + unionType->addUnionChild(createPrimitiveType(LONG)); + unionType->addUnionChild(createPrimitiveType(INT)); std::unique_ptr<Type> rowType = createStructType(); - 
rowType->addStructField(createUnionType(childrenTypes), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", std::move(unionType)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -4138,12 +4094,11 @@ TEST(TestColumnReader, testUnionSkips) { (buffer3, ARRAY_SIZE(buffer3)))); // create the row type - std::vector<Type*> childrenTypes; - childrenTypes.push_back(createPrimitiveType(LONG).release()); - childrenTypes.push_back(createPrimitiveType(INT).release()); + std::unique_ptr<Type> unionType = createUnionType(); + unionType->addUnionChild(createPrimitiveType(LONG)); + unionType->addUnionChild(createPrimitiveType(INT)); std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createUnionType(childrenTypes), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", std::move(unionType)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -4242,12 +4197,11 @@ TEST(TestColumnReader, testUnionLongSkip) { (buffer2, ARRAY_SIZE(buffer2)))); // create the row type - std::vector<Type*> childrenTypes; - childrenTypes.push_back(createPrimitiveType(LONG).release()); - childrenTypes.push_back(createPrimitiveType(INT).release()); + std::unique_ptr<Type> unionType = createUnionType(); + unionType->addUnionChild(createPrimitiveType(LONG)); + unionType->addUnionChild(createPrimitiveType(INT)); std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createUnionType(childrenTypes), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", std::move(unionType)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -4326,7 +4280,7 @@ TEST(TestColumnReader, testUnionWithManyVariants) { // for variant in range(0, 130): // [variant & 0x3f, (variant & 0x3f) + 1, (variant & 0x3f) + 2] unsigned char buffer[3 * 130]; - for(int variant = 0; variant < 130; ++variant) { + for(uint variant = 0; variant < 130; ++variant) { buffer[3 * variant] = 0x00; 
buffer[3 * variant + 1] = 0x01; buffer[3 * variant + 2] = static_cast<unsigned char>((variant * 2) & 0x7f); @@ -4337,13 +4291,12 @@ TEST(TestColumnReader, testUnionWithManyVariants) { } // create the row type - std::vector<Type*> childrenTypes; + std::unique_ptr<Type> unionType = createUnionType(); for(size_t variant=0; variant < 130; ++variant) { - childrenTypes.push_back(createPrimitiveType(LONG).release()); + unionType->addUnionChild(createPrimitiveType(LONG)); } std::unique_ptr<Type> rowType = createStructType(); - rowType->addStructField(createUnionType(childrenTypes), "col0"); - rowType->assignIds(0); + rowType->addStructField("col0", std::move(unionType)); std::unique_ptr<ColumnReader> reader = buildReader(*rowType, streams); @@ -4366,7 +4319,7 @@ TEST(TestColumnReader, testUnionWithManyVariants) { for (size_t i = 0; i < batch.numElements; ++i) { EXPECT_EQ(i, unions->tags[i]); EXPECT_EQ(0, unions->offsets[i]); - EXPECT_EQ(i & 0x3f, + EXPECT_EQ(i & 0x3f, dynamic_cast<LongVectorBatch*>(unions->children[unions->tags[i]]) ->data[unions->offsets[i]]); } http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/c++/test/TestType.cc ---------------------------------------------------------------------- diff --git a/c++/test/TestType.cc b/c++/test/TestType.cc new file mode 100644 index 0000000..3c595d0 --- /dev/null +++ b/c++/test/TestType.cc @@ -0,0 +1,277 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Adaptor.hh" +#include "OrcTest.hh" +#include "orc/Type.hh" +#include "wrap/gtest-wrapper.h" + +#include "TypeImpl.hh" + +namespace orc { + + uint64_t checkIds(const Type* type, uint64_t next) { + EXPECT_EQ(next, type->getColumnId()) + << "Wrong id for " << type->toString(); + next += 1; + for(uint64_t child = 0; child < type->getSubtypeCount(); ++child) { + next = checkIds(type->getSubtype(child), next) + 1; + } + EXPECT_EQ(next - 1, type->getMaximumColumnId()) + << "Wrong maximum id for " << type->toString(); + return type->getMaximumColumnId(); + } + + TEST(TestType, simple) { + std::unique_ptr<Type> myType = createStructType(); + myType->addStructField("myInt", createPrimitiveType(INT)); + myType->addStructField("myString", createPrimitiveType(STRING)); + myType->addStructField("myFloat", createPrimitiveType(FLOAT)); + myType->addStructField("list", createListType(createPrimitiveType(LONG))); + myType->addStructField("bool", createPrimitiveType(BOOLEAN)); + + EXPECT_EQ(0, myType->getColumnId()); + EXPECT_EQ(6, myType->getMaximumColumnId()); + EXPECT_EQ(5, myType->getSubtypeCount()); + EXPECT_EQ(STRUCT, myType->getKind()); + EXPECT_EQ("struct<myInt:int,myString:string,myFloat:float," + "list:array<bigint>,bool:boolean>", + myType->toString()); + checkIds(myType.get(), 0); + + const Type* child = myType->getSubtype(0); + EXPECT_EQ(1, child->getColumnId()); + EXPECT_EQ(1, child->getMaximumColumnId()); + EXPECT_EQ(INT, child->getKind()); + EXPECT_EQ(0, child->getSubtypeCount()); + + child = myType->getSubtype(1); + EXPECT_EQ(2, 
child->getColumnId()); + EXPECT_EQ(2, child->getMaximumColumnId()); + EXPECT_EQ(STRING, child->getKind()); + EXPECT_EQ(0, child->getSubtypeCount()); + + child = myType->getSubtype(2); + EXPECT_EQ(3, child->getColumnId()); + EXPECT_EQ(3, child->getMaximumColumnId()); + EXPECT_EQ(FLOAT, child->getKind()); + EXPECT_EQ(0, child->getSubtypeCount()); + + child = myType->getSubtype(3); + EXPECT_EQ(4, child->getColumnId()); + EXPECT_EQ(5, child->getMaximumColumnId()); + EXPECT_EQ(LIST, child->getKind()); + EXPECT_EQ(1, child->getSubtypeCount()); + EXPECT_EQ("array<bigint>", child->toString()); + + child = child->getSubtype(0); + EXPECT_EQ(5, child->getColumnId()); + EXPECT_EQ(5, child->getMaximumColumnId()); + EXPECT_EQ(LONG, child->getKind()); + EXPECT_EQ(0, child->getSubtypeCount()); + + child = myType->getSubtype(4); + EXPECT_EQ(6, child->getColumnId()); + EXPECT_EQ(6, child->getMaximumColumnId()); + EXPECT_EQ(BOOLEAN, child->getKind()); + EXPECT_EQ(0, child->getSubtypeCount()); + } + + TEST(TestType, nested) { + std::unique_ptr<Type> myType = createStructType(); + { + std::unique_ptr<Type> innerStruct = createStructType(); + innerStruct->addStructField("col0", createPrimitiveType(INT)); + + std::unique_ptr<Type> unionType = createUnionType(); + unionType->addUnionChild(std::move(innerStruct)); + unionType->addUnionChild(createPrimitiveType(STRING)); + + myType->addStructField("myList", + createListType + (createMapType(createPrimitiveType(STRING), + std::move(unionType)))); + } + + // get a pointer to the bottom type + const Type* listType = myType->getSubtype(0); + const Type* mapType = listType->getSubtype(0); + const Type* unionType = mapType->getSubtype(1); + const Type* structType = unionType->getSubtype(0); + const Type* intType = structType->getSubtype(0); + + // calculate the id of the child to make sure that we climb correctly + EXPECT_EQ(6, intType->getColumnId()); + EXPECT_EQ(6, intType->getMaximumColumnId()); + EXPECT_EQ("int", intType->toString()); + + 
checkIds(myType.get(), 0); + + EXPECT_EQ(5, structType->getColumnId()); + EXPECT_EQ(6, structType->getMaximumColumnId()); + EXPECT_EQ("struct<col0:int>", structType->toString()); + + EXPECT_EQ(4, unionType->getColumnId()); + EXPECT_EQ(7, unionType->getMaximumColumnId()); + EXPECT_EQ("uniontype<struct<col0:int>,string>", unionType->toString()); + + EXPECT_EQ(2, mapType->getColumnId()); + EXPECT_EQ(7, mapType->getMaximumColumnId()); + EXPECT_EQ("map<string,uniontype<struct<col0:int>,string>>", + mapType->toString()); + + EXPECT_EQ(1, listType->getColumnId()); + EXPECT_EQ(7, listType->getMaximumColumnId()); + EXPECT_EQ("array<map<string,uniontype<struct<col0:int>,string>>>", + listType->toString()); + + EXPECT_EQ(0, myType->getColumnId()); + EXPECT_EQ(7, myType->getMaximumColumnId()); + EXPECT_EQ("struct<myList:array<map<string,uniontype<struct<col0:int>," + "string>>>>", + myType->toString()); + } + /* Exercises buildSelectedType: builds a struct with fields col0..col7 whose column ids are asserted to run from 0 to 13, then prunes it with several selected bitmaps and compares the printed type and the column ids of the result. */ + TEST(TestType, selectedType) { + std::unique_ptr<Type> myType = createStructType(); + myType->addStructField("col0", createPrimitiveType(BYTE)); + myType->addStructField("col1", createPrimitiveType(SHORT)); + myType->addStructField("col2", + createListType(createPrimitiveType(STRING))); + myType->addStructField("col3", + createMapType(createPrimitiveType(FLOAT), + createPrimitiveType(DOUBLE))); + std::unique_ptr<Type> unionType = createUnionType(); + unionType->addUnionChild(createCharType(CHAR, 100)); + unionType->addUnionChild(createCharType(VARCHAR, 200)); + myType->addStructField("col4", std::move(unionType)); + myType->addStructField("col5", createPrimitiveType(INT)); + myType->addStructField("col6", createPrimitiveType(LONG)); + myType->addStructField("col7", createDecimalType(10, 2)); + + checkIds(myType.get(), 0); + EXPECT_EQ("struct<col0:tinyint,col1:smallint,col2:array<string>," + "col3:map<float,double>,col4:uniontype<char(100),varchar(200)>," + "col5:int,col6:bigint,col7:decimal(10,2)>", myType->toString()); + EXPECT_EQ(0, myType->getColumnId()); + 
EXPECT_EQ(13, myType->getMaximumColumnId()); + /* One flag per column id 0..13. Only the root (0) and id 2 are set; the assertions expect just col1 to survive, with its column id still 2 and the pruned root still reporting maximum column id 13. */ + std::vector<bool> selected(14); + selected[0] = true; + selected[2] = true; + std::unique_ptr<Type> cutType = buildSelectedType(myType.get(), + selected); + EXPECT_EQ("struct<col1:smallint>", cutType->toString()); + EXPECT_EQ(0, cutType->getColumnId()); + EXPECT_EQ(13, cutType->getMaximumColumnId()); + EXPECT_EQ(2, cutType->getSubtype(0)->getColumnId()); + /* Every column selected: the printed type is expected to equal the full type printed above. */ + selected.assign(14, true); + cutType = buildSelectedType(myType.get(), selected); + EXPECT_EQ("struct<col0:tinyint,col1:smallint,col2:array<string>," + "col3:map<float,double>,col4:uniontype<char(100),varchar(200)>," + "col5:int,col6:bigint,col7:decimal(10,2)>", cutType->toString()); + EXPECT_EQ(0, cutType->getColumnId()); + EXPECT_EQ(13, cutType->getMaximumColumnId()); + /* Ids 8 and 10 are asserted below to be the col4 union and its varchar child. */ + selected.assign(14, false); + selected[0] = true; + selected[8] = true; + selected[10] = true; + cutType = buildSelectedType(myType.get(), selected); + EXPECT_EQ("struct<col4:uniontype<varchar(200)>>", cutType->toString()); + EXPECT_EQ(0, cutType->getColumnId()); + EXPECT_EQ(13, cutType->getMaximumColumnId()); + EXPECT_EQ(8, cutType->getSubtype(0)->getColumnId()); + EXPECT_EQ(10, cutType->getSubtype(0)->getMaximumColumnId()); + EXPECT_EQ(10, cutType->getSubtype(0)->getSubtype(0)->getColumnId()); + /* Union selected without any of its children: the expected text is an empty uniontype. */ + selected.assign(14, false); + selected[0] = true; + selected[8] = true; + cutType = buildSelectedType(myType.get(), selected); + EXPECT_EQ("struct<col4:uniontype<>>", cutType->toString()); + /* Root only: the expected text is an empty struct. */ + selected.assign(14, false); + selected[0] = true; + cutType = buildSelectedType(myType.get(), selected); + EXPECT_EQ("struct<>", cutType->toString()); + /* Ids 3 and 4 together are expected to yield col2 with its string element. */ + selected.assign(14, false); + selected[0] = true; + selected[3] = true; + selected[4] = true; + cutType = buildSelectedType(myType.get(), selected); + EXPECT_EQ("struct<col2:array<string>>", cutType->toString()); + /* List (id 3) selected without its element (id 4): the element is expected to print as void, and the list is expected to keep the id range 3..4. */ + selected.assign(14, false); + selected[0] = true; + selected[3] = true; + cutType = buildSelectedType(myType.get(), selected); + 
EXPECT_EQ("struct<col2:array<void>>", cutType->toString()); + EXPECT_EQ(3, cutType->getSubtype(0)->getColumnId()); + EXPECT_EQ(4, cutType->getSubtype(0)->getMaximumColumnId()); + /* Map cases: id 5 is the map, and the assertions below show id 6 as its float key and id 7 as its double value. The map is expected to keep the id range 5..7 in every case, and an unselected key or value is expected to print as void. */ + selected.assign(14, false); + selected[0] = true; + selected[5] = true; + selected[6] = true; + selected[7] = true; + cutType = buildSelectedType(myType.get(), selected); + EXPECT_EQ("struct<col3:map<float,double>>", cutType->toString()); + EXPECT_EQ(5, cutType->getSubtype(0)->getColumnId()); + EXPECT_EQ(7, cutType->getSubtype(0)->getMaximumColumnId()); + + selected.assign(14, false); + selected[0] = true; + selected[5] = true; + cutType = buildSelectedType(myType.get(), selected); + EXPECT_EQ("struct<col3:map<void,void>>", cutType->toString()); + EXPECT_EQ(5, cutType->getSubtype(0)->getColumnId()); + EXPECT_EQ(7, cutType->getSubtype(0)->getMaximumColumnId()); + + selected.assign(14, false); + selected[0] = true; + selected[5] = true; + selected[6] = true; + cutType = buildSelectedType(myType.get(), selected); + EXPECT_EQ("struct<col3:map<float,void>>", cutType->toString()); + EXPECT_EQ(5, cutType->getSubtype(0)->getColumnId()); + EXPECT_EQ(7, cutType->getSubtype(0)->getMaximumColumnId()); + + selected.assign(14, false); + selected[0] = true; + selected[5] = true; + selected[7] = true; + cutType = buildSelectedType(myType.get(), selected); + EXPECT_EQ("struct<col3:map<void,double>>", cutType->toString()); + EXPECT_EQ(5, cutType->getSubtype(0)->getColumnId()); + EXPECT_EQ(7, cutType->getSubtype(0)->getMaximumColumnId()); + /* Two non-adjacent leaf columns (ids 1 and 13): each is expected to keep its original id as both column id and maximum column id. */ + selected.assign(14, false); + selected[0] = true; + selected[1] = true; + selected[13] = true; + cutType = buildSelectedType(myType.get(), selected); + EXPECT_EQ("struct<col0:tinyint,col7:decimal(10,2)>", cutType->toString()); + EXPECT_EQ(1, cutType->getSubtype(0)->getColumnId()); + EXPECT_EQ(1, cutType->getSubtype(0)->getMaximumColumnId()); + EXPECT_EQ(13, cutType->getSubtype(1)->getColumnId()); + EXPECT_EQ(13, cutType->getSubtype(1)->getMaximumColumnId()); + } +}
http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/tools/src/FileContents.cc ---------------------------------------------------------------------- diff --git a/tools/src/FileContents.cc b/tools/src/FileContents.cc index 694fea3..ff7eb72 100644 --- a/tools/src/FileContents.cc +++ b/tools/src/FileContents.cc @@ -32,7 +32,7 @@ void printContents(const char* filename, const orc::ReaderOptions opts) { std::unique_ptr<orc::ColumnVectorBatch> batch = reader->createRowBatch(1000); std::string line; std::unique_ptr<orc::ColumnPrinter> printer = - createColumnPrinter(line, reader->getType()); + createColumnPrinter(line, &reader->getSelectedType()); while (reader->next(*batch)) { printer->reset(*batch); @@ -48,12 +48,36 @@ void printContents(const char* filename, const orc::ReaderOptions opts) { int main(int argc, char* argv[]) { if (argc < 2) { - std::cout << "Usage: file-contents <filename>\n"; + std::cout << "Usage: file-contents <filename> [--columns=1,2,...]\n" + << "Print contents of <filename>.\n" + << "If columns are specified, only these top-level (logical) columns are printed.\n" ; return 1; } try { + const std::string COLUMNS_PREFIX = "--columns="; + std::list<uint64_t> cols; + char* filename = ORC_NULLPTR; + + // Read command-line options + char *param, *value; + for (int i = 1; i < argc; i++) { + if ( (param = std::strstr(argv[i], COLUMNS_PREFIX.c_str())) ) { + value = std::strtok(param+COLUMNS_PREFIX.length(), "," ); + while (value) { + cols.push_back(static_cast<uint64_t>(std::atoi(value))); + value = std::strtok(nullptr, "," ); + } + } else { + filename = argv[i]; + } + } orc::ReaderOptions opts; - printContents(argv[1], opts); + if (cols.size() > 0) { + opts.include(cols); + } + if (filename != ORC_NULLPTR) { + printContents(filename, opts); + } } catch (std::exception& ex) { std::cerr << "Caught exception: " << ex.what() << "\n"; return 1; http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/tools/src/FileMemory.cc 
---------------------------------------------------------------------- diff --git a/tools/src/FileMemory.cc b/tools/src/FileMemory.cc index ba8459a..2bfd21a 100644 --- a/tools/src/FileMemory.cc +++ b/tools/src/FileMemory.cc @@ -70,7 +70,7 @@ int main(int argc, char* argv[]) { char* filename = ORC_NULLPTR; // Default parameters - std::list<int64_t> cols; + std::list<uint64_t> cols; uint32_t batchSize = 1000; // Read command-line options @@ -79,7 +79,7 @@ int main(int argc, char* argv[]) { if ( (param = std::strstr(argv[i], COLUMNS_PREFIX.c_str())) ) { value = std::strtok(param+COLUMNS_PREFIX.length(), "," ); while (value) { - cols.push_back(std::atoi(value)); + cols.push_back(static_cast<uint64_t>(std::atoi(value))); value = std::strtok(nullptr, "," ); } } else if ( (param=strstr(argv[i], BATCH_PREFIX.c_str())) ) { http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/tools/src/FileScan.cc ---------------------------------------------------------------------- diff --git a/tools/src/FileScan.cc b/tools/src/FileScan.cc index cbb5980..4683847 100644 --- a/tools/src/FileScan.cc +++ b/tools/src/FileScan.cc @@ -31,10 +31,6 @@ int main(int argc, char* argv[]) { } orc::ReaderOptions opts; - std::list<int64_t> cols; - cols.push_back(0); - opts.include(cols); - std::unique_ptr<orc::Reader> reader; try{ reader = orc::createReader(orc::readLocalFile(std::string(argv[1])), opts); http://git-wip-us.apache.org/repos/asf/orc/blob/3945f066/tools/test/TestReader.cc ---------------------------------------------------------------------- diff --git a/tools/test/TestReader.cc b/tools/test/TestReader.cc index 0d337ca..7862eff 100644 --- a/tools/test/TestReader.cc +++ b/tools/test/TestReader.cc @@ -140,7 +140,7 @@ namespace orc { std::unique_ptr<ColumnVectorBatch> batch = reader->createRowBatch(1024); std::string line; std::unique_ptr<orc::ColumnPrinter> printer = - orc::createColumnPrinter(line, reader->getType()); + orc::createColumnPrinter(line, &reader->getSelectedType()); 
GzipTextReader expected(getJsonFilename()); std::string expectedLine; while (reader->next(*batch)) { @@ -511,8 +511,8 @@ INSTANTIATE_TEST_CASE_P(TestReader1900, MatchTest, TEST(Reader, columnSelectionTest) { ReaderOptions opts; - std::list<int64_t> includes; - for(int i=1; i < 10; i += 2) { + std::list<uint64_t> includes; + for(uint64_t i=0; i < 9; i += 2) { includes.push_back(i); } opts.include(includes); @@ -545,17 +545,17 @@ INSTANTIATE_TEST_CASE_P(TestReader1900, MatchTest, EXPECT_EQ("_col6", rootType.getFieldName(6)); EXPECT_EQ("_col7", rootType.getFieldName(7)); EXPECT_EQ("_col8", rootType.getFieldName(8)); - EXPECT_EQ(INT, rootType.getSubtype(0).getKind()); - EXPECT_EQ(STRING, rootType.getSubtype(1).getKind()); - EXPECT_EQ(STRING, rootType.getSubtype(2).getKind()); - EXPECT_EQ(STRING, rootType.getSubtype(3).getKind()); - EXPECT_EQ(INT, rootType.getSubtype(4).getKind()); - EXPECT_EQ(STRING, rootType.getSubtype(5).getKind()); - EXPECT_EQ(INT, rootType.getSubtype(6).getKind()); - EXPECT_EQ(INT, rootType.getSubtype(7).getKind()); - EXPECT_EQ(INT, rootType.getSubtype(8).getKind()); + EXPECT_EQ(INT, rootType.getSubtype(0)->getKind()); + EXPECT_EQ(STRING, rootType.getSubtype(1)->getKind()); + EXPECT_EQ(STRING, rootType.getSubtype(2)->getKind()); + EXPECT_EQ(STRING, rootType.getSubtype(3)->getKind()); + EXPECT_EQ(INT, rootType.getSubtype(4)->getKind()); + EXPECT_EQ(STRING, rootType.getSubtype(5)->getKind()); + EXPECT_EQ(INT, rootType.getSubtype(6)->getKind()); + EXPECT_EQ(INT, rootType.getSubtype(7)->getKind()); + EXPECT_EQ(INT, rootType.getSubtype(8)->getKind()); for(unsigned int i=0; i < 9; ++i) { - EXPECT_EQ(i + 1, rootType.getSubtype(i).getColumnId()) + EXPECT_EQ(i + 1, rootType.getSubtype(i)->getColumnId()) << "fail on " << i; } @@ -637,6 +637,11 @@ INSTANTIATE_TEST_CASE_P(TestReader1900, MatchTest, StructVectorBatch *fullStructBatch = dynamic_cast<StructVectorBatch*>(fullBatch.get()); ASSERT_TRUE(fullStructBatch != nullptr); + std::cout << "OOM fullBatch " << 
reinterpret_cast<uint64_t>(fullStructBatch) + << "\n"; + std::cout << "OOM fields.size() " + << fullStructBatch->fields.size() + << "\n"; LongVectorBatch* fullLongVector = dynamic_cast<LongVectorBatch*>(fullStructBatch->fields[0]); ASSERT_TRUE(fullLongVector != nullptr); @@ -911,11 +916,8 @@ TEST(Reader, selectColumns) { orc::ReaderOptions opts; std::ostringstream filename; filename << exampleDirectory << "/TestOrcFile.testSeek.orc"; - std::list<int64_t> cols; // All columns - cols.push_back(0); - opts.include(cols); std::unique_ptr<orc::Reader> reader = orc::createReader(orc::readLocalFile(filename.str()), opts); std::vector<bool> c = reader->getSelectedColumns(); @@ -923,10 +925,30 @@ TEST(Reader, selectColumns) { for (unsigned int i=0; i < c.size(); i++) { EXPECT_TRUE(c[i]); } + std::unique_ptr<orc::ColumnVectorBatch> batch = reader->createRowBatch(1); + std::string line; + std::unique_ptr<orc::ColumnPrinter> printer = + createColumnPrinter(line, &reader->getSelectedType()); + reader->next(*batch); + printer->reset(*batch); + printer->printRow(0); + std::ostringstream expected; + expected << "{\"boolean1\": true, \"byte1\": -76, " + << "\"short1\": 21684, \"int1\": -941468492, " + << "\"long1\": -6863419716327549772, \"float1\": 0.7762409, " + << "\"double1\": 0.77624090391187, \"bytes1\": [123, 108, 207, 27, 93, " + << "157, 139, 233, 181, 90, 14, 60, 34, 120, 26, 119, 231, 50, 155, 121], " + << "\"string1\": \"887336a7\", \"middle\": {\"list\": [{\"int1\": " + << "-941468492, \"string1\": \"887336a7\"}, {\"int1\": -1598014431, " + << "\"string1\": \"ba419d35-x\"}]}, \"list\": [], \"map\": [{\"key\": " + << "\"ba419d35-x\", \"value\": {\"int1\": -1598014431, \"string1\": " + << "\"ba419d35-x\"}}, {\"key\": \"887336a7\", \"value\": {\"int1\": " + << "-941468492, \"string1\": \"887336a7\"}}]}"; + EXPECT_EQ(expected.str(), line); // Int column #2 - cols.clear(); - cols.push_back(2); + std::list<uint64_t> cols; + cols.push_back(1); opts.include(cols); reader = 
orc::createReader(orc::readLocalFile(filename.str()), opts); c = reader->getSelectedColumns(); @@ -936,10 +958,19 @@ TEST(Reader, selectColumns) { else EXPECT_TRUE(!c[i]); } + batch = reader->createRowBatch(1); + line.clear(); + printer = createColumnPrinter(line, &reader->getSelectedType()); + reader->next(*batch); + printer->reset(*batch); + printer->printRow(0); + std::string expectedInt("{\"byte1\": -76}"); + EXPECT_EQ(expectedInt, line); + // Struct column #10 cols.clear(); - cols.push_back(10); + cols.push_back(9); opts.include(cols); reader = orc::createReader(orc::readLocalFile(filename.str()), opts); c = reader->getSelectedColumns(); @@ -949,10 +980,21 @@ TEST(Reader, selectColumns) { else EXPECT_TRUE(!c[i]); } + batch = reader->createRowBatch(1); + line.clear(); + printer = createColumnPrinter(line, &reader->getSelectedType()); + reader->next(*batch); + printer->reset(*batch); + printer->printRow(0); + std::ostringstream expectedStruct; + expectedStruct << "{\"middle\": {\"list\": " + << "[{\"int1\": -941468492, \"string1\": \"887336a7\"}, " + << "{\"int1\": -1598014431, \"string1\": \"ba419d35-x\"}]}}"; + EXPECT_EQ(expectedStruct.str(), line); // Array column #11 cols.clear(); - cols.push_back(11); + cols.push_back(10); opts.include(cols); reader = orc::createReader(orc::readLocalFile(filename.str()), opts); c = reader->getSelectedColumns(); @@ -962,10 +1004,18 @@ TEST(Reader, selectColumns) { else EXPECT_TRUE(!c[i]); } + batch = reader->createRowBatch(1); + line.clear(); + printer = createColumnPrinter(line, &reader->getSelectedType()); + reader->next(*batch); + printer->reset(*batch); + printer->printRow(0); + std::string expectedArray("{\"list\": []}"); + EXPECT_EQ(expectedArray, line); // Map column #12 cols.clear(); - cols.push_back(12); + cols.push_back(11); opts.include(cols); reader = orc::createReader(orc::readLocalFile(filename.str()), opts); c = reader->getSelectedColumns(); @@ -975,6 +1025,18 @@ TEST(Reader, selectColumns) { else 
EXPECT_TRUE(!c[i]); } + batch = reader->createRowBatch(1); + line.clear(); + printer = createColumnPrinter(line, &reader->getSelectedType()); + reader->next(*batch); + printer->reset(*batch); + printer->printRow(0); + std::ostringstream expectedMap; + expectedMap << "{\"map\": [{\"key\": \"ba419d35-x\", \"value\": {\"int1\":" + << " -1598014431, \"string1\": \"ba419d35-x\"}}, {\"key\": " + << "\"887336a7\", \"value\": {\"int1\": -941468492, \"string1\": " + << "\"887336a7\"}}]}"; + EXPECT_EQ(expectedMap.str(), line); } TEST(Reader, memoryUse) { @@ -983,10 +1045,10 @@ TEST(Reader, memoryUse) { std::unique_ptr<orc::Reader> reader; std::unique_ptr<orc::ColumnVectorBatch> batch; orc::ReaderOptions opts; - std::list<int64_t> cols; + std::list<uint64_t> cols; // Int column - cols.push_back(2); + cols.push_back(1); opts.include(cols); reader = orc::createReader(orc::readLocalFile(filename.str()), opts); EXPECT_EQ(483517, reader->getMemoryUse()); @@ -998,7 +1060,7 @@ TEST(Reader, memoryUse) { // Binary column cols.clear(); - cols.push_back(8); + cols.push_back(7); opts.include(cols); reader = orc::createReader(orc::readLocalFile(filename.str()), opts); EXPECT_EQ(835906, reader->getMemoryUse()); @@ -1008,7 +1070,7 @@ TEST(Reader, memoryUse) { // String column cols.clear(); - cols.push_back(9); + cols.push_back(8); opts.include(cols); reader = orc::createReader(orc::readLocalFile(filename.str()), opts); EXPECT_EQ(901442, reader->getMemoryUse()); @@ -1018,7 +1080,7 @@ TEST(Reader, memoryUse) { // Struct column (with a List subcolumn) cols.clear(); - cols.push_back(10); + cols.push_back(9); opts.include(cols); reader = orc::createReader(orc::readLocalFile(filename.str()), opts); EXPECT_EQ(1294658, reader->getMemoryUse()); @@ -1028,7 +1090,7 @@ TEST(Reader, memoryUse) { // List column cols.clear(); - cols.push_back(11); + cols.push_back(10); opts.include(cols); reader = orc::createReader(orc::readLocalFile(filename.str()), opts); EXPECT_EQ(1229122, reader->getMemoryUse()); @@ 
-1038,7 +1100,7 @@ TEST(Reader, memoryUse) { // Map column cols.clear(); - cols.push_back(12); + cols.push_back(11); opts.include(cols); reader = orc::createReader(orc::readLocalFile(filename.str()), opts); EXPECT_EQ(1491266, reader->getMemoryUse()); @@ -1048,7 +1110,9 @@ TEST(Reader, memoryUse) { // All columns cols.clear(); - cols.push_back(0); + for(uint64_t c=0; c < 12; ++c) { + cols.push_back(c); + } opts.include(cols); reader = orc::createReader(orc::readLocalFile(filename.str()), opts); EXPECT_EQ(4112706, reader->getMemoryUse());
