ZhangXin created ORC-1304:
-----------------------------
Summary: [C++] throw ParseError when using SearchArgument with
nested struct
Key: ORC-1304
URL: https://issues.apache.org/jira/browse/ORC-1304
Project: ORC
Issue Type: Bug
Reporter: ZhangXin
[GitHub issue #1296|https://github.com/apache/orc/issues/1296]
Code example:
{code:c++}
WriterOptions options;
auto stream = writeLocalFile("orc_file_test");
MemoryPool* pool = getDefaultPool();
std::unique_ptr<Type> type(Type::buildTypeFromString(
"struct<col0:struct<col1:int>,col2:struct<col3:int>>"));
size_t num = 50000;
std::unique_ptr<Writer> writer = createWriter(*type, stream.get(), options);
std::unique_ptr<ColumnVectorBatch> batch = writer->createRowBatch(num);
StructVectorBatch* structBatch =
dynamic_cast<StructVectorBatch*>(batch.get());
StructVectorBatch* structBatch2 =
dynamic_cast<StructVectorBatch*>(structBatch->fields[0]);
LongVectorBatch* intBatch =
dynamic_cast<LongVectorBatch*>(structBatch2->fields[0]);
StructVectorBatch* structBatch3 =
dynamic_cast<StructVectorBatch*>(structBatch->fields[1]);
LongVectorBatch* intBatch2 =
dynamic_cast<LongVectorBatch*>(structBatch3->fields[0]);
structBatch->numElements = num;
structBatch2->numElements = num;
structBatch3->numElements = num;
structBatch3->hasNulls = true;
for (int64_t i = 0; i < num; ++i) {
intBatch->data.data()[i] = rand() % 150000;
intBatch->notNull[i] = 1;
intBatch2->notNull[i] = 0;
intBatch2->hasNulls = true;
structBatch3->notNull[i] = 0;
}
intBatch->hasNulls = false;
writer->add(*batch);
writer->close();
ReaderOptions readOptions;
readOptions.setMemoryPool(*getDefaultPool());
auto reader = createReader(readLocalFile("orc_file_test"), readOptions);
orc::RowReaderOptions rowOptions;
rowOptions.searchArgument(
SearchArgumentFactory::newBuilder()
->startAnd()
.equals(2, PredicateDataType::LONG, Literal((int64_t)5))
.end()
.build());
std::unique_ptr<RowReader> rowReader = reader->createRowReader(rowOptions);
batch = rowReader->createRowBatch(num);
structBatch = dynamic_cast<StructVectorBatch*>(batch.get());
structBatch2 = dynamic_cast<StructVectorBatch*>(structBatch->fields[0]);
intBatch = dynamic_cast<LongVectorBatch*>(structBatch2->fields[0]);
structBatch3 = dynamic_cast<StructVectorBatch*>(structBatch->fields[1]);
while (rowReader->next(*batch)) {
for (size_t i = 0; i < batch->numElements; i++) {
}
}
{code}
Stack trace:
{code:java}
terminate called after throwing an instance of 'orc::ParseError'
what(): bad read in nextBuffer
*** Aborted at 1666816640 (Unix time, try 'date -d @1666816640') ***
*** Signal 6 (SIGABRT) (0x2035c0002b7ad) received by PID 178093 (pthread TID
0x7ffb12545a80) (linux TID 178093) (maybe from PID 178093, UID 131932) (code:
-6), stack trace: ***
@ 0000000000000000 (unknown)
@ 000000000009c9d3 __GI___pthread_kill
@ 00000000000444ec __GI_raise
@ 000000000002c432 __GI_abort
@ 00000000000a3fd4 __gnu_cxx::__verbose_terminate_handler()
@ 00000000000a1b39 __cxxabiv1::__terminate(void (*)())
@ 00000000000a1ba4 std::terminate()
@ 00000000000a1e6f __cxa_throw
@ 0000000001efcd55 __cxa_throw
@ 00000000075b676c orc::BooleanRleDecoderImpl::seek(orc::PositionProvider&)
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ByteRLE.cc:526
@ 00000000075af711
orc::IntegerColumnReader::seekToRowGroup(std::unordered_map<unsigned long,
orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>,
std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:120
@ 00000000075af67f
orc::StructColumnReader::seekToRowGroup(std::unordered_map<unsigned long,
orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>,
std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:965
@ 00000000075af67f
orc::StructColumnReader::seekToRowGroup(std::unordered_map<unsigned long,
orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>,
std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:965
@ 0000000007598179 orc::RowReaderImpl::seekToRowGroup(unsigned int)
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:440
@ 000000000759d700 orc::RowReaderImpl::startNextStripe()
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:1037
@ 000000000759daf4 orc::RowReaderImpl::next(orc::ColumnVectorBatch&)
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:1055
@ 0000000002fba9bc main
@ 000000000002c656 __libc_start_call_main
@ 000000000002c717 __libc_start_main_alias_2
@ 0000000002fb2780 _start
{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)