ZhangXin created ORC-1304:
-----------------------------

             Summary: [C++] ParseError thrown when using SearchArgument with nested struct
                 Key: ORC-1304
                 URL: https://issues.apache.org/jira/browse/ORC-1304
             Project: ORC
          Issue Type: Bug
            Reporter: ZhangXin


[GitHub issue #1296|https://github.com/apache/orc/issues/1296]
Code example to reproduce the problem:
{code:c++}
WriterOptions options;
  auto stream = writeLocalFile("orc_file_test");
  MemoryPool* pool = getDefaultPool();
  std::unique_ptr<Type> type(Type::buildTypeFromString(
      "struct<col0:struct<col1:int>,col2:struct<col3:int>>"));

  size_t num = 50000;
  std::unique_ptr<Writer> writer = createWriter(*type, stream.get(), options);

  std::unique_ptr<ColumnVectorBatch> batch = writer->createRowBatch(num);
  StructVectorBatch* structBatch =
      dynamic_cast<StructVectorBatch*>(batch.get());
  StructVectorBatch* structBatch2 =
      dynamic_cast<StructVectorBatch*>(structBatch->fields[0]);
  LongVectorBatch* intBatch =
      dynamic_cast<LongVectorBatch*>(structBatch2->fields[0]);

  StructVectorBatch* structBatch3 =
      dynamic_cast<StructVectorBatch*>(structBatch->fields[1]);
  LongVectorBatch* intBatch2 =
      dynamic_cast<LongVectorBatch*>(structBatch3->fields[0]);

  structBatch->numElements = num;
  structBatch2->numElements = num;

  structBatch3->numElements = num;
  structBatch3->hasNulls = true;

  for (int64_t i = 0; i < num; ++i) {
    intBatch->data.data()[i] = rand() % 150000;
    intBatch->notNull[i] = 1;

    intBatch2->notNull[i] = 0;
    intBatch2->hasNulls = true;

    structBatch3->notNull[i] = 0;
  }
  intBatch->hasNulls = false;

  writer->add(*batch);
  writer->close();

  ReaderOptions readOptions;
  readOptions.setMemoryPool(*getDefaultPool());
  auto reader = createReader(readLocalFile("orc_file_test"), readOptions);
  orc::RowReaderOptions rowOptions;
  rowOptions.searchArgument(
      SearchArgumentFactory::newBuilder()
          ->startAnd()
          .equals(2, PredicateDataType::LONG, Literal((int64_t)5))
          .end()
          .build());
  std::unique_ptr<RowReader> rowReader = reader->createRowReader(rowOptions);

  batch = rowReader->createRowBatch(num);
  structBatch = dynamic_cast<StructVectorBatch*>(batch.get());
  structBatch2 = dynamic_cast<StructVectorBatch*>(structBatch->fields[0]);
  intBatch = dynamic_cast<LongVectorBatch*>(structBatch2->fields[0]);

  structBatch3 = dynamic_cast<StructVectorBatch*>(structBatch->fields[1]);

  while (rowReader->next(*batch)) {
    for (size_t i = 0; i < batch->numElements; i++) {
      
    }
  }
{code}

Stack trace produced when running the example:

{code:java}
terminate called after throwing an instance of 'orc::ParseError'
  what():  bad read in nextBuffer
*** Aborted at 1666816640 (Unix time, try 'date -d @1666816640') ***
*** Signal 6 (SIGABRT) (0x2035c0002b7ad) received by PID 178093 (pthread TID 
0x7ffb12545a80) (linux TID 178093) (maybe from PID 178093, UID 131932) (code: 
-6), stack trace: ***
    @ 0000000000000000 (unknown)
    @ 000000000009c9d3 __GI___pthread_kill
    @ 00000000000444ec __GI_raise
    @ 000000000002c432 __GI_abort
    @ 00000000000a3fd4 __gnu_cxx::__verbose_terminate_handler()
    @ 00000000000a1b39 __cxxabiv1::__terminate(void (*)())
    @ 00000000000a1ba4 std::terminate()
    @ 00000000000a1e6f __cxa_throw
    @ 0000000001efcd55 __cxa_throw
    @ 00000000075b676c orc::BooleanRleDecoderImpl::seek(orc::PositionProvider&)
                       
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ByteRLE.cc:526
    @ 00000000075af711 
orc::IntegerColumnReader::seekToRowGroup(std::unordered_map<unsigned long, 
orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>, 
std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
                       
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:120
    @ 00000000075af67f 
orc::StructColumnReader::seekToRowGroup(std::unordered_map<unsigned long, 
orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>, 
std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
                       
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:965
    @ 00000000075af67f 
orc::StructColumnReader::seekToRowGroup(std::unordered_map<unsigned long, 
orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>, 
std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
                       
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:965
    @ 0000000007598179 orc::RowReaderImpl::seekToRowGroup(unsigned int)
                       
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:440
    @ 000000000759d700 orc::RowReaderImpl::startNextStripe()
                       
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:1037
    @ 000000000759daf4 orc::RowReaderImpl::next(orc::ColumnVectorBatch&)
                       
/home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:1055
    @ 0000000002fba9bc main
    @ 000000000002c656 __libc_start_call_main
    @ 000000000002c717 __libc_start_main_alias_2
    @ 0000000002fb2780 _start
{code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to