ZhangHuiGui opened a new issue, #2201:
URL: https://github.com/apache/orc/issues/2201
## Reproduction code and ORC data
ORC version: current `main`, commit
`3251a01f56d3f813e4889eb7d75eb2d0d63551f6`
Reproduction code:
```cpp
#include <cmath>
#include <iostream>
#include <queue>
#include <list>
#include <orc/OrcFile.hh>
#include <orc/Reader.hh>
#include <orc/Type.hh>
#include <orc/Vector.hh>
#include <string>
#include <vector>
#define BATCH_SIZE 1024
/// Dumps every string of the first field of a struct batch to stdout.
///
/// @param curr_batch  Batch to print. It is expected to be an
///                    orc::StructVectorBatch whose first field is an
///                    orc::StringVectorBatch; anything else is reported on
///                    stderr and nothing is printed.
/// @param msg         Tag inserted into every output line so that calls made
///                    at different points of the program can be told apart.
///
/// NOTE(review): entries are printed without consulting any null mask;
/// presumably the selected column contains no nulls -- confirm against the
/// attached data file.
void print_batch(const ORC_UNIQUE_PTR<orc::ColumnVectorBatch>& curr_batch,
                 const char* msg) {
  // std::endl is kept on purpose: the output is flushed line by line so it is
  // not lost if a later access crashes.
  std::cout << "debug check vector str " << msg << " curr_batch "
            << curr_batch.get() << std::endl;

  if (!curr_batch) {
    std::cerr << "print_batch: batch is null" << std::endl;
    return;
  }

  // dynamic_cast yields nullptr on a type mismatch; check before use instead
  // of dereferencing blindly.
  auto* struct_batch = dynamic_cast<orc::StructVectorBatch*>(curr_batch.get());
  if (struct_batch == nullptr || struct_batch->fields.empty()) {
    std::cerr << "print_batch: batch is not a struct batch with fields"
              << std::endl;
    return;
  }

  auto* string_batch =
      dynamic_cast<orc::StringVectorBatch*>(struct_batch->fields[0]);
  if (string_batch == nullptr) {
    std::cerr << "print_batch: first field is not a string batch" << std::endl;
    return;
  }

  const int64_t total = curr_batch->numElements;
  for (int64_t idx = 0; idx < total; idx++) {
    const int64_t orc_data_len = string_batch->length[idx];
    const char* orc_data = static_cast<const char*>(string_batch->data[idx]);
    std::cout << "debug check vector str " << msg << " "
              << std::string(orc_data, static_cast<std::size_t>(orc_data_len))
              << std::endl;
  }
}
int main(int argc, char* argv[]) {
if (argc < 2) {
std::cerr << " invalid args " << std::endl;
return 1;
}
int num_count = std::stoi(argv[1]);
try {
const std::string& filePath = "000000_0";
// reader
auto reader =
orc::createReader(orc::readFile(filePath), orc::ReaderOptions());
auto row_count = reader->getNumberOfRows();
// row_reader
orc::RowReaderOptions rowReaderOptions;
std::list<uint64_t> l;
l.push_back(6);
rowReaderOptions.include(l);
std::unique_ptr<orc::RowReader> rowReader =
reader->createRowReader(rowReaderOptions);
const orc::Type& schema = rowReader->getSelectedType();
// read colum batch to vector
std::vector<ORC_UNIQUE_PTR<orc::ColumnVectorBatch>> batch_pool;
std::unique_ptr<orc::ColumnVectorBatch> batch =
rowReader->createRowBatch(BATCH_SIZE);
batch_pool.reserve(row_count / BATCH_SIZE + 1);
int batch_count = 0;
while (rowReader->next(*batch) && batch_count < num_count) {
batch_pool.push_back(std::move(batch));
if (batch_pool.size() == 441 || batch_pool.size() == 1)
print_batch(batch_pool[0], "inner loop");
batch = rowReader->createRowBatch(BATCH_SIZE);
batch_count++;
}
// BUG access the first batch in vector, all the data is invalid
print_batch(batch_pool[0], "after loop");
} catch (const std::exception& e) {
std::cerr << "Error: " << e.what() << std::endl;
return 1;
}
return 0;
}
```
Run the program with the attached ORC data:
`./test_orc 440`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]