I use the following code to filter a table, but it always core dumps at
scanner_builder->Filter(filter_expression_). Is there a better way to filter
a Table or a RecordBatch?
By the way, dataset::ScannerBuilder always core dumps when I use it in tfio to
create a TensorFlow dataset; it is most likely buggy.
// Read the file columns selected by column_indices_ into a single table.
std::shared_ptr<::arrow::Table> table;
CHECK_ARROW(reader->ReadTable(column_indices_, &table));
// Wrap the table in a TableBatchReader so it can be consumed as a sequence
// of record batches.
// NOTE(review): the reader is constructed from a reference to *table;
// presumably `table` must stay alive for as long as `tr` is used -- confirm.
auto tr = std::make_shared<arrow::TableBatchReader>(*table.get());
// Create a scanner builder on top of the batch reader so a filter can be
// attached to it.
auto scanner_builder =
arrow::dataset::ScannerBuilder::FromRecordBatchReader(tr);
// Attach the filter only when the dataset's filter_ is non-empty.
if (!dataset()->filter_.empty()) {
// Debug output: print the filter expression before it is applied.
std::cout << filter_expression_.ToString() << std::endl;
// NOTE(review): the value returned by Filter() is discarded here. If it
// returns an arrow::Status, it should probably be checked (e.g. wrapped in
// CHECK_ARROW like ReadTable above) so that a failure is reported rather
// than ignored -- confirm against the Arrow version in use.
scanner_builder->Filter(filter_expression_);
}
1057445597
[email protected]