yjshen commented on code in PR #2189:
URL: https://github.com/apache/arrow-datafusion/pull/2189#discussion_r846773771
##########
datafusion/core/src/row/reader.rs:
##########
@@ -18,71 +18,33 @@
//! Accessing row from raw bytes
use crate::error::{DataFusionError, Result};
-#[cfg(feature = "jit")]
-use crate::reg_fn;
-#[cfg(feature = "jit")]
-use crate::row::fn_name;
-use crate::row::{
- all_valid, get_offsets, schema_null_free, supported, NullBitsFormatter,
-};
+use crate::row::layout::get_offsets;
+use crate::row::validity::{all_valid, NullBitsFormatter};
+use crate::row::{row_supported, schema_null_free, MutableRecordBatch};
use arrow::array::*;
use arrow::datatypes::{DataType, Schema};
-use arrow::error::Result as ArrowResult;
use arrow::record_batch::RecordBatch;
use arrow::util::bit_util::{ceil, get_bit_raw};
-#[cfg(feature = "jit")]
-use datafusion_jit::api::Assembler;
-#[cfg(feature = "jit")]
-use datafusion_jit::api::GeneratedFunction;
-#[cfg(feature = "jit")]
-use datafusion_jit::ast::{I64, PTR};
use std::sync::Arc;
/// Read `data` of raw-bytes rows starting at `offsets` out to a record batch
pub fn read_as_batch(
data: &[u8],
schema: Arc<Schema>,
- offsets: Vec<usize>,
+ offsets: &[usize],
) -> Result<RecordBatch> {
let row_num = offsets.len();
let mut output = MutableRecordBatch::new(row_num, schema.clone());
- let mut row = RowReader::new(&schema, data);
+ let mut row = RowReader::new(&schema);
for offset in offsets.iter().take(row_num) {
- row.point_to(*offset);
+ row.point_to(*offset, data);
Review Comment:
The main reason for this change is to support the use case in sort payload
output, where we need to chase compositeIndex pointers and output rows that
belongs to different input batches/pages. So we could therefore point to a
record, append its cell to output record batch buffer, and ponit to the next
record.
Since it's just a field assignment without expensive calculations, I think
it's acceptable here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]