tustvold commented on code in PR #2040:
URL: https://github.com/apache/arrow-rs/pull/2040#discussion_r919404802


##########
arrow/src/ipc/writer.rs:
##########
@@ -861,6 +863,141 @@ fn has_validity_bitmap(data_type: &DataType, write_options: &IpcWriteOptions) ->
     }
 }
 
+/// Whether to truncate the buffer
+#[inline]
+fn buffer_need_truncate(
+    array_offset: usize,
+    buffer: &Buffer,
+    spec: &BufferSpec,
+    min_length: usize,
+) -> bool {
+    spec != &BufferSpec::AlwaysNull && (array_offset != 0 || min_length < buffer.len())
+}
+
+/// Returns byte width for a buffer spec. Only for `BufferSpec::FixedWidth`.
+#[inline]
+fn get_buffer_byte_width(spec: &BufferSpec) -> usize {
+    match spec {
+        BufferSpec::FixedWidth { byte_width } => *byte_width,
+        _ => 0,
+    }
+}
+
+/// Returns the number of total bytes in base binary arrays.
+fn get_total_bytes(array_data: &ArrayData) -> usize {
+    if array_data.is_empty() {
+        return 0;
+    }
+    match array_data.data_type() {
+        DataType::Binary => {
+            let array: BinaryArray = array_data.clone().into();
+            let offsets = array.value_offsets();
+            (offsets[array_data.len()] - offsets[0]) as usize
+        }
+        DataType::LargeBinary => {
+            let array: LargeBinaryArray = array_data.clone().into();
+            let offsets = array.value_offsets();
+            (offsets[array_data.len()] - offsets[0]) as usize
+        }
+        DataType::Utf8 => {
+            let array: StringArray = array_data.clone().into();
+            let offsets = array.value_offsets();
+            (offsets[array_data.len()] - offsets[0]) as usize
+        }
+        DataType::LargeUtf8 => {
+            let array: LargeStringArray = array_data.clone().into();
+            let offsets = array.value_offsets();
+            (offsets[array_data.len()] - offsets[0]) as usize
+        }
+        _ => unreachable!(),
+    }
+}
+
+/// Rebase value offsets for given ArrayData to zero-based.
+fn get_zero_based_value_offsets(array_data: &ArrayData) -> Buffer {
+    match array_data.data_type() {
+        DataType::Binary => {
+            let array: BinaryArray = array_data.clone().into();
+            let offsets = array.value_offsets();
+            let start_offset = offsets[0];
+
+            let mut new_offsets = vec![0_i32; array_data.len() + 1];
+            for (idx, x) in offsets.iter().enumerate() {
+                new_offsets[idx] = x - start_offset;
+            }
+
+            Buffer::from_slice_ref(&new_offsets)

Review Comment:
   Yes, but this copies the data twice: once into the `Vec` and once here, in
   `Buffer::from_slice_ref`. If you used `BufferBuilder` instead of `Vec`, you
   would eliminate one of these copies.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to