ivanvankov commented on a change in pull request #9647:
URL: https://github.com/apache/arrow/pull/9647#discussion_r589046553



##########
File path: rust/arrow/src/array/array_binary.rs
##########
@@ -364,62 +365,135 @@ impl FixedSizeBinaryArray {
         self.data.buffers()[0].clone()
     }
 
-    #[inline]
-    fn value_offset_at(&self, i: usize) -> i32 {
-        self.length * i as i32
-    }
-}
+    /// Create an array from an iterable argument of sparse byte slices.
+    ///
+    /// # Errors
+    ///
+    /// Returns error if argument has length zero, or sizes of nested slices 
don't match.
+    pub fn try_from_sparse_iter<T, U>(mut iter: T) -> Result<Self, ArrowError>
+    where
+        T: Iterator<Item = Option<U>>,
+        U: AsRef<[u8]>,
+    {
+        let mut len = 0;
+        let mut size = 0;
+        let mut byte = 0;
+        let mut null_buf = MutableBuffer::from_len_zeroed(0);
+        let mut buffer = MutableBuffer::from_len_zeroed(0);
+        let mut prepend = 0;
+        iter.try_for_each(|item| -> Result<(), ArrowError> {
+            if byte == 0 {
+                null_buf.push(0);
+                byte = 8;
+            }
+            byte -= 1;
+
+            if let Some(sliceble) = item {
+                let slice = sliceble.as_ref();
+                if size != slice.len() {
+                    if size == 0 {
+                        size = slice.len();
+                    } else {
+                        return Err(ArrowError::InvalidArgumentError(format!(
+                            "Nested array size mismatch: one is {}, and the 
other is {}",
+                            size,
+                            slice.len()
+                        )));
+                    }
+                }
+                bit_util::set_bit(null_buf.as_slice_mut(), len);
+                buffer.extend_from_slice(slice);
+            } else {
+                buffer.extend_zeros(size);
+                if size == 0 {
+                    prepend += 1;
+                }
+            }
 
-impl From<Vec<Vec<u8>>> for FixedSizeBinaryArray {
-    fn from(data: Vec<Vec<u8>>) -> Self {
-        let len = data.len();
-        assert!(len > 0);
-        let size = data[0].len();
-        assert!(data.iter().all(|item| item.len() == size));
-        let data = data.into_iter().flatten().collect::<Vec<_>>();
-        let array_data = ArrayData::builder(DataType::FixedSizeBinary(size as 
i32))
-            .len(len)
-            .add_buffer(Buffer::from(&data))
-            .build();
-        FixedSizeBinaryArray::from(array_data)
-    }
-}
+            len += 1;
 
-impl From<Vec<Option<Vec<u8>>>> for FixedSizeBinaryArray {
-    fn from(data: Vec<Option<Vec<u8>>>) -> Self {
-        let len = data.len();
-        assert!(len > 0);
-        // try to estimate the size. This may not be possible no entry is 
valid => panic
-        let size = data.iter().filter_map(|e| 
e.as_ref()).next().unwrap().len();
-        assert!(data
-            .iter()
-            .filter_map(|e| e.as_ref())
-            .all(|item| item.len() == size));
-
-        let num_bytes = bit_util::ceil(len, 8);
-        let mut null_buf = MutableBuffer::from_len_zeroed(num_bytes);
-        let null_slice = null_buf.as_slice_mut();
-
-        data.iter().enumerate().for_each(|(i, entry)| {
-            if entry.is_some() {
-                bit_util::set_bit(null_slice, i);
+            Ok(())
+        })?;
+
+        if len == 0 {
+            return Err(ArrowError::InvalidArgumentError(
+                "Input iterable argument has no data".to_owned(),
+            ));
+        }
+
+        if prepend > 0 {
+            let extend_size = size * prepend;
+            let copy_size = buffer.len();
+            buffer.resize(copy_size + extend_size, 0);
+            unsafe {
+                let src = buffer.as_ptr();
+                let dst = buffer.as_mut_ptr().add(extend_size);
+                std::ptr::copy(src, dst, copy_size);
+                buffer.as_mut_ptr().write_bytes(0, extend_size);
             }
-        });
+        }
 
-        let data = data
-            .into_iter()
-            .flat_map(|e| e.unwrap_or_else(|| vec![0; size]))
-            .collect::<Vec<_>>();
-        let data = ArrayData::new(
+        let array_data = ArrayData::new(
             DataType::FixedSizeBinary(size as i32),
             len,
             None,
             Some(null_buf.into()),
             0,
-            vec![Buffer::from(&data)],
+            vec![buffer.into()],
             vec![],
         );
-        FixedSizeBinaryArray::from(Arc::new(data))
+        Ok(FixedSizeBinaryArray::from(Arc::new(array_data)))
+    }
+
+    /// Create an array from an iterable argument of byte slices.
+    ///
+    /// # Errors
+    ///
+    /// Returns error if argument has length zero, or sizes of nested slices 
don't match.
+    pub fn try_from_iter<T, U>(mut iter: T) -> Result<Self, ArrowError>
+    where
+        T: Iterator<Item = U>,
+        U: AsRef<[u8]>,
+    {
+        let mut len = 0;
+        let mut size = 0;
+        let mut buffer = MutableBuffer::from_len_zeroed(0);
+        iter.try_for_each(|item| -> Result<(), ArrowError> {
+            let slice = item.as_ref();
+            if size != slice.len() {
+                if size == 0 {
+                    size = slice.len();
+                } else {
+                    return Err(ArrowError::InvalidArgumentError(format!(
+                        "Nested array size mismatch: one is {}, and the other 
is {}",
+                        size,
+                        slice.len()
+                    )));
+                }
+            }
+            buffer.extend_from_slice(slice);
+
+            len += 1;
+
+            Ok(())
+        })?;
+
+        if len == 0 {
+            return Err(ArrowError::InvalidArgumentError(
+                "Input iterable argument has no data".to_owned(),
+            ));
+        }
+
+        let array_data = ArrayData::builder(DataType::FixedSizeBinary(size as 
i32))
+            .len(len)

Review comment:
       Yes, it will be equal to 4. But in the previous implementation (for
example, `impl From<Vec<Vec<u8>>> for FixedSizeBinaryArray`), when we pass a
vec with 4 items to `from`, it will put 4 as `len` in the array builder.
Isn't it the same there?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to