Dandandan commented on code in PR #9236:
URL: https://github.com/apache/arrow-rs/pull/9236#discussion_r2713723591


##########
parquet/src/arrow/array_reader/byte_view_array.rs:
##########
@@ -469,32 +480,32 @@ impl ByteViewArrayDecoderDictionary {
         // then the base_buffer_idx is 5 - 2 = 3
         let base_buffer_idx = output.buffers.len() as u32 - dict.buffers.len() 
as u32;
 
-        self.decoder.read(len, |keys| {
-            for k in keys {
-                let view = dict
-                    .views
-                    .get(*k as usize)
-                    .ok_or_else(|| general_err!("invalid key={} for 
dictionary", *k))?;
-                let len = *view as u32;
-                if len <= 12 {
-                    // directly append the view if it is inlined
-                    // Safety: the view is from the dictionary, so it is valid
-                    unsafe {
-                        output.append_raw_view_unchecked(view);
+        let mut error = None;
+        let read = self.decoder.read(len, |keys| {
+            output
+                .views
+                .extend(keys.iter().map(|k| match dict.views.get(*k as usize) {
+                    Some(&view) => {
+                        let len = view as u32;
+                        if len <= 12 {
+                            view
+                        } else {
+                            let mut view = ByteView::from(view);
+                            view.buffer_index += base_buffer_idx;
+                            view.into()
+                        }
                     }
-                } else {
-                    // correct the buffer index and append the view
-                    let mut view = ByteView::from(*view);
-                    view.buffer_index += base_buffer_idx;
-                    // Safety: the view is from the dictionary,
-                    // we corrected the index value to point it to output 
buffer, so it is valid
-                    unsafe {
-                        output.append_raw_view_unchecked(&view.into());
+                    None => {
+                        error = Some(general_err!("invalid key={} for 
dictionary", *k));

Review Comment:
   1. Yes, this one is by design: it gives us better performance on the happy 
path. The trade-off is that, instead of returning immediately on an invalid 
key, the buffer will contain a "0" view for each invalid value.
   2. Good point; I guess that should be possible without a performance penalty.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to