tustvold commented on code in PR #2999:
URL: https://github.com/apache/arrow-rs/pull/2999#discussion_r1014596963


##########
parquet/src/column/reader/decoder.rs:
##########
@@ -323,41 +368,153 @@ impl DefinitionLevelDecoder for ColumnLevelDecoderImpl {
     ) -> Result<(usize, usize)> {
         let mut level_skip = 0;
         let mut value_skip = 0;
-        match self.decoder.as_mut().unwrap() {
-            LevelDecoderInner::Packed(reader, bit_width) => {
-                for _ in 0..num_levels {
-                    // Values are delimited by max_def_level
-                    if max_def_level
-                        == reader
-                            .get_value::<i16>(*bit_width as usize)
-                            .expect("Not enough values in Packed 
ColumnLevelDecoderImpl.")
-                    {
-                        value_skip += 1;
-                    }
-                    level_skip += 1;
-                }
-            }
-            LevelDecoderInner::Rle(reader) => {
-                for _ in 0..num_levels {
-                    if let Some(level) = reader
-                        .get::<i16>()
-                        .expect("Not enough values in Rle 
ColumnLevelDecoderImpl.")
-                    {
-                        // Values are delimited by max_def_level
-                        if level == max_def_level {
-                            value_skip += 1;
-                        }
-                    }
-                    level_skip += 1;
+        while level_skip < num_levels {
+            let remaining_levels = num_levels - level_skip;
+
+            if self.buffer.is_empty() {
+                // Only read number of needed values
+                self.read_to_buffer(remaining_levels.min(SKIP_BUFFER_SIZE))?;
+                if self.buffer.is_empty() {
+                    // Reached end of page
+                    break;
                 }
             }
+            let to_read = self.buffer.len().min(remaining_levels);
+
+            level_skip += to_read;
+            value_skip += self.buffer[..to_read]
+                .iter()
+                .filter(|x| **x == max_def_level)
+                .count();
+
+            self.split_off_buffer(to_read)
         }
+
         Ok((value_skip, level_skip))
     }
 }
 
 impl RepetitionLevelDecoder for ColumnLevelDecoderImpl {
-    fn skip_rep_levels(&mut self, _num_records: usize) -> Result<(usize, 
usize)> {
-        Err(nyi_err!("https://github.com/apache/arrow-rs/issues/1792";))
+    fn skip_rep_levels(&mut self, num_records: usize) -> Result<(usize, 
usize)> {
+        let mut level_skip = 0;
+        let mut record_skip = 0;
+
+        loop {
+            if self.buffer.is_empty() {
+                // Read SKIP_BUFFER_SIZE as we don't know how many to read
+                self.read_to_buffer(SKIP_BUFFER_SIZE)?;
+                if self.buffer.is_empty() {
+                    // Reached end of page
+                    break;
+                }
+            }
+
+            let mut to_skip = 0;
+            while to_skip < self.buffer.len() && record_skip != num_records {
+                if self.buffer[to_skip] == 0 {
+                    record_skip += 1;
+                }
+                to_skip += 1;
+            }
+
+            // Find end of record
+            while to_skip < self.buffer.len() && self.buffer[to_skip] != 0 {
+                to_skip += 1;
+            }
+
+            level_skip += to_skip;
+            if to_skip >= self.buffer.len() {

Review Comment:
   Yeah, the hope is that >= helps LLVM elide bound checks that follow. Not 
sure it makes any difference, just habit 😅



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to