This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new ae8e57c49 fix: resolve Miri UB in null struct field test, re-enable Miri on PRs (#3669)
ae8e57c49 is described below

commit ae8e57c49dfed4a0988e26e52243469843d04d60
Author: Andy Grove <[email protected]>
AuthorDate: Wed Mar 11 13:31:29 2026 -0600

    fix: resolve Miri UB in null struct field test, re-enable Miri on PRs (#3669)
    
    Add bounds-checking debug_assert in SparkUnsafeRow::get_element_offset
    to catch out-of-bounds accesses early.
    
    Fix test_append_null_struct_field_to_struct_builder which had an
    undersized 8-byte buffer (only null bitset, no field slot) with null
    bit unset, causing an out-of-bounds read in get_long. Use 16 bytes
    with bit 0 set to properly represent a null field.
    
    Re-enable Miri on pull_request trigger now that the upstream cargo
    nightly regression (#3499) is resolved.
---
 .github/workflows/miri.yml                            | 18 ++++++++----------
 native/core/src/execution/shuffle/spark_unsafe/row.rs | 16 +++++++++++++---
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/miri.yml b/.github/workflows/miri.yml
index c9ee6abdd..ea36e1359 100644
--- a/.github/workflows/miri.yml
+++ b/.github/workflows/miri.yml
@@ -28,16 +28,14 @@ on:
       - "native/core/benches/**"
       - "native/spark-expr/benches/**"
       - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
-# Disabled until Miri compatibility is restored
-# https://github.com/apache/datafusion-comet/issues/3499
-#  pull_request:
-#    paths-ignore:
-#      - "doc/**"
-#      - "docs/**"
-#      - "**.md"
-#      - "native/core/benches/**"
-#      - "native/spark-expr/benches/**"
-#      - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
+  pull_request:
+    paths-ignore:
+      - "doc/**"
+      - "docs/**"
+      - "**.md"
+      - "native/core/benches/**"
+      - "native/spark-expr/benches/**"
+      - "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
   # manual trigger
  # https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
   workflow_dispatch:
diff --git a/native/core/src/execution/shuffle/spark_unsafe/row.rs b/native/core/src/execution/shuffle/spark_unsafe/row.rs
index 6b41afae8..4d937db76 100644
--- a/native/core/src/execution/shuffle/spark_unsafe/row.rs
+++ b/native/core/src/execution/shuffle/spark_unsafe/row.rs
@@ -255,8 +255,15 @@ impl SparkUnsafeObject for SparkUnsafeRow {
         self.row_addr
     }
 
-    fn get_element_offset(&self, index: usize, _: usize) -> *const u8 {
-        (self.row_addr + self.row_bitset_width + (index * 8) as i64) as *const u8
+    fn get_element_offset(&self, index: usize, element_size: usize) -> *const u8 {
+        let offset = self.row_bitset_width + (index * 8) as i64;
+        debug_assert!(
+            self.row_size >= 0 && offset + element_size as i64 <= self.row_size as i64,
+            "get_element_offset: access at offset {offset} with size {element_size} \
+             exceeds row_size {} for index {index}",
+            self.row_size
+        );
+        (self.row_addr + offset) as *const u8
     }
 }
 
@@ -1659,7 +1666,10 @@ mod test {
         let fields = Fields::from(vec![Field::new("st", data_type.clone(), true)]);
         let mut struct_builder = StructBuilder::from_fields(fields, 1);
         let mut row = SparkUnsafeRow::new_with_num_fields(1);
-        let data = [0; 8];
+        // 8 bytes null bitset + 8 bytes field value = 16 bytes
+        // Set bit 0 in the null bitset to mark field 0 as null
+        let mut data = [0u8; 16];
+        data[0] = 1;
         row.point_to_slice(&data);
         append_field(&data_type, &mut struct_builder, &row, 0).expect("append 
field");
         struct_builder.append_null();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to