This is an automated email from the ASF dual-hosted git repository.

mengw15 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git


The following commit(s) were added to refs/heads/main by this push:
     new 87eb240ad1 fix: reject non-standard Int bit-widths in ArrowUtils 
(#5030)
87eb240ad1 is described below

commit 87eb240ad1dd667d0013b8dfb405350c2ae593b2
Author: Matthew B. <[email protected]>
AuthorDate: Wed May 13 14:30:22 2026 -0700

    fix: reject non-standard Int bit-widths in ArrowUtils (#5030)
    
    ### What changes were proposed in this PR?
    `ArrowUtils.toAttributeType` (and the mirror dispatch in
    `setTexeraTuple`) used `case 64 | _ => LONG` for `ArrowType.Int`. The
    trailing `_` made the second arm a catch-all, so every Int width that
    was not 16 or 32 (e.g. 8, 128) silently surfaced as `LONG` rather than
    being rejected. This PR splits the alternation so 64 stays `LONG`,
    16/32 stay `INTEGER`, and any other width throws
    `AttributeTypeException`, matching how every other unsupported
    `ArrowType` is already handled in the same method.
    
    ### Any related issues, documentation, or discussions?
    Closes: #4760

    ### How was this PR tested?
    * Updated the existing pin test in `ArrowUtilsSpec` (which previously
      documented the silent coercion and invited this fix) to assert that
      `Int(8)` and `Int(128)` now raise `AttributeTypeException`.
    * `sbt scalafmtAll` clean.
    * `sbt "workflowCore/testOnly org.apache.texera.amber.util.ArrowUtilsSpec"`
      expected to pass; operator-side `ArrowUtilsSpec` only exercises
      widths 16/32/64 and is unaffected.
    
      ### Was this PR authored or co-authored using generative AI tooling?
      Co-authored with Claude Opus 4.7 in compliance with ASF
    
    Co-authored-by: Meng Wang <[email protected]>
---
 .../org/apache/texera/amber/util/ArrowUtils.scala      | 14 ++++++++++++--
 .../org/apache/texera/amber/util/ArrowUtilsSpec.scala  | 18 ++++++++++--------
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git 
a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/ArrowUtils.scala
 
b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/ArrowUtils.scala
index c4f649e719..af14ae9acd 100644
--- 
a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/ArrowUtils.scala
+++ 
b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/ArrowUtils.scala
@@ -131,8 +131,13 @@ object ArrowUtils extends LazyLogging {
           case 16 | 32 =>
             AttributeType.INTEGER
 
-          case 64 | _ =>
+          case 64 =>
             AttributeType.LONG
+
+          case other =>
+            throw new AttributeTypeUtils.AttributeTypeException(
+              s"Unsupported Int bit width: $other"
+            )
         }
       case _: ArrowType.Bool =>
         AttributeType.BOOLEAN
@@ -187,10 +192,15 @@ object ArrowUtils extends LazyLogging {
                 .asInstanceOf[IntVector]
                 .setSafe(index, !isNull, if (isNull) 0 else 
value.asInstanceOf[Int])
 
-            case 64 | _ =>
+            case 64 =>
               vector
                 .asInstanceOf[BigIntVector]
                 .setSafe(index, !isNull, if (isNull) 0 else 
value.asInstanceOf[Long])
+
+            case other =>
+              throw new AttributeTypeUtils.AttributeTypeException(
+                s"Unsupported Int bit width: $other"
+              )
           }
 
         case _: ArrowType.Bool =>
diff --git 
a/common/workflow-core/src/test/scala/org/apache/texera/amber/util/ArrowUtilsSpec.scala
 
b/common/workflow-core/src/test/scala/org/apache/texera/amber/util/ArrowUtilsSpec.scala
index 212be040b1..62b10a6686 100644
--- 
a/common/workflow-core/src/test/scala/org/apache/texera/amber/util/ArrowUtilsSpec.scala
+++ 
b/common/workflow-core/src/test/scala/org/apache/texera/amber/util/ArrowUtilsSpec.scala
@@ -45,14 +45,16 @@ class ArrowUtilsSpec extends AnyFlatSpec with Matchers {
     ArrowUtils.toAttributeType(new ArrowType.Int(64, true)) shouldBe 
AttributeType.LONG
   }
 
-  it should "map non-standard Int bit-widths to LONG (current behavior)" in {
-    // Pin: the source code's match is `case 16 | 32 => INTEGER` then
-    // `case 64 | _ => LONG`. The trailing `_` makes the second arm a
-    // catch-all, so Int(8), Int(128) and any other width all surface as
-    // LONG. A future fix that distinguishes those widths will deliberately
-    // break this spec.
-    ArrowUtils.toAttributeType(new ArrowType.Int(8, true)) shouldBe 
AttributeType.LONG
-    ArrowUtils.toAttributeType(new ArrowType.Int(128, true)) shouldBe 
AttributeType.LONG
+  it should "throw AttributeTypeException for non-standard Int bit-widths" in {
+    // Only 16/32 (INTEGER) and 64 (LONG) are supported. Other widths used to
+    // be silently coerced to LONG by a `case 64 | _` catch-all; they now
+    // raise rather than masquerade as Int64.
+    assertThrows[AttributeTypeException] {
+      ArrowUtils.toAttributeType(new ArrowType.Int(8, true))
+    }
+    assertThrows[AttributeTypeException] {
+      ArrowUtils.toAttributeType(new ArrowType.Int(128, true))
+    }
   }
 
   it should "map Bool to BOOLEAN" in {

Reply via email to