This is an automated email from the ASF dual-hosted git repository.
mengw15 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git
The following commit(s) were added to refs/heads/main by this push:
new 87eb240ad1 fix: reject non-standard Int bit-widths in ArrowUtils
(#5030)
87eb240ad1 is described below
commit 87eb240ad1dd667d0013b8dfb405350c2ae593b2
Author: Matthew B. <[email protected]>
AuthorDate: Wed May 13 14:30:22 2026 -0700
fix: reject non-standard Int bit-widths in ArrowUtils (#5030)
### What changes were proposed in this PR?
`ArrowUtils.toAttributeType` (and the mirror dispatch in
`setTexeraTuple`) used `case 64 | _ => LONG` for `ArrowType.Int`. The
trailing `_` made the second arm a catch-all, so every Int width that
was not 16 or 32 (e.g. 8 or 128) silently surfaced as `LONG` rather than
being rejected. This PR splits the alternation so that 64 stays `LONG`,
16/32 stay `INTEGER`, and any other width throws
`AttributeTypeException`, matching how every other unsupported
ArrowType is already handled in the same method.
### Any related issues, documentation, or discussions?
Closes: #4760
### How was this PR tested?
* Updated the existing pin test in `ArrowUtilsSpec` (which previously
documented the silent coercion and invited this fix) to assert that
`Int(8)` and `Int(128)` now raise `AttributeTypeException`.
* `sbt scalafmtAll` clean.
* `sbt "workflowCore/testOnly
org.apache.texera.amber.util.ArrowUtilsSpec"` is expected to pass;
the operator-side `ArrowUtilsSpec` only exercises widths 16/32/64 and is
unaffected.
### Was this PR authored or co-authored using generative AI tooling?
Co-authored with Claude Opus 4.7 in compliance with the ASF Generative Tooling Guidance.
Co-authored-by: Meng Wang <[email protected]>
---
.../org/apache/texera/amber/util/ArrowUtils.scala | 14 ++++++++++++--
.../org/apache/texera/amber/util/ArrowUtilsSpec.scala | 18 ++++++++++--------
2 files changed, 22 insertions(+), 10 deletions(-)
diff --git
a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/ArrowUtils.scala
b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/ArrowUtils.scala
index c4f649e719..af14ae9acd 100644
---
a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/ArrowUtils.scala
+++
b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/ArrowUtils.scala
@@ -131,8 +131,13 @@ object ArrowUtils extends LazyLogging {
case 16 | 32 =>
AttributeType.INTEGER
- case 64 | _ =>
+ case 64 =>
AttributeType.LONG
+
+ case other =>
+ throw new AttributeTypeUtils.AttributeTypeException(
+ s"Unsupported Int bit width: $other"
+ )
}
case _: ArrowType.Bool =>
AttributeType.BOOLEAN
@@ -187,10 +192,15 @@ object ArrowUtils extends LazyLogging {
.asInstanceOf[IntVector]
.setSafe(index, !isNull, if (isNull) 0 else
value.asInstanceOf[Int])
- case 64 | _ =>
+ case 64 =>
vector
.asInstanceOf[BigIntVector]
.setSafe(index, !isNull, if (isNull) 0 else
value.asInstanceOf[Long])
+
+ case other =>
+ throw new AttributeTypeUtils.AttributeTypeException(
+ s"Unsupported Int bit width: $other"
+ )
}
case _: ArrowType.Bool =>
diff --git
a/common/workflow-core/src/test/scala/org/apache/texera/amber/util/ArrowUtilsSpec.scala
b/common/workflow-core/src/test/scala/org/apache/texera/amber/util/ArrowUtilsSpec.scala
index 212be040b1..62b10a6686 100644
---
a/common/workflow-core/src/test/scala/org/apache/texera/amber/util/ArrowUtilsSpec.scala
+++
b/common/workflow-core/src/test/scala/org/apache/texera/amber/util/ArrowUtilsSpec.scala
@@ -45,14 +45,16 @@ class ArrowUtilsSpec extends AnyFlatSpec with Matchers {
ArrowUtils.toAttributeType(new ArrowType.Int(64, true)) shouldBe
AttributeType.LONG
}
- it should "map non-standard Int bit-widths to LONG (current behavior)" in {
- // Pin: the source code's match is `case 16 | 32 => INTEGER` then
- // `case 64 | _ => LONG`. The trailing `_` makes the second arm a
- // catch-all, so Int(8), Int(128) and any other width all surface as
- // LONG. A future fix that distinguishes those widths will deliberately
- // break this spec.
- ArrowUtils.toAttributeType(new ArrowType.Int(8, true)) shouldBe
AttributeType.LONG
- ArrowUtils.toAttributeType(new ArrowType.Int(128, true)) shouldBe
AttributeType.LONG
+ it should "throw AttributeTypeException for non-standard Int bit-widths" in {
+ // Only 16/32 (INTEGER) and 64 (LONG) are supported. Other widths used to
+ // be silently coerced to LONG by a `case 64 | _` catch-all; they now
+ // raise rather than masquerade as Int64.
+ assertThrows[AttributeTypeException] {
+ ArrowUtils.toAttributeType(new ArrowType.Int(8, true))
+ }
+ assertThrows[AttributeTypeException] {
+ ArrowUtils.toAttributeType(new ArrowType.Int(128, true))
+ }
}
it should "map Bool to BOOLEAN" in {