This is an automated email from the ASF dual-hosted git repository.
xuang7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git
The following commit(s) were added to refs/heads/main by this push:
new ac55403592 fix: require non-null decodingMethod in
URLFetcherOpDesc.sourceSchema (#5048)
ac55403592 is described below
commit ac5540359290afebee35889f47342ff296a875cb
Author: Matthew B. <[email protected]>
AuthorDate: Wed May 13 15:22:04 2026 -0700
fix: require non-null decodingMethod in URLFetcherOpDesc.sourceSchema
(#5048)
### What changes were proposed in this PR?
`URLFetcherOpDesc.decodingMethod` defaults to `null` (from
`var decodingMethod: DecodingMethod = _`). `sourceSchema()` branched on
`if (decodingMethod == DecodingMethod.UTF_8) STRING else ANY`, so a null
value silently produced an ANY column even though the JSON schema marks the
field `required = true`. This PR adds a `require(decodingMethod != null, ...)`
precondition at the top of `sourceSchema()`, surfacing the misconfiguration
as a clear `IllegalArgumentException` instead of a silent ANY-typed output.
### Any related issues, documentation, or discussions?
Closes: #4815
### How was this PR tested?
Updated `URLFetcherOpDescSpec`: flipped the previously pinned "default
to ANY when decodingMethod is left unset" case to assert that
`sourceSchema()` now throws
`IllegalArgumentException`. Existing UTF-8 / RAW_BYTES / physical-op /
schema-propagation specs continue to pass. Ran
`sbt "project WorkflowOperator" "testOnly org.apache.texera.amber.operator.source.fetcher.URLFetcherOpDescSpec"`;
all 7 tests pass.
### Was this PR authored or co-authored using generative AI tooling?
Co-authored with Claude Opus 4.7, in compliance with the ASF Generative Tooling Guidance.
---------
Co-authored-by: Xuan Gu <[email protected]>
---
.../operator/source/fetcher/URLFetcherOpDesc.scala | 4 ++++
.../operator/source/fetcher/URLFetcherOpDescSpec.scala | 17 ++++++++---------
.../operator/source/fetcher/URLFetcherOpExecSpec.scala | 3 ---
3 files changed, 12 insertions(+), 12 deletions(-)
diff --git
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/fetcher/URLFetcherOpDesc.scala
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/fetcher/URLFetcherOpDesc.scala
index f8b840b28c..7abe83945d 100644
---
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/fetcher/URLFetcherOpDesc.scala
+++
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/fetcher/URLFetcherOpDesc.scala
@@ -46,6 +46,10 @@ class URLFetcherOpDesc extends SourceOperatorDescriptor {
var decodingMethod: DecodingMethod = _
override def sourceSchema(): Schema = {
+ require(
+ decodingMethod != null,
+ "URLFetcherOpDesc.decodingMethod must be set before sourceSchema is
computed"
+ )
Schema()
.add(
"URL content",
diff --git
a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/fetcher/URLFetcherOpDescSpec.scala
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/fetcher/URLFetcherOpDescSpec.scala
index 3b8c306b73..34b7ec8f6f 100644
---
a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/fetcher/URLFetcherOpDescSpec.scala
+++
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/fetcher/URLFetcherOpDescSpec.scala
@@ -67,17 +67,16 @@ class URLFetcherOpDescSpec extends AnyFlatSpec with
Matchers {
schema.getAttributes.head.getType shouldBe AttributeType.ANY
}
- it should "default to ANY when decodingMethod is left unset (current
behavior)" in {
- // Pin: `var decodingMethod: DecodingMethod = _` defaults to null.
- // sourceSchema's branch is `if (decodingMethod == DecodingMethod.UTF_8)
- // STRING else ANY`, so a null comparison falls through to ANY without
- // raising. Documenting the current behavior so a future explicit-null
- // check breaks this spec deliberately.
+ it should "fail loudly when decodingMethod is left unset rather than
silently defaulting to ANY" in {
+ // `var decodingMethod: DecodingMethod = _` defaults to null. Without a
+ // guard, sourceSchema would fall through `if (decodingMethod ==
+ // DecodingMethod.UTF_8) ... else ANY` and silently produce an ANY column
+ // for a misconfigured operator. sourceSchema now requires a non-null
+ // decodingMethod and surfaces the misconfiguration as an
+ // IllegalArgumentException.
val op = new URLFetcherOpDesc
op.url = "https://example.test/data"
- val schema = op.sourceSchema()
- schema.getAttributes should have length 1
- schema.getAttributes.head.getType shouldBe AttributeType.ANY
+ an[IllegalArgumentException] should be thrownBy op.sourceSchema()
}
"URLFetcherOpDesc.getPhysicalOp" should "wire the URLFetcherOpExec class
name into the OpExecInitInfo" in {
diff --git
a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/fetcher/URLFetcherOpExecSpec.scala
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/fetcher/URLFetcherOpExecSpec.scala
index c61468c683..47770cbae8 100644
---
a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/fetcher/URLFetcherOpExecSpec.scala
+++
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/source/fetcher/URLFetcherOpExecSpec.scala
@@ -19,14 +19,11 @@
package org.apache.texera.amber.operator.source.fetcher
-import org.apache.texera.amber.core.tuple.Schema
import org.apache.texera.amber.util.JSONUtils.objectMapper
import org.scalatest.BeforeAndAfter
import org.scalatest.flatspec.AnyFlatSpec
class URLFetcherOpExecSpec extends AnyFlatSpec with BeforeAndAfter {
- val resultSchema: Schema = new URLFetcherOpDesc().sourceSchema()
-
val opDesc: URLFetcherOpDesc = new URLFetcherOpDesc()
it should "fetch url and output one tuple with raw bytes" in {