This is an automated email from the ASF dual-hosted git repository.
mengw15 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git
The following commit(s) were added to refs/heads/main by this push:
new a820f67271 fix: avoid NPE in WorkflowCompilationResource for missing schemas (#5090)
a820f67271 is described below
commit a820f6727179581a17a677b07b83b8e78e7974ac
Author: Meng Wang <[email protected]>
AuthorDate: Fri May 15 19:52:47 2026 -0700
fix: avoid NPE in WorkflowCompilationResource for missing schemas (#5090)
### What changes were proposed in this PR?
Fixes a `NullPointerException` in
`WorkflowCompilationResource.compileWorkflow` when the compiler emits
`Some(null)` for a port whose output schema cannot be derived (e.g. a
CSV scan source whose `fileName` cannot be resolved on disk).
Previously, the resource checked `schemaOption.isEmpty` and then called
`schemaOption.get.attributes` in the `else` branch. Because
`Some(null).isEmpty` is `false`, the `else` branch ran and dereferenced
a `null` `Schema`, raising an NPE that the JAX-RS layer surfaced as HTTP
500. After this change, both `None` and `Some(null)` are normalized to
`None` via `schemaOption.flatMap(Option(_)).map(_.attributes)`, so the
response building always succeeds and the underlying compilation error
reaches the client as a structured `WorkflowCompilationFailure`.
### Any related issues, documentation, discussions?
Closes #5021.
### How was this PR tested?
Added a new unit test in `WorkflowCompilationResourceSpec` that
reproduces the issue: it POSTs a `LogicalPlanPojo` containing a single
CSV scan with a non-existent file path and asserts the response is HTTP
200 with `type=failure` (rather than HTTP 500). The existing happy-path
compilation test still passes, confirming no regression on the success
branch.
Run locally:
```
sbt "WorkflowCompilingService/testOnly org.apache.texera.service.resource.WorkflowCompilationResourceSpec"
```
Result: `Tests: succeeded 2, failed 0`.
### Was this PR authored or co-authored using generative AI tooling?
Generated-by: Claude Code (claude-opus-4-7)
---
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-authored-by: Claude Opus 4.7 (1M context) <[email protected]>
---
.../resource/WorkflowCompilationResource.scala | 15 ++++-------
.../resource/WorkflowCompilationResourceSpec.scala | 29 ++++++++++++++++++++++
2 files changed, 34 insertions(+), 10 deletions(-)
diff --git a/workflow-compiling-service/src/main/scala/org/apache/texera/service/resource/WorkflowCompilationResource.scala b/workflow-compiling-service/src/main/scala/org/apache/texera/service/resource/WorkflowCompilationResource.scala
index f311f31d0b..501498b1d5 100644
--- a/workflow-compiling-service/src/main/scala/org/apache/texera/service/resource/WorkflowCompilationResource.scala
+++ b/workflow-compiling-service/src/main/scala/org/apache/texera/service/resource/WorkflowCompilationResource.scala
@@ -75,16 +75,11 @@ class WorkflowCompilationResource extends LazyLogging {
case (operatorIdentity, schemas) =>
val opId = operatorIdentity.id
val portIdAndAttributes = schemas.map {
- case (portId, schemaOption) => {
- if (schemaOption.isEmpty) {
- (PortIdentityKeySerializer.portIdToString(portId), None)
- } else {
- (
- PortIdentityKeySerializer.portIdToString(portId),
- Some(schemaOption.get.attributes)
- )
- }
- }
+ case (portId, schemaOption) =>
+ // Normalize Some(null) to None: upstream may yield Some(null) when a
+ // schema cannot be derived (e.g. unresolvable scan source file).
+ val attributes = schemaOption.flatMap(Option(_)).map(_.attributes)
+ (PortIdentityKeySerializer.portIdToString(portId), attributes)
}
(opId, portIdAndAttributes)
}
diff --git a/workflow-compiling-service/src/test/scala/org/apache/texera/service/resource/WorkflowCompilationResourceSpec.scala b/workflow-compiling-service/src/test/scala/org/apache/texera/service/resource/WorkflowCompilationResourceSpec.scala
index 17e231270c..87246fd7f3 100644
--- a/workflow-compiling-service/src/test/scala/org/apache/texera/service/resource/WorkflowCompilationResourceSpec.scala
+++ b/workflow-compiling-service/src/test/scala/org/apache/texera/service/resource/WorkflowCompilationResourceSpec.scala
@@ -217,4 +217,33 @@ class WorkflowCompilationResourceSpec extends AnyFlatSpec with BeforeAndAfterAll
)
)
}
+
+ it should "return WorkflowCompilationFailure (not HTTP 500) when a scan source file cannot be resolved" in {
+ val brokenCsv = getCsvScanOpDesc("/does/not/exist/missing.csv", header = true)
+
+ val logicalPlanPojo = LogicalPlanPojo(
+ operators = List(brokenCsv),
+ links = List(),
+ opsToViewResult = List(),
+ opsToReuseResult = List()
+ )
+
+ val modifiedLogicalPlanJsonString = transformLogicalPlanPojoToJsonString(logicalPlanPojo)
+
+ val response = resources
+ .target("/compile")
+ .request(MediaType.APPLICATION_JSON)
+ .post(Entity.json(modifiedLogicalPlanJsonString))
+
+ // Must not surface as HTTP 500 — the error must come back as a structured failure.
+ assertThat(response.getStatus).isEqualTo(200)
+
+ // Inspect the raw JSON rather than deserializing the full response: WorkflowFatalError
+ // is not round-trippable through the test ObjectMapper, but that is unrelated to the
+ // bug under test (which is purely about the resource not NPE'ing).
+ val responseBody = response.readEntity(classOf[String])
+ val rootNode = objectMapper.readTree(responseBody)
+ assertThat(rootNode.get("type").asText()).isEqualTo("failure")
+ assertThat(rootNode.has("operatorErrors")).isTrue
+ }
}