This is an automated email from the ASF dual-hosted git repository.

mengw15 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git


The following commit(s) were added to refs/heads/main by this push:
     new a820f67271 fix: avoid NPE in WorkflowCompilationResource for missing schemas (#5090)
a820f67271 is described below

commit a820f6727179581a17a677b07b83b8e78e7974ac
Author: Meng Wang <[email protected]>
AuthorDate: Fri May 15 19:52:47 2026 -0700

    fix: avoid NPE in WorkflowCompilationResource for missing schemas (#5090)
    
    ### What changes were proposed in this PR?
    
    Fixes a `NullPointerException` in
    `WorkflowCompilationResource.compileWorkflow` when the compiler emits
    `Some(null)` for a port whose output schema cannot be derived (e.g. a
    CSV scan source whose `fileName` cannot be resolved on disk).
    
    Previously, the resource checked `schemaOption.isEmpty` and then called
    `schemaOption.get.attributes` in the `else` branch. Because
    `Some(null).isEmpty` is `false`, the `else` branch ran and dereferenced
    a `null` `Schema`, raising an NPE that the JAX-RS layer surfaced as HTTP
    500. After this change, both `None` and `Some(null)` are normalized to
    `None` via `schemaOption.flatMap(Option(_)).map(_.attributes)`, so the
    response building always succeeds and the underlying compilation error
    reaches the client as a structured `WorkflowCompilationFailure`.
    
    ### Any related issues, documentation, discussions?
    
    Closes #5021.
    
    ### How was this PR tested?
    
    Added a new unit test in `WorkflowCompilationResourceSpec` that
    reproduces the issue: it POSTs a `LogicalPlanPojo` containing a single
    CSV scan with a non-existent file path and asserts the response is HTTP
    200 with `type=failure` (rather than HTTP 500). The existing happy-path
    compilation test still passes, confirming no regression on the success
    branch.
    
    Run locally:
    
    ```
    sbt "WorkflowCompilingService/testOnly org.apache.texera.service.resource.WorkflowCompilationResourceSpec"
    ```
    
    Result: `Tests: succeeded 2, failed 0`.
    
    ### Was this PR authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Code (claude-opus-4-7)
    
    ---
    
    🤖 Generated with [Claude Code](https://claude.com/claude-code)
    
    Co-authored-by: Claude Opus 4.7 (1M context) <[email protected]>
---
 .../resource/WorkflowCompilationResource.scala     | 15 ++++-------
 .../resource/WorkflowCompilationResourceSpec.scala | 29 ++++++++++++++++++++++
 2 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/workflow-compiling-service/src/main/scala/org/apache/texera/service/resource/WorkflowCompilationResource.scala b/workflow-compiling-service/src/main/scala/org/apache/texera/service/resource/WorkflowCompilationResource.scala
index f311f31d0b..501498b1d5 100644
--- a/workflow-compiling-service/src/main/scala/org/apache/texera/service/resource/WorkflowCompilationResource.scala
+++ b/workflow-compiling-service/src/main/scala/org/apache/texera/service/resource/WorkflowCompilationResource.scala
@@ -75,16 +75,11 @@ class WorkflowCompilationResource extends LazyLogging {
       case (operatorIdentity, schemas) =>
         val opId = operatorIdentity.id
         val portIdAndAttributes = schemas.map {
-          case (portId, schemaOption) => {
-            if (schemaOption.isEmpty) {
-              (PortIdentityKeySerializer.portIdToString(portId), None)
-            } else {
-              (
-                PortIdentityKeySerializer.portIdToString(portId),
-                Some(schemaOption.get.attributes)
-              )
-            }
-          }
+          case (portId, schemaOption) =>
+            // Normalize Some(null) to None: upstream may yield Some(null) when a
+            // schema cannot be derived (e.g. unresolvable scan source file).
+            val attributes = schemaOption.flatMap(Option(_)).map(_.attributes)
+            (PortIdentityKeySerializer.portIdToString(portId), attributes)
         }
         (opId, portIdAndAttributes)
     }
diff --git a/workflow-compiling-service/src/test/scala/org/apache/texera/service/resource/WorkflowCompilationResourceSpec.scala b/workflow-compiling-service/src/test/scala/org/apache/texera/service/resource/WorkflowCompilationResourceSpec.scala
index 17e231270c..87246fd7f3 100644
--- a/workflow-compiling-service/src/test/scala/org/apache/texera/service/resource/WorkflowCompilationResourceSpec.scala
+++ b/workflow-compiling-service/src/test/scala/org/apache/texera/service/resource/WorkflowCompilationResourceSpec.scala
@@ -217,4 +217,33 @@ class WorkflowCompilationResourceSpec extends AnyFlatSpec with BeforeAndAfterAll
       )
     )
   }
+
+  it should "return WorkflowCompilationFailure (not HTTP 500) when a scan source file cannot be resolved" in {
+    val brokenCsv = getCsvScanOpDesc("/does/not/exist/missing.csv", header = true)
+
+    val logicalPlanPojo = LogicalPlanPojo(
+      operators = List(brokenCsv),
+      links = List(),
+      opsToViewResult = List(),
+      opsToReuseResult = List()
+    )
+
+    val modifiedLogicalPlanJsonString = transformLogicalPlanPojoToJsonString(logicalPlanPojo)
+
+    val response = resources
+      .target("/compile")
+      .request(MediaType.APPLICATION_JSON)
+      .post(Entity.json(modifiedLogicalPlanJsonString))
+
+    // Must not surface as HTTP 500 — the error must come back as a structured failure.
+    assertThat(response.getStatus).isEqualTo(200)
+
+    // Inspect the raw JSON rather than deserializing the full response: WorkflowFatalError
+    // is not round-trippable through the test ObjectMapper, but that is unrelated to the
+    // bug under test (which is purely about the resource not NPE'ing).
+    val responseBody = response.readEntity(classOf[String])
+    val rootNode = objectMapper.readTree(responseBody)
+    assertThat(rootNode.get("type").asText()).isEqualTo("failure")
+    assertThat(rootNode.has("operatorErrors")).isTrue
+  }
 }

Reply via email to