This is an automated email from the ASF dual-hosted git repository.

aicam pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git


The following commit(s) were added to refs/heads/main by this push:
     new 538ae1d5cb fix: reject hyphenated layer names in createWorkerIdentity 
(#5051)
538ae1d5cb is described below

commit 538ae1d5cb9027b34b29e99d33c2c286970b29b1
Author: Matthew B. <[email protected]>
AuthorDate: Tue May 19 22:01:48 2026 -0700

    fix: reject hyphenated layer names in createWorkerIdentity (#5051)
    
    ### What changes were proposed in this PR?
    `VirtualIdentityUtils.createWorkerIdentity` now rejects layer names
    containing `-` via `require`. The worker-name format
    `Worker:WF<id>-<op>-<layer>-<workerId>` is inherently ambiguous when
    both `op` and `layer` may contain `-`, and production operator IDs (e.g.
    `ClassName-UUID`) structurally must. Layer names in production are
    always hyphen-free (`main`, `build`, `probe`, `localAgg`, `globalAgg`);
    this change lifts that invariant from "implied by the regex" to
    "enforced at the boundary", so callers fail loudly at construction
    instead of silently mis-parsing later. Two `WorkerSpec` test fixtures
    (`"1st-physical-op"`, `"2nd physical-op"`) are renamed to hyphen-free
    equivalents; they only needed to be two distinct strings.
      ### Any related issues, documentation, or discussions?
    Closes: #4728
    ### How was this PR tested?
    The previously-pinned `VirtualIdentityUtilsSpec` bug spec ("misparse
    layer names that contain hyphens (current behavior)") is replaced with a
    spec asserting that `createWorkerIdentity` rejects hyphenated layer
    names. Existing `VirtualIdentityUtilsSpec`, `WorkerSpec`, and the Python
    `test_virtual_identity.py` cases continue to pass (Python's
    `get_worker_index` only extracts the trailing index group and is
    unaffected).
    ### Was this PR authored or co-authored using generative AI tooling?
      Co-authored with Claude Opus 4.7 in compliance with ASF
    
    Co-authored-by: ali risheh <[email protected]>
---
 .../engine/architecture/worker/WorkerSpec.scala    |  4 ++--
 .../texera/amber/util/VirtualIdentityUtils.scala   |  5 ++++-
 .../amber/util/VirtualIdentityUtilsSpec.scala      | 26 ++++++++++++----------
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git 
a/amber/src/test/scala/org/apache/texera/amber/engine/architecture/worker/WorkerSpec.scala
 
b/amber/src/test/scala/org/apache/texera/amber/engine/architecture/worker/WorkerSpec.scala
index f6f33ebbb0..df9cb086a6 100644
--- 
a/amber/src/test/scala/org/apache/texera/amber/engine/architecture/worker/WorkerSpec.scala
+++ 
b/amber/src/test/scala/org/apache/texera/amber/engine/architecture/worker/WorkerSpec.scala
@@ -98,9 +98,9 @@ class WorkerSpec
   private val mockPortId = PortIdentity()
   private val mockLink =
     PhysicalLink(
-      PhysicalOpIdentity(operatorIdentity, "1st-physical-op"),
+      PhysicalOpIdentity(operatorIdentity, "firstPhysicalOp"),
       mockPortId,
-      PhysicalOpIdentity(operatorIdentity, "2nd-physical-op"),
+      PhysicalOpIdentity(operatorIdentity, "secondPhysicalOp"),
       mockPortId
     )
 
diff --git 
a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/VirtualIdentityUtils.scala
 
b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/VirtualIdentityUtils.scala
index 7c1bfafb8d..586d77a9c5 100644
--- 
a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/VirtualIdentityUtils.scala
+++ 
b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/VirtualIdentityUtils.scala
@@ -39,7 +39,10 @@ object VirtualIdentityUtils {
       layerName: String,
       workerId: Int
   ): ActorVirtualIdentity = {
-
+    require(
+      !layerName.contains('-'),
+      s"layerName must not contain '-' (worker-name parsing relies on this): 
$layerName"
+    )
     ActorVirtualIdentity(
       s"Worker:WF${workflowId.id}-$operator-$layerName-$workerId"
     )
diff --git 
a/common/workflow-core/src/test/scala/org/apache/texera/amber/util/VirtualIdentityUtilsSpec.scala
 
b/common/workflow-core/src/test/scala/org/apache/texera/amber/util/VirtualIdentityUtilsSpec.scala
index a25a0c46da..8ebf7dabec 100644
--- 
a/common/workflow-core/src/test/scala/org/apache/texera/amber/util/VirtualIdentityUtilsSpec.scala
+++ 
b/common/workflow-core/src/test/scala/org/apache/texera/amber/util/VirtualIdentityUtilsSpec.scala
@@ -78,18 +78,20 @@ class VirtualIdentityUtilsSpec extends AnyFlatSpec with 
Matchers {
     opId.layerName shouldBe "main"
   }
 
-  it should "misparse layer names that contain hyphens (current behavior)" in {
-    // The layer capture group is `(\w+)`, which does not allow `-`. When the
-    // real layer name contains hyphens (e.g. "1st-physical-op", as seen in
-    // amber WorkerSpec), the greedy operator group eats most of the layer:
-    // operator becomes "myOp-1st-physical" and layer becomes "op". This pins
-    // the current bug so a future fix that broadens `workerNamePattern` to
-    // accept hyphenated layers will surface here and force this spec to be
-    // updated alongside the implementation.
-    val actor = ActorVirtualIdentity("Worker:WF1-myOp-1st-physical-op-3")
-    val opId = VirtualIdentityUtils.getPhysicalOpId(actor)
-    opId.logicalOpId.id shouldBe "myOp-1st-physical"
-    opId.layerName shouldBe "op"
+  "createWorkerIdentity" should "reject layer names containing '-'" in {
+    // The worker-name format `Worker:WF<id>-<op>-<layer>-<workerId>` is
+    // inherently ambiguous when both `op` and `layer` may contain `-`, and
+    // production operator IDs (e.g. `<className>-<UUID>`) structurally must.
+    // We therefore enforce that layer names do not contain `-` at creation
+    // time so the bad state can never be constructed.
+    assertThrows[IllegalArgumentException] {
+      VirtualIdentityUtils.createWorkerIdentity(
+        WorkflowIdentity(1),
+        operator = "myOp",
+        layerName = "1st-physical-op",
+        workerId = 3
+      )
+    }
   }
 
   // ----- getWorkerIndex -----

Reply via email to