This is an automated email from the ASF dual-hosted git repository.
aicam pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git
The following commit(s) were added to refs/heads/main by this push:
new 538ae1d5cb fix: reject hyphenated layer names in createWorkerIdentity
(#5051)
538ae1d5cb is described below
commit 538ae1d5cb9027b34b29e99d33c2c286970b29b1
Author: Matthew B. <[email protected]>
AuthorDate: Tue May 19 22:01:48 2026 -0700
fix: reject hyphenated layer names in createWorkerIdentity (#5051)
### What changes were proposed in this PR?
`VirtualIdentityUtils.createWorkerIdentity` now rejects layer names
containing `-` via `require`. The worker-name format
`Worker:WF<id>-<op>-<layer>-<workerId>` is inherently ambiguous when
both `op` and `layer` may contain `-`, and production operator IDs (e.g.
`ClassName-UUID`) structurally must. Layer names in production are
always hyphen-free (`main`, `build`, `probe`, `localAgg`, `globalAgg`);
this change lifts that invariant from "implied by the regex" to
"enforced at the boundary", so callers fail loudly at construction
instead of silently mis-parsing later. Two `WorkerSpec` test fixtures
(`"1st-physical-op"`, `"2nd physical-op"`) are renamed to hyphen-free
equivalents; they only needed to be two distinct strings.
### Any related issues, documentation, or discussions?
Closes: #4728
### How was this PR tested?
The previously-pinned `VirtualIdentityUtilsSpec` bug spec ("misparse
layer names that contain hyphens (current behavior)") is replaced with a
spec asserting that `createWorkerIdentity` rejects hyphenated layer
names. Existing `VirtualIdentityUtilsSpec`, `WorkerSpec`, and the Python
`test_virtual_identity.py` cases continue to pass (Python's
`get_worker_index` only extracts the trailing index group and is
unaffected).
### Was this PR authored or co-authored using generative AI tooling?
Co-authored with Claude Opus 4.7 in compliance with ASF
Co-authored-by: ali risheh <[email protected]>
---
.../engine/architecture/worker/WorkerSpec.scala | 4 ++--
.../texera/amber/util/VirtualIdentityUtils.scala | 5 ++++-
.../amber/util/VirtualIdentityUtilsSpec.scala | 26 ++++++++++++----------
3 files changed, 20 insertions(+), 15 deletions(-)
diff --git
a/amber/src/test/scala/org/apache/texera/amber/engine/architecture/worker/WorkerSpec.scala
b/amber/src/test/scala/org/apache/texera/amber/engine/architecture/worker/WorkerSpec.scala
index f6f33ebbb0..df9cb086a6 100644
---
a/amber/src/test/scala/org/apache/texera/amber/engine/architecture/worker/WorkerSpec.scala
+++
b/amber/src/test/scala/org/apache/texera/amber/engine/architecture/worker/WorkerSpec.scala
@@ -98,9 +98,9 @@ class WorkerSpec
private val mockPortId = PortIdentity()
private val mockLink =
PhysicalLink(
- PhysicalOpIdentity(operatorIdentity, "1st-physical-op"),
+ PhysicalOpIdentity(operatorIdentity, "firstPhysicalOp"),
mockPortId,
- PhysicalOpIdentity(operatorIdentity, "2nd-physical-op"),
+ PhysicalOpIdentity(operatorIdentity, "secondPhysicalOp"),
mockPortId
)
diff --git
a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/VirtualIdentityUtils.scala
b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/VirtualIdentityUtils.scala
index 7c1bfafb8d..586d77a9c5 100644
---
a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/VirtualIdentityUtils.scala
+++
b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/VirtualIdentityUtils.scala
@@ -39,7 +39,10 @@ object VirtualIdentityUtils {
layerName: String,
workerId: Int
): ActorVirtualIdentity = {
-
+ require(
+ !layerName.contains('-'),
+ s"layerName must not contain '-' (worker-name parsing relies on this):
$layerName"
+ )
ActorVirtualIdentity(
s"Worker:WF${workflowId.id}-$operator-$layerName-$workerId"
)
diff --git
a/common/workflow-core/src/test/scala/org/apache/texera/amber/util/VirtualIdentityUtilsSpec.scala
b/common/workflow-core/src/test/scala/org/apache/texera/amber/util/VirtualIdentityUtilsSpec.scala
index a25a0c46da..8ebf7dabec 100644
---
a/common/workflow-core/src/test/scala/org/apache/texera/amber/util/VirtualIdentityUtilsSpec.scala
+++
b/common/workflow-core/src/test/scala/org/apache/texera/amber/util/VirtualIdentityUtilsSpec.scala
@@ -78,18 +78,20 @@ class VirtualIdentityUtilsSpec extends AnyFlatSpec with
Matchers {
opId.layerName shouldBe "main"
}
- it should "misparse layer names that contain hyphens (current behavior)" in {
- // The layer capture group is `(\w+)`, which does not allow `-`. When the
- // real layer name contains hyphens (e.g. "1st-physical-op", as seen in
- // amber WorkerSpec), the greedy operator group eats most of the layer:
- // operator becomes "myOp-1st-physical" and layer becomes "op". This pins
- // the current bug so a future fix that broadens `workerNamePattern` to
- // accept hyphenated layers will surface here and force this spec to be
- // updated alongside the implementation.
- val actor = ActorVirtualIdentity("Worker:WF1-myOp-1st-physical-op-3")
- val opId = VirtualIdentityUtils.getPhysicalOpId(actor)
- opId.logicalOpId.id shouldBe "myOp-1st-physical"
- opId.layerName shouldBe "op"
+ "createWorkerIdentity" should "reject layer names containing '-'" in {
+ // The worker-name format `Worker:WF<id>-<op>-<layer>-<workerId>` is
+ // inherently ambiguous when both `op` and `layer` may contain `-`, and
+ // production operator IDs (e.g. `<className>-<UUID>`) structurally must.
+ // We therefore enforce that layer names do not contain `-` at creation
+ // time so the bad state can never be constructed.
+ assertThrows[IllegalArgumentException] {
+ VirtualIdentityUtils.createWorkerIdentity(
+ WorkflowIdentity(1),
+ operator = "myOp",
+ layerName = "1st-physical-op",
+ workerId = 3
+ )
+ }
}
// ----- getWorkerIndex -----