This is an automated email from the ASF dual-hosted git repository.

aglinxinyuan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git


The following commit(s) were added to refs/heads/main by this push:
     new 9b2005d5b6 test(amber): add unit tests for visualization OpDescs 
(Image, FunnelPlot, VolcanoPlot, BulletChart) (#4809)
9b2005d5b6 is described below

commit 9b2005d5b6718e3a6b3181331bbd0a4d245e130e
Author: Yicong Huang <[email protected]>
AuthorDate: Sun May 3 11:03:01 2026 -0700

    test(amber): add unit tests for visualization OpDescs (Image, FunnelPlot, 
VolcanoPlot, BulletChart) (#4809)
    
    ### What changes were proposed in this PR?
    
    Adds scalatest coverage for four visualization operator descriptors
    under `common/workflow-operator/.../visualization/`. The existing thin
    spec on `ImageVisualizerOpDesc` is extended; specs for
    `FunnelPlotOpDesc`, `VolcanoPlotOpDesc`, and `BulletChartOpDesc` are
    new.
    
    The specs verify (per OpDesc) the user-friendly name + group +
    outputPorts wiring, the single-port `html-content` STRING schema
    returned by `getOutputSchemas`, the rendered Python source from
    `generatePythonCode` (operator class + plotly imports +
    `decode_python_template` runtime decode sites for each `EncodableString`
    field), and the missing-required-field assertion behavior — which
    differs by OpDesc:
    
    - `ImageVisualizerOpDesc.binaryContent` defaults to `null` (`var ... =
    _`), so `assert(binaryContent.nonEmpty)` inside `createBinaryData`
    dereferences null and raises `NullPointerException` before the assertion
    itself can fail.
    - `FunnelPlotOpDesc.x` / `.y` default to `""`, so the same assert raises
    `AssertionError`.
    - `VolcanoPlotOpDesc` and `BulletChartOpDesc` have no asserts in
    `generatePythonCode` — empty defaults render valid Python source.
    
    ### Any related issues, documentation, discussions?
    
    Closes #4807.
    
    Bug filed separately: most visualization OpDescs (29 files) have a
    duplicate `Map(operatorInfo.outputPorts.head.id -> outputSchema)`
    statement in `getOutputSchemas` — the first call is computed and
    discarded, only the second is the actual return value. Likely a
    copy-paste artifact.
    
    ### How was this PR tested?
    
    ```
    sbt scalafmtCheckAll
    sbt "WorkflowOperator/testOnly org.apache.texera.amber.operator.visualization.ImageViz.ImageVisualizerOpDescSpec org.apache.texera.amber.operator.visualization.funnelPlot.FunnelPlotOpDescSpec org.apache.texera.amber.operator.visualization.volcanoPlot.VolcanoPlotOpDescSpec org.apache.texera.amber.operator.visualization.bulletChart.BulletChartOpDescSpec"
    ```
    
    ### Was this PR authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Code (claude-opus-4-7)
    
    ---------
    
    Co-authored-by: Claude Opus 4.7 (1M context) <[email protected]>
---
 .../ImageViz/ImageVisualizerOpDescSpec.scala       |  59 +++++++++-
 .../bulletChart/BulletChartOpDescSpec.scala        | 119 +++++++++++++++++++++
 .../funnelPlot/FunnelPlotOpDescSpec.scala          |  95 ++++++++++++++++
 .../volcanoPlot/VolcanoPlotOpDescSpec.scala        |  95 ++++++++++++++++
 4 files changed, 366 insertions(+), 2 deletions(-)

diff --git 
a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/ImageViz/ImageVisualizerOpDescSpec.scala
 
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/ImageViz/ImageVisualizerOpDescSpec.scala
index 266f14e4d8..f03271aa36 100644
--- 
a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/ImageViz/ImageVisualizerOpDescSpec.scala
+++ 
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/ImageViz/ImageVisualizerOpDescSpec.scala
@@ -19,18 +19,73 @@
 
 package org.apache.texera.amber.operator.visualization.ImageViz
 
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
 import org.scalatest.BeforeAndAfter
 import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
 
-class ImageVisualizerOpDescSpec extends AnyFlatSpec with BeforeAndAfter {
+// Spec for ImageVisualizerOpDesc. Pins the operator metadata, the single-port
+// `html-content` output schema, the structure of the generated Python source,
+// and what `createBinaryData` does when `binaryContent` was never assigned.
+class ImageVisualizerOpDescSpec extends AnyFlatSpec with BeforeAndAfter with Matchers {
   var opDesc: ImageVisualizerOpDesc = _
   before {
     opDesc = new ImageVisualizerOpDesc()
   }
 
-  it should "throw assertion error if BinaryContent is empty" in {
+  it should "currently throw NullPointerException when binaryContent is uninitialized" in {
+    // Documents the present behavior without claiming it is the contract:
+    // `binaryContent` is declared `var binaryContent: EncodableString = _`,
+    // so an uninitialized reference field defaults to null and the
+    // `assert(binaryContent.nonEmpty)` inside `createBinaryData` reaches
+    // `null.nonEmpty` and throws NPE before the assert message can fire.
     assertThrows[NullPointerException] {
       opDesc.createBinaryData()
     }
   }
+
+  it should "eventually reject missing binaryContent with a controlled error (pendingUntilFixed)" in pendingUntilFixed {
+    // Intended contract: because `binaryContent` is declared
+    // `@JsonProperty(required = true)`, an unconfigured operator should
+    // surface a domain error (AssertionError or IllegalArgumentException),
+    // not an NPE from dereferencing null. Using pendingUntilFixed so a
+    // future validation fix flips this test from Pending to a deliberate
+    // failure that forces removal of the marker.
+    //
+    // The capture is typed as Throwable on purpose: AssertionError is a
+    // java.lang.Error, not a RuntimeException, so intercepting
+    // RuntimeException could never observe it and the test would stay
+    // Pending forever. Today's NPE lands in the `other` arm and keeps the
+    // test Pending.
+    intercept[Throwable](opDesc.createBinaryData()) match {
+      case _: AssertionError | _: IllegalArgumentException => succeed
+      case other => fail(s"expected AssertionError or IllegalArgumentException but got $other")
+    }
+  }
+
+  "ImageVisualizerOpDesc.operatorInfo" should "advertise the user-friendly name and Media group" in {
+    val info = opDesc.operatorInfo
+    info.userFriendlyName shouldBe "Image Visualizer"
+    info.operatorGroupName shouldBe OperatorGroupConstants.VISUALIZATION_MEDIA_GROUP
+    info.operatorDescription should include("image")
+  }
+
+  it should "expose exactly one output port wired through forVisualization" in {
+    opDesc.operatorInfo.outputPorts should have length 1
+  }
+
+  "ImageVisualizerOpDesc.getOutputSchemas" should "return a single-port schema with an html-content STRING column" in {
+    opDesc.binaryContent = "image_bytes"
+    val schemas = opDesc.getOutputSchemas(Map.empty)
+    schemas should have size 1
+    val (portId, schema) = schemas.head
+    // The schema must be keyed by the operator's only declared output port.
+    portId shouldBe opDesc.operatorInfo.outputPorts.head.id
+    schema.getAttributes should have length 1
+    schema.getAttributes.head.getName shouldBe "html-content"
+    schema.getAttributes.head.getType shouldBe AttributeType.STRING
+  }
+
+  "ImageVisualizerOpDesc.generatePythonCode" should "render a UDFOperatorV2 source with a runtime column-decode site" in {
+    // EncodableString fields are NOT emitted as literal strings — the pyb
+    // macro wraps them in `self.decode_python_template.decode("<base64>")`
+    // calls so the column name resolves at runtime. Verify the structure
+    // (operator class, body helper, decode site) instead of a literal name.
+    opDesc.binaryContent = "image_bytes"
+    val code = opDesc.generatePythonCode()
+    code should include("class ProcessTupleOperator(UDFOperatorV2)")
+    code should include("encode_image_to_html")
+    code should include("decode_python_template")
+  }
 }
diff --git 
a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/bulletChart/BulletChartOpDescSpec.scala
 
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/bulletChart/BulletChartOpDescSpec.scala
new file mode 100644
index 0000000000..fff547b939
--- /dev/null
+++ 
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/bulletChart/BulletChartOpDescSpec.scala
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.visualization.bulletChart
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+import java.util
+import java.util.{List => JList}
+
+// Spec for BulletChartOpDesc. Pins the operator metadata, the single-port
+// `html-content` output schema, the decode sites and `steps_data` literal in
+// the generated Python source, and the behavior on an unconfigured operator.
+class BulletChartOpDescSpec extends AnyFlatSpec with Matchers {
+
+  // Builds a fresh descriptor per call with `value` and `deltaReference` set;
+  // tests that need steps add them on top of this base configuration.
+  private def configured: BulletChartOpDesc = {
+    val op = new BulletChartOpDesc
+    op.value = "actualValue"
+    op.deltaReference = "100"
+    op
+  }
+
+  "BulletChartOpDesc.operatorInfo" should "advertise the user-friendly name and Financial group" in {
+    val info = (new BulletChartOpDesc).operatorInfo
+    info.userFriendlyName shouldBe "Bullet Chart"
+    info.operatorGroupName shouldBe OperatorGroupConstants.VISUALIZATION_FINANCIAL_GROUP
+    info.operatorDescription should include("Bullet Chart")
+  }
+
+  it should "expose exactly one output port wired through forVisualization" in {
+    (new BulletChartOpDesc).operatorInfo.outputPorts should have length 1
+  }
+
+  "BulletChartOpDesc.getOutputSchemas" should "return a single-port schema with an html-content STRING column" in {
+    val op = configured
+    val schemas = op.getOutputSchemas(Map.empty)
+    schemas should have size 1
+    val (portId, schema) = schemas.head
+    // The schema must be keyed by the operator's only declared output port.
+    portId shouldBe op.operatorInfo.outputPorts.head.id
+    schema.getAttributes should have length 1
+    schema.getAttributes.head.getName shouldBe "html-content"
+    schema.getAttributes.head.getType shouldBe AttributeType.STRING
+  }
+
+  "BulletChartOpDesc.generatePythonCode" should "render Python source with a runtime decode site for the value column" in {
+    // EncodableString fields are NOT emitted as literal strings — the pyb
+    // macro wraps them in `self.decode_python_template.decode("<base64>")`
+    // calls. The rendered source must reference the decoder symbol at least
+    // for `value` and `deltaReference`.
+    val code = configured.generatePythonCode()
+    code should include("plotly.graph_objects")
+    val decodeOccurrences = "decode_python_template".r.findAllIn(code).length
+    decodeOccurrences should be >= 2
+  }
+
+  it should "default to an empty steps list when none are configured" in {
+    // The bullet-chart template ships with several unrelated `[]` literals
+    // (`colors`, `valid_steps`, `step_errors`, `steps_list`, `html_chunks`),
+    // so a bare `code should include("[]")` is too weak. Anchor on the
+    // generated `steps_data = ...` literal directly so a regression that
+    // makes it non-empty would actually fail the assertion.
+    val code = configured.generatePythonCode()
+    code should include regex """steps_data\s*=\s*\[\]"""
+  }
+
+  it should "include each configured step's start/end JSON keys with extra decode sites" in {
+    val op = configured
+    val steps: JList[BulletChartStepDefinition] = new util.ArrayList[BulletChartStepDefinition]()
+    steps.add(new BulletChartStepDefinition("0", "50"))
+    steps.add(new BulletChartStepDefinition("50", "100"))
+    op.steps = steps
+    val code = op.generatePythonCode()
+    code should include("\"start\":")
+    code should include("\"end\":")
+    // Two steps × 2 EncodableString fields each = 4 extra decode sites on
+    // top of the value/deltaReference decodes from the base configuration.
+    val baseDecodes = "decode_python_template".r.findAllIn(configured.generatePythonCode()).length
+    val withSteps = "decode_python_template".r.findAllIn(code).length
+    withSteps shouldBe baseDecodes + 4
+  }
+
+  it should "currently render a code block even with the default empty configuration (no assert guard)" in {
+    // Documents the present behavior: BulletChartOpDesc has no assert
+    // guards inside generatePythonCode, so empty defaults still produce
+    // syntactically valid Python source. The intended contract lives in
+    // the pendingUntilFixed test below.
+    val op = new BulletChartOpDesc
+    val code = op.generatePythonCode()
+    code should include("plotly.graph_objects")
+  }
+
+  it should "eventually reject empty required value/deltaReference like FunnelPlot/ImageVisualizer (pendingUntilFixed)" in pendingUntilFixed {
+    // Intended contract: `value` and `deltaReference` are marked required
+    // on `BulletChartOpDesc`, so generatePythonCode on a default-constructed
+    // instance should raise instead of rendering empty-string column refs.
+    // Using pendingUntilFixed so a future validation fix flips this test
+    // from Pending to a deliberate failure and forces removal of the marker.
+    //
+    // The capture is typed as Throwable on purpose: a FunnelPlot-style
+    // `assert` guard throws java.lang.AssertionError, which is an Error and
+    // not a RuntimeException. Intercepting RuntimeException would turn that
+    // fix into an intercept failure and leave this test Pending forever.
+    val op = new BulletChartOpDesc
+    intercept[Throwable](op.generatePythonCode()) match {
+      case _: AssertionError | _: RuntimeException => succeed
+      case other => fail(s"expected AssertionError or RuntimeException but got $other")
+    }
+  }
+}
diff --git 
a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/funnelPlot/FunnelPlotOpDescSpec.scala
 
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/funnelPlot/FunnelPlotOpDescSpec.scala
new file mode 100644
index 0000000000..5f46a0d6f7
--- /dev/null
+++ 
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/funnelPlot/FunnelPlotOpDescSpec.scala
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.visualization.funnelPlot
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+// Exercises FunnelPlotOpDesc: operator metadata, the html-content output
+// schema, the generated Python source, and the guard on empty x/y.
+class FunnelPlotOpDescSpec extends AnyFlatSpec with Matchers {
+
+  // Decoder symbol the tests count in the rendered Python source.
+  private val DecodeMarker = "decode_python_template"
+
+  // Fresh descriptor on every call, with x and y bound to sample column names.
+  private def configured: FunnelPlotOpDesc = {
+    val desc = new FunnelPlotOpDesc
+    desc.x = "stage"
+    desc.y = "count"
+    desc
+  }
+
+  // Number of times the decoder symbol occurs in a rendered source.
+  private def decodeSites(rendered: String): Int =
+    DecodeMarker.r.findAllIn(rendered).length
+
+  "FunnelPlotOpDesc.operatorInfo" should "advertise the user-friendly name and Financial group" in {
+    val metadata = new FunnelPlotOpDesc().operatorInfo
+    metadata.userFriendlyName shouldBe "Funnel Plot"
+    metadata.operatorGroupName shouldBe OperatorGroupConstants.VISUALIZATION_FINANCIAL_GROUP
+    metadata.operatorDescription should include("Funnel")
+  }
+
+  it should "expose exactly one output port wired through forVisualization" in {
+    val ports = new FunnelPlotOpDesc().operatorInfo.outputPorts
+    ports should have length 1
+  }
+
+  "FunnelPlotOpDesc.getOutputSchemas" should "return a single-port schema with an html-content STRING column" in {
+    val desc = configured
+    val byPort = desc.getOutputSchemas(Map.empty)
+    byPort should have size 1
+    // The only entry must be keyed by the operator's single output port.
+    val (portId, schema) = byPort.head
+    portId shouldBe desc.operatorInfo.outputPorts.head.id
+    val attributes = schema.getAttributes
+    attributes should have length 1
+    val only = attributes.head
+    only.getName shouldBe "html-content"
+    only.getType shouldBe AttributeType.STRING
+  }
+
+  "FunnelPlotOpDesc.generatePythonCode" should "render a UDFTableOperator source with runtime decode sites for x and y" in {
+    // Column names held in EncodableString fields never appear verbatim in
+    // the output; the pyb macro renders each as a
+    // `self.decode_python_template.decode("<base64>")` call. With x and y
+    // both configured, the source therefore carries two or more decode sites.
+    val rendered = configured.generatePythonCode()
+    rendered should include("class ProcessTableOperator(UDFTableOperator)")
+    rendered should include("plotly.express")
+    decodeSites(rendered) should be >= 2
+  }
+
+  it should "render the optional color argument only when color is configured" in {
+    val plainSource = configured.generatePythonCode()
+    val colored = configured
+    colored.color = "category"
+    val coloredSource = colored.generatePythonCode()
+    plainSource should not include "color="
+    coloredSource should include("color=")
+    // Configuring color contributes exactly one decode site on top of the
+    // ones already rendered for x and y.
+    decodeSites(coloredSource) shouldBe decodeSites(plainSource) + 1
+  }
+
+  it should "fail-fast when required x/y are unset (the assert guards inside createPlotlyFigure)" in {
+    // createPlotlyFigure asserts that x and y are non-empty. Both hold ""
+    // here, so the assertion itself fires, unlike ImageVisualizerOpDesc,
+    // whose null field yields an NPE first.
+    val blank = new FunnelPlotOpDesc
+    blank.x = ""
+    blank.y = ""
+    assertThrows[AssertionError] {
+      blank.generatePythonCode()
+    }
+  }
+}
diff --git 
a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/volcanoPlot/VolcanoPlotOpDescSpec.scala
 
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/volcanoPlot/VolcanoPlotOpDescSpec.scala
new file mode 100644
index 0000000000..e85ed589ef
--- /dev/null
+++ 
b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/visualization/volcanoPlot/VolcanoPlotOpDescSpec.scala
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.visualization.volcanoPlot
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+// Spec for VolcanoPlotOpDesc. Pins the operator metadata, the single-port
+// `html-content` output schema, the structure of the generated Python source,
+// and the behavior on an unconfigured operator.
+class VolcanoPlotOpDescSpec extends AnyFlatSpec with Matchers {
+
+  // Builds a fresh descriptor per call with both column fields set.
+  private def configured: VolcanoPlotOpDesc = {
+    val op = new VolcanoPlotOpDesc
+    op.effectColumn = "log2fc"
+    op.pvalueColumn = "pvalue"
+    op
+  }
+
+  "VolcanoPlotOpDesc.operatorInfo" should "advertise the user-friendly name and Scientific group" in {
+    val info = (new VolcanoPlotOpDesc).operatorInfo
+    info.userFriendlyName shouldBe "Volcano Plot"
+    info.operatorGroupName shouldBe OperatorGroupConstants.VISUALIZATION_SCIENTIFIC_GROUP
+    info.operatorDescription should include("statistical")
+  }
+
+  it should "expose exactly one output port wired through forVisualization" in {
+    (new VolcanoPlotOpDesc).operatorInfo.outputPorts should have length 1
+  }
+
+  "VolcanoPlotOpDesc.getOutputSchemas" should "return a single-port schema with an html-content STRING column" in {
+    val op = configured
+    val schemas = op.getOutputSchemas(Map.empty)
+    schemas should have size 1
+    val (portId, schema) = schemas.head
+    // The schema must be keyed by the operator's only declared output port.
+    portId shouldBe op.operatorInfo.outputPorts.head.id
+    schema.getAttributes should have length 1
+    schema.getAttributes.head.getName shouldBe "html-content"
+    schema.getAttributes.head.getType shouldBe AttributeType.STRING
+  }
+
+  "VolcanoPlotOpDesc.generatePythonCode" should "render a UDFTableOperator source that decodes both column references" in {
+    // EncodableString fields are NOT emitted as literal column names — the
+    // pyb macro wraps them in `self.decode_python_template.decode("<base64>")`
+    // calls so the column name is resolved at runtime. Verify the structure
+    // (class + import + decode site count) instead of substring matches.
+    val code = configured.generatePythonCode()
+    code should include("class ProcessTableOperator(UDFTableOperator)")
+    code should include("plotly.express")
+    code should include("-log10(pvalue)")
+    val decodeOccurrences = "decode_python_template".r.findAllIn(code).length
+    decodeOccurrences should be >= 2
+  }
+
+  it should "currently render code even when required fields are empty (no assert guard)" in {
+    // Documents the present behavior: VolcanoPlotOpDesc does not assert on
+    // its required fields inside `generatePythonCode`. An empty
+    // configuration therefore renders syntactically valid Python that
+    // references an empty string. The intended contract is split out into
+    // the pendingUntilFixed test below so this assertion no longer reads
+    // as the contract.
+    val op = new VolcanoPlotOpDesc
+    val code = op.generatePythonCode()
+    code should include("class ProcessTableOperator(UDFTableOperator)")
+  }
+
+  it should "eventually reject empty required fields like FunnelPlot/ImageVisualizer (pendingUntilFixed)" in pendingUntilFixed {
+    // Intended contract: `effectColumn` and `pvalueColumn` are marked
+    // required on `VolcanoPlotOpDesc`, so generatePythonCode on a
+    // default-constructed instance should raise instead of producing a
+    // string-literal-empty payload. Using pendingUntilFixed so a future
+    // validation fix flips this test from Pending to a deliberate failure
+    // and forces removal of the marker.
+    //
+    // The capture is typed as Throwable on purpose: a FunnelPlot-style
+    // `assert` guard throws java.lang.AssertionError, which is an Error and
+    // not a RuntimeException. Intercepting RuntimeException would turn that
+    // fix into an intercept failure and leave this test Pending forever.
+    val op = new VolcanoPlotOpDesc
+    intercept[Throwable](op.generatePythonCode()) match {
+      case _: AssertionError | _: RuntimeException => succeed
+      case other => fail(s"expected AssertionError or RuntimeException but got $other")
+    }
+  }
+}

Reply via email to