This is an automated email from the ASF dual-hosted git repository.

github-merge-queue[bot] pushed a commit to branch gh-readonly-queue/main/pr-5878-6eb4165f7ca49ba4eb9d6727971097763c9a8377
in repository https://gitbox.apache.org/repos/asf/texera.git

commit 7a9730bee0986e6d51e29aa28936665336aa7b44
Author: Xinyuan Lin <[email protected]>
AuthorDate: Tue Jun 23 15:27:38 2026 -0700

    test(workflow-operator): add unit test coverage for machine-learning operator types (Scorer, HyperParameters, SklearnML base) (#5878)
    
    ### What changes were proposed in this PR?
    
    Pin behavior of three previously-untested machine-learning operator
    types in `common/workflow-operator/machineLearning/`. No production-code
    changes.
    
    | Spec | Source class | Tests |
    | --- | --- | --- |
    | `MachineLearningScorerOpDescSpec` | `MachineLearningScorerOpDesc` | 6 |
    | `HyperParametersSpec` | `HyperParameters` | 5 |
    | `SklearnMLOperatorDescriptorSpec` | `SklearnMLOperatorDescriptor` (abstract base) | 3 |
    
    **Behavior pinned**
    
    | Surface | Contract |
    | --- | --- |
    | `MachineLearningScorerOpDesc` | operatorInfo (`Machine Learning Scorer`, MACHINE_LEARNING_GENERAL_GROUP); `getOutputSchemas` branches — classification → a `Class` column, regression → empty; `generatePythonCode` structure; round-trip |
    | `HyperParameters` | generic config bag — `parameter`/`attribute`/`value` null defaults + `parametersSource == false`; `@JsonProperty` wire-keys; `Include.NON_NULL` omits null fields on a fresh instance; populated round-trip |
    | `SklearnMLOperatorDescriptor` | abstract base — operatorInfo derived from the subclass (`training` + `parameter` inputs in that order, with `parameter` depending on `training`; one output; ADVANCED_SKLEARN_GROUP); fixed `Model`/`Parameters` `getOutputSchemas`; base-field defaults |
    
    **Note for reviewers:** `SklearnMLOperatorDescriptor` is abstract, so
    its base contract is exercised through a **real** concrete subclass
    (`SklearnAdvancedKNNClassifierTrainerOpDesc`) rather than a test stub —
    a stub placed under `org.apache.texera.amber.operator.*` would be picked
    up by `PythonCodeRawInvalidTextSpec`'s classpath scanner (which
    instantiates every `PythonOperatorDescriptor` and runs `py_compile`).
    Using a real subclass keeps the spec off that scanner.
    
    ### Any related issues, documentation, discussions?
    
    Closes #5875.
    
    ### How was this PR tested?
    
    - `sbt "WorkflowOperator/testOnly
    org.apache.texera.amber.operator.machineLearning.Scorer.MachineLearningScorerOpDescSpec
    org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.base.HyperParametersSpec
    org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.base.SklearnMLOperatorDescriptorSpec"`
    — 14 tests, all green
    - `sbt "WorkflowOperator/Test/scalafmtCheck"` and `sbt
    "WorkflowOperator/Test/scalafix --check"` — clean
    - CI to confirm
    
    ### Was this PR authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Code (Opus 4.8 [1M context])
---
 .../Scorer/MachineLearningScorerOpDescSpec.scala   | 95 ++++++++++++++++++++++
 .../sklearnAdvanced/base/HyperParametersSpec.scala | 79 ++++++++++++++++++
 .../base/SklearnMLOperatorDescriptorSpec.scala     | 72 ++++++++++++++++
 3 files changed, 246 insertions(+)

diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/Scorer/MachineLearningScorerOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/Scorer/MachineLearningScorerOpDescSpec.scala
new file mode 100644
index 0000000000..14909b5c9f
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/Scorer/MachineLearningScorerOpDescSpec.scala
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.machineLearning.Scorer
+
+import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema}
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+import java.nio.charset.StandardCharsets
+import java.util.Base64
+
+class MachineLearningScorerOpDescSpec extends AnyFlatSpec with Matchers {
+
+  "MachineLearningScorerOpDesc.operatorInfo" should
+    "advertise the name and Machine Learning General group" in {
+    val info = (new MachineLearningScorerOpDesc).operatorInfo
+    info.userFriendlyName shouldBe "Machine Learning Scorer"
+    info.operatorDescription shouldBe "Scorer for machine learning models"
+    info.operatorGroupName shouldBe OperatorGroupConstants.MACHINE_LEARNING_GENERAL_GROUP
+    info.inputPorts should have length 1
+    info.outputPorts should have length 1
+  }
+
+  "MachineLearningScorerOpDesc" should "default isRegression false and the column fields to empty" in {
+    val d = new MachineLearningScorerOpDesc
+    d.isRegression shouldBe false
+    d.actualValueColumn shouldBe ""
+    d.predictValueColumn shouldBe ""
+    d.classificationMetrics shouldBe empty
+    d.regressionMetrics shouldBe empty
+  }
+
+  "MachineLearningScorerOpDesc.getOutputSchemas" should
+    "include a Class column for classification with no metrics" in {
+    val d = new MachineLearningScorerOpDesc
+    d.getOutputSchemas(Map.empty) shouldBe Map(
+      d.operatorInfo.outputPorts.head.id -> Schema(
+        List(new Attribute("Class", AttributeType.STRING))
+      )
+    )
+  }
+
+  it should "produce an empty schema for regression with no metrics" in {
+    val d = new MachineLearningScorerOpDesc
+    d.isRegression = true
+    val out = d.getOutputSchemas(Map.empty)
+    out.keySet shouldBe Set(d.operatorInfo.outputPorts.head.id)
+    out(d.operatorInfo.outputPorts.head.id).getAttributes shouldBe empty
+  }
+
+  "MachineLearningScorerOpDesc.generatePythonCode" should "emit the scorer table operator" in {
+    val d = new MachineLearningScorerOpDesc
+    d.actualValueColumn = "y"
+    d.predictValueColumn = "yhat"
+    val code = d.generatePythonCode()
+    code should include("class ProcessTableOperator(UDFTableOperator)")
+    code should include("from sklearn.metrics import")
+    // actualValueColumn/predictValueColumn are EncodableString: base64-encoded into the emitted code.
+    code should include(Base64.getEncoder.encodeToString("y".getBytes(StandardCharsets.UTF_8)))
+    code should include(Base64.getEncoder.encodeToString("yhat".getBytes(StandardCharsets.UTF_8)))
+  }
+
+  "MachineLearningScorerOpDesc" should "round-trip its config fields through the polymorphic base" in {
+    val d = new MachineLearningScorerOpDesc
+    d.isRegression = true
+    d.actualValueColumn = "y"
+    d.predictValueColumn = "yhat"
+    val restored = objectMapper.readValue(objectMapper.writeValueAsString(d), classOf[LogicalOp])
+    restored shouldBe a[MachineLearningScorerOpDesc]
+    val s = restored.asInstanceOf[MachineLearningScorerOpDesc]
+    s.isRegression shouldBe true
+    s.actualValueColumn shouldBe "y"
+    s.predictValueColumn shouldBe "yhat"
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/base/HyperParametersSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/base/HyperParametersSpec.scala
new file mode 100644
index 0000000000..d0e6b96153
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/base/HyperParametersSpec.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.base
+
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class HyperParametersSpec extends AnyFlatSpec with Matchers {
+
+  "HyperParameters" should
+    "default parameter/attribute/value to null and parametersSource to false" in {
+    val h = new HyperParameters[String]
+    h.parameter shouldBe null
+    h.attribute shouldBe null
+    h.value shouldBe null
+    h.parametersSource shouldBe false
+  }
+
+  it should "allow all fields to be assigned post-construction" in {
+    val h = new HyperParameters[String]
+    h.parameter = "alpha"
+    h.attribute = "colA"
+    h.value = "0.5"
+    h.parametersSource = true
+    h.parameter shouldBe "alpha"
+    h.attribute shouldBe "colA"
+    h.value shouldBe "0.5"
+    h.parametersSource shouldBe true
+  }
+
+  "HyperParameters" should "serialize attribute and value under their wire-keys" in {
+    val h = new HyperParameters[String]
+    h.attribute = "colA"
+    h.value = "0.5"
+    val tree = objectMapper.readTree(objectMapper.writeValueAsString(h))
+    tree.get("attribute").asText shouldBe "colA"
+    tree.get("value").asText shouldBe "0.5"
+  }
+
+  "HyperParameters JSON" should "omit null fields (Include.NON_NULL) for a fresh instance" in {
+    val tree = objectMapper.readTree(objectMapper.writeValueAsString(new HyperParameters[String]))
+    tree.has("parameter") shouldBe false
+    tree.has("attribute") shouldBe false
+    tree.has("value") shouldBe false
+    tree.has("parametersSource") shouldBe true
+  }
+
+  it should "round-trip populated fields" in {
+    val h = new HyperParameters[String]
+    h.parameter = "alpha"
+    h.attribute = "colA"
+    h.value = "0.5"
+    h.parametersSource = true
+    val restored =
+      objectMapper.readValue(objectMapper.writeValueAsString(h), classOf[HyperParameters[String]])
+    restored.parameter shouldBe "alpha"
+    restored.attribute shouldBe "colA"
+    restored.value shouldBe "0.5"
+    restored.parametersSource shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/base/SklearnMLOperatorDescriptorSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/base/SklearnMLOperatorDescriptorSpec.scala
new file mode 100644
index 0000000000..eac74a54d6
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/machineLearning/sklearnAdvanced/base/SklearnMLOperatorDescriptorSpec.scala
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.base
+
+import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema}
+import org.apache.texera.amber.core.workflow.PortIdentity
+import org.apache.texera.amber.operator.machineLearning.sklearnAdvanced.KNNTrainer.SklearnAdvancedKNNClassifierTrainerOpDesc
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class SklearnMLOperatorDescriptorSpec extends AnyFlatSpec with Matchers {
+
+  // Exercise the abstract base's contract through a real concrete subclass — no test
+  // stub. A stub placed under org.apache.texera.amber.operator.* would be picked up by
+  // PythonCodeRawInvalidTextSpec's classpath scanner; using a real subclass avoids that.
+  private def newOp(): SklearnMLOperatorDescriptor[_] =
+    new SklearnAdvancedKNNClassifierTrainerOpDesc
+
+  "SklearnMLOperatorDescriptor.operatorInfo" should
+    "derive name/description and advertise the training + parameter inputs and one output" in {
+    val info = newOp().operatorInfo
+    info.userFriendlyName shouldBe "KNN Classifier"
+    info.operatorDescription shouldBe "Sklearn KNN Classifier Operator"
+    info.operatorGroupName shouldBe OperatorGroupConstants.ADVANCED_SKLEARN_GROUP
+    info.inputPorts should have length 2
+    info.inputPorts.head.id shouldBe PortIdentity(0)
+    info.inputPorts.head.displayName shouldBe "training"
+    info.inputPorts.last.id shouldBe PortIdentity(1)
+    info.inputPorts.last.displayName shouldBe "parameter"
+    info.inputPorts.last.dependencies shouldBe List(PortIdentity(0))
+    info.outputPorts should have length 1
+  }
+
+  "SklearnMLOperatorDescriptor.getOutputSchemas" should
+    "produce the fixed Model/Parameters schema keyed by the declared output port" in {
+    val op = newOp()
+    op.getOutputSchemas(Map.empty) shouldBe Map(
+      op.operatorInfo.outputPorts.head.id -> Schema(
+        List(
+          new Attribute("Model", AttributeType.BINARY),
+          new Attribute("Parameters", AttributeType.STRING)
+        )
+      )
+    )
+  }
+
+  "SklearnMLOperatorDescriptor" should
+    "default paraList to empty, groundTruthAttribute to empty, and selectedFeatures to null" in {
+    val op = newOp()
+    op.paraList shouldBe empty
+    op.groundTruthAttribute shouldBe ""
+    op.selectedFeatures shouldBe null
+  }
+}

Reply via email to