This is an automated email from the ASF dual-hosted git repository.

github-merge-queue[bot] pushed a commit to branch 
gh-readonly-queue/main/pr-5925-ad908b74857b86f4bc6087b61fcbf9a54f738edb
in repository https://gitbox.apache.org/repos/asf/texera.git

commit e17de6fd622daa340594dd268b6d74632cfb08c8
Author: Xinyuan Lin <[email protected]>
AuthorDate: Fri Jun 26 01:35:47 2026 -0700

    test(workflow-operator): add unit test coverage for Sklearn Naive Bayes descriptors (#5925)
    
    ### What changes were proposed in this PR?
    
    Pin behavior of the four previously-untested Sklearn Naive Bayes
    classifier descriptors in `common/workflow-operator`. No production-code
    changes.
    
    | Spec | Source class | Tests |
    | --- | --- | --- |
    | `SklearnBernoulliNaiveBayesOpDescSpec` |
    `SklearnBernoulliNaiveBayesOpDesc` | 5 |
    | `SklearnComplementNaiveBayesOpDescSpec` |
    `SklearnComplementNaiveBayesOpDesc` | 5 |
    | `SklearnGaussianNaiveBayesOpDescSpec` |
    `SklearnGaussianNaiveBayesOpDesc` | 5 |
    | `SklearnMultinomialNaiveBayesOpDescSpec` |
    `SklearnMultinomialNaiveBayesOpDesc` | 5 |
    
    **Behavior pinned**
    
    | Surface | Contract |
    | --- | --- |
    | `operatorInfo` | exact model name + `Sklearn <name> Operator`
    description; Sklearn group; training/testing input ports + one blocking
    output |
    | field defaults | `countVectorizer`/`tfidfTransformer` `false`;
    `target`/`text` `null` |
    | `getOutputSchemas` | `model_name` (STRING) + `model` (BINARY) keyed by
    the declared output port |
    | `generatePythonCode` | imports and instantiates the matching sklearn
    estimator (e.g. `BernoulliNB`) via `make_pipeline` |
    | Round-trip | config fields preserved through the polymorphic
    `LogicalOp` base, with the correct `operatorType` discriminator |
    
    ### Any related issues, documentation, discussions?
    
    Part of the ongoing `workflow-operator` unit-test coverage effort.
    
    ### How was this PR tested?
    
    - `sbt "WorkflowOperator/testOnly *SklearnBernoulliNaiveBayesOpDescSpec
    *SklearnComplementNaiveBayesOpDescSpec
    *SklearnGaussianNaiveBayesOpDescSpec
    *SklearnMultinomialNaiveBayesOpDescSpec"` — 20 tests, all green
    - `sbt "WorkflowOperator/Test/scalafmtCheck"` and `sbt
    "WorkflowOperator/scalafixAll --check"` — clean
    - CI to confirm
    
    ### Was this PR authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Code (Opus 4.8 [1M context])
---
 .../SklearnBernoulliNaiveBayesOpDescSpec.scala     | 81 ++++++++++++++++++++++
 .../SklearnComplementNaiveBayesOpDescSpec.scala    | 81 ++++++++++++++++++++++
 .../SklearnGaussianNaiveBayesOpDescSpec.scala      | 81 ++++++++++++++++++++++
 .../SklearnMultinomialNaiveBayesOpDescSpec.scala   | 81 ++++++++++++++++++++++
 4 files changed, 324 insertions(+)

diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/SklearnBernoulliNaiveBayesOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/SklearnBernoulliNaiveBayesOpDescSpec.scala
new file mode 100644
index 0000000000..b34d9b2e1b
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/SklearnBernoulliNaiveBayesOpDescSpec.scala
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class SklearnBernoulliNaiveBayesOpDescSpec extends AnyFlatSpec with Matchers {
+
+  "SklearnBernoulliNaiveBayesOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn group, and the training/testing port shape" in {
+    val info = (new SklearnBernoulliNaiveBayesOpDesc).operatorInfo
+    info.userFriendlyName shouldBe "Bernoulli Naive Bayes"
+    info.operatorDescription shouldBe "Sklearn Bernoulli Naive Bayes Operator"
+    info.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_GROUP
+    info.inputPorts.map(_.displayName) shouldBe List("training", "testing")
+    info.outputPorts should have length 1
+    info.outputPorts.head.blocking shouldBe true
+  }
+
+  "SklearnBernoulliNaiveBayesOpDesc" should "default its config fields" in {
+    val d = new SklearnBernoulliNaiveBayesOpDesc
+    d.countVectorizer shouldBe false
+    d.tfidfTransformer shouldBe false
+    d.target shouldBe null
+    d.text shouldBe null
+  }
+
+  "SklearnBernoulliNaiveBayesOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val d = new SklearnBernoulliNaiveBayesOpDesc
+    val schema = d.getOutputSchemas(Map.empty)(d.operatorInfo.outputPorts.head.id)
+    schema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    schema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+
+  "SklearnBernoulliNaiveBayesOpDesc.generatePythonCode" should
+    "import and instantiate the BernoulliNB estimator" in {
+    val d = new SklearnBernoulliNaiveBayesOpDesc
+    d.target = "y"
+    val code = d.generatePythonCode()
+    code should include("from sklearn.naive_bayes import BernoulliNB")
+    code should include("make_pipeline")
+    code should include("Bernoulli Naive Bayes")
+  }
+
+  "SklearnBernoulliNaiveBayesOpDesc" should
+    "round-trip its config fields through the polymorphic base" in {
+    val d = new SklearnBernoulliNaiveBayesOpDesc
+    d.target = "label"
+    d.countVectorizer = true
+    val json = objectMapper.writeValueAsString(d)
+    json should include("\"operatorType\":\"SklearnBernoulliNaiveBayes\"")
+    val restored = objectMapper.readValue(json, classOf[LogicalOp])
+    restored shouldBe a[SklearnBernoulliNaiveBayesOpDesc]
+    val r = restored.asInstanceOf[SklearnBernoulliNaiveBayesOpDesc]
+    r.target shouldBe "label"
+    r.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/SklearnComplementNaiveBayesOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/SklearnComplementNaiveBayesOpDescSpec.scala
new file mode 100644
index 0000000000..0d50eb4577
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/SklearnComplementNaiveBayesOpDescSpec.scala
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class SklearnComplementNaiveBayesOpDescSpec extends AnyFlatSpec with Matchers {
+
+  "SklearnComplementNaiveBayesOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn group, and the training/testing port shape" in {
+    val info = (new SklearnComplementNaiveBayesOpDesc).operatorInfo
+    info.userFriendlyName shouldBe "Complement Naive Bayes"
+    info.operatorDescription shouldBe "Sklearn Complement Naive Bayes Operator"
+    info.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_GROUP
+    info.inputPorts.map(_.displayName) shouldBe List("training", "testing")
+    info.outputPorts should have length 1
+    info.outputPorts.head.blocking shouldBe true
+  }
+
+  "SklearnComplementNaiveBayesOpDesc" should "default its config fields" in {
+    val d = new SklearnComplementNaiveBayesOpDesc
+    d.countVectorizer shouldBe false
+    d.tfidfTransformer shouldBe false
+    d.target shouldBe null
+    d.text shouldBe null
+  }
+
+  "SklearnComplementNaiveBayesOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val d = new SklearnComplementNaiveBayesOpDesc
+    val schema = d.getOutputSchemas(Map.empty)(d.operatorInfo.outputPorts.head.id)
+    schema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    schema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+
+  "SklearnComplementNaiveBayesOpDesc.generatePythonCode" should
+    "import and instantiate the ComplementNB estimator" in {
+    val d = new SklearnComplementNaiveBayesOpDesc
+    d.target = "y"
+    val code = d.generatePythonCode()
+    code should include("from sklearn.naive_bayes import ComplementNB")
+    code should include("make_pipeline")
+    code should include("Complement Naive Bayes")
+  }
+
+  "SklearnComplementNaiveBayesOpDesc" should
+    "round-trip its config fields through the polymorphic base" in {
+    val d = new SklearnComplementNaiveBayesOpDesc
+    d.target = "label"
+    d.countVectorizer = true
+    val json = objectMapper.writeValueAsString(d)
+    json should include("\"operatorType\":\"SklearnComplementNaiveBayes\"")
+    val restored = objectMapper.readValue(json, classOf[LogicalOp])
+    restored shouldBe a[SklearnComplementNaiveBayesOpDesc]
+    val r = restored.asInstanceOf[SklearnComplementNaiveBayesOpDesc]
+    r.target shouldBe "label"
+    r.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/SklearnGaussianNaiveBayesOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/SklearnGaussianNaiveBayesOpDescSpec.scala
new file mode 100644
index 0000000000..9c25894dc1
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/SklearnGaussianNaiveBayesOpDescSpec.scala
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class SklearnGaussianNaiveBayesOpDescSpec extends AnyFlatSpec with Matchers {
+
+  "SklearnGaussianNaiveBayesOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn group, and the training/testing port shape" in {
+    val info = (new SklearnGaussianNaiveBayesOpDesc).operatorInfo
+    info.userFriendlyName shouldBe "Gaussian Naive Bayes"
+    info.operatorDescription shouldBe "Sklearn Gaussian Naive Bayes Operator"
+    info.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_GROUP
+    info.inputPorts.map(_.displayName) shouldBe List("training", "testing")
+    info.outputPorts should have length 1
+    info.outputPorts.head.blocking shouldBe true
+  }
+
+  "SklearnGaussianNaiveBayesOpDesc" should "default its config fields" in {
+    val d = new SklearnGaussianNaiveBayesOpDesc
+    d.countVectorizer shouldBe false
+    d.tfidfTransformer shouldBe false
+    d.target shouldBe null
+    d.text shouldBe null
+  }
+
+  "SklearnGaussianNaiveBayesOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val d = new SklearnGaussianNaiveBayesOpDesc
+    val schema = d.getOutputSchemas(Map.empty)(d.operatorInfo.outputPorts.head.id)
+    schema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    schema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+
+  "SklearnGaussianNaiveBayesOpDesc.generatePythonCode" should
+    "import and instantiate the GaussianNB estimator" in {
+    val d = new SklearnGaussianNaiveBayesOpDesc
+    d.target = "y"
+    val code = d.generatePythonCode()
+    code should include("from sklearn.naive_bayes import GaussianNB")
+    code should include("make_pipeline")
+    code should include("Gaussian Naive Bayes")
+  }
+
+  "SklearnGaussianNaiveBayesOpDesc" should
+    "round-trip its config fields through the polymorphic base" in {
+    val d = new SklearnGaussianNaiveBayesOpDesc
+    d.target = "label"
+    d.countVectorizer = true
+    val json = objectMapper.writeValueAsString(d)
+    json should include("\"operatorType\":\"SklearnGaussianNaiveBayes\"")
+    val restored = objectMapper.readValue(json, classOf[LogicalOp])
+    restored shouldBe a[SklearnGaussianNaiveBayesOpDesc]
+    val r = restored.asInstanceOf[SklearnGaussianNaiveBayesOpDesc]
+    r.target shouldBe "label"
+    r.countVectorizer shouldBe true
+  }
+}
diff --git a/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/SklearnMultinomialNaiveBayesOpDescSpec.scala b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/SklearnMultinomialNaiveBayesOpDescSpec.scala
new file mode 100644
index 0000000000..3a6b4debfd
--- /dev/null
+++ b/common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/SklearnMultinomialNaiveBayesOpDescSpec.scala
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn
+
+import org.apache.texera.amber.core.tuple.AttributeType
+import org.apache.texera.amber.operator.LogicalOp
+import org.apache.texera.amber.operator.metadata.OperatorGroupConstants
+import org.apache.texera.amber.util.JSONUtils.objectMapper
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+class SklearnMultinomialNaiveBayesOpDescSpec extends AnyFlatSpec with Matchers {
+
+  "SklearnMultinomialNaiveBayesOpDesc.operatorInfo" should
+    "advertise the model name, Sklearn group, and the training/testing port shape" in {
+    val info = (new SklearnMultinomialNaiveBayesOpDesc).operatorInfo
+    info.userFriendlyName shouldBe "Multinomial Naive Bayes"
+    info.operatorDescription shouldBe "Sklearn Multinomial Naive Bayes Operator"
+    info.operatorGroupName shouldBe OperatorGroupConstants.SKLEARN_GROUP
+    info.inputPorts.map(_.displayName) shouldBe List("training", "testing")
+    info.outputPorts should have length 1
+    info.outputPorts.head.blocking shouldBe true
+  }
+
+  "SklearnMultinomialNaiveBayesOpDesc" should "default its config fields" in {
+    val d = new SklearnMultinomialNaiveBayesOpDesc
+    d.countVectorizer shouldBe false
+    d.tfidfTransformer shouldBe false
+    d.target shouldBe null
+    d.text shouldBe null
+  }
+
+  "SklearnMultinomialNaiveBayesOpDesc.getOutputSchemas" should
+    "emit the model_name/model schema keyed by the declared output port" in {
+    val d = new SklearnMultinomialNaiveBayesOpDesc
+    val schema = d.getOutputSchemas(Map.empty)(d.operatorInfo.outputPorts.head.id)
+    schema.getAttribute("model_name").getType shouldBe AttributeType.STRING
+    schema.getAttribute("model").getType shouldBe AttributeType.BINARY
+  }
+
+  "SklearnMultinomialNaiveBayesOpDesc.generatePythonCode" should
+    "import and instantiate the MultinomialNB estimator" in {
+    val d = new SklearnMultinomialNaiveBayesOpDesc
+    d.target = "y"
+    val code = d.generatePythonCode()
+    code should include("from sklearn.naive_bayes import MultinomialNB")
+    code should include("make_pipeline")
+    code should include("Multinomial Naive Bayes")
+  }
+
+  "SklearnMultinomialNaiveBayesOpDesc" should
+    "round-trip its config fields through the polymorphic base" in {
+    val d = new SklearnMultinomialNaiveBayesOpDesc
+    d.target = "label"
+    d.countVectorizer = true
+    val json = objectMapper.writeValueAsString(d)
+    json should include("\"operatorType\":\"SklearnMultinomialNaiveBayes\"")
+    val restored = objectMapper.readValue(json, classOf[LogicalOp])
+    restored shouldBe a[SklearnMultinomialNaiveBayesOpDesc]
+    val r = restored.asInstanceOf[SklearnMultinomialNaiveBayesOpDesc]
+    r.target shouldBe "label"
+    r.countVectorizer shouldBe true
+  }
+}

Reply via email to