aglinxinyuan commented on code in PR #4827:
URL: https://github.com/apache/texera/pull/4827#discussion_r3177854293


##########
common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/sklearn/SklearnOpDescRegistrySpec.scala:
##########
@@ -0,0 +1,339 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.sklearn
+
+import org.apache.texera.amber.operator.sklearn.training._
+import org.scalatest.flatspec.AnyFlatSpec
+
+/**
+  * Pins the wiring (Python import statement + user-friendly model name) for
+  * every concrete `SklearnClassifierOpDesc` and `SklearnTrainingOpDesc`. A
+  * typo in either string would silently misroute downstream UI labels and
+  * break the generated Python pipeline.
+  */
+class SklearnOpDescRegistrySpec extends AnyFlatSpec {
+
+  // 
---------------------------------------------------------------------------
+  // Classifier registry (24 concrete SklearnClassifierOpDesc subclasses)
+  // 
---------------------------------------------------------------------------
+
+  private val classifierEntries: List[(SklearnClassifierOpDesc, String, 
String)] = List(
+    (
+      new SklearnAdaptiveBoostingOpDesc(),
+      "from sklearn.ensemble import AdaBoostClassifier",
+      "Adaptive Boosting"
+    ),
+    (new SklearnBaggingOpDesc(), "from sklearn.ensemble import 
BaggingClassifier", "Bagging"),
+    (
+      new SklearnBernoulliNaiveBayesOpDesc(),
+      "from sklearn.naive_bayes import BernoulliNB",
+      "Bernoulli Naive Bayes"
+    ),
+    (
+      new SklearnComplementNaiveBayesOpDesc(),
+      "from sklearn.naive_bayes import ComplementNB",
+      "Complement Naive Bayes"
+    ),
+    (
+      new SklearnDecisionTreeOpDesc(),
+      "from sklearn.tree import DecisionTreeClassifier",
+      "Decision Tree"
+    ),
+    (new SklearnExtraTreeOpDesc(), "from sklearn.tree import 
ExtraTreeClassifier", "Extra Tree"),
+    (
+      new SklearnExtraTreesOpDesc(),
+      "from sklearn.ensemble import ExtraTreesClassifier",
+      "Extra Trees"
+    ),
+    (
+      new SklearnGaussianNaiveBayesOpDesc(),
+      "from sklearn.naive_bayes import GaussianNB",
+      "Gaussian Naive Bayes"
+    ),
+    (
+      new SklearnGradientBoostingOpDesc(),
+      "from sklearn.ensemble import GradientBoostingClassifier",
+      "Gradient Boosting"
+    ),
+    (
+      new SklearnKNNOpDesc(),
+      "from sklearn.neighbors import KNeighborsClassifier",
+      "K-nearest Neighbors"
+    ),
+    (
+      new SklearnLinearSVMOpDesc(),
+      "from sklearn.svm import LinearSVC",
+      "Linear Support Vector Machine"
+    ),
+    (
+      new SklearnLogisticRegressionCVOpDesc(),
+      "from sklearn.linear_model import LogisticRegressionCV",
+      "Logistic Regression Cross Validation"
+    ),
+    (
+      new SklearnLogisticRegressionOpDesc(),
+      "from sklearn.linear_model import LogisticRegression",
+      "Logistic Regression"
+    ),
+    (
+      new SklearnMultiLayerPerceptronOpDesc(),
+      "from sklearn.neural_network import MLPClassifier",
+      "Multi-layer Perceptron"
+    ),
+    (
+      new SklearnMultinomialNaiveBayesOpDesc(),
+      "from sklearn.naive_bayes import MultinomialNB",
+      "Multinomial Naive Bayes"
+    ),
+    (
+      new SklearnNearestCentroidOpDesc(),
+      "from sklearn.neighbors import NearestCentroid",
+      "Nearest Centroid"
+    ),
+    (
+      new SklearnPassiveAggressiveOpDesc(),
+      "from sklearn.linear_model import PassiveAggressiveClassifier",
+      "Passive Aggressive"
+    ),
+    (
+      new SklearnPerceptronOpDesc(),
+      "from sklearn.linear_model import Perceptron",
+      "Linear Perceptron"
+    ),
+    (
+      new SklearnProbabilityCalibrationOpDesc(),
+      "from sklearn.calibration import CalibratedClassifierCV",
+      "Probability Calibration"
+    ),
+    (
+      new SklearnRandomForestOpDesc(),
+      "from sklearn.ensemble import RandomForestClassifier",
+      "Random Forest"
+    ),
+    (
+      new SklearnRidgeCVOpDesc(),
+      "from sklearn.linear_model import RidgeClassifierCV",
+      "Ridge Regression Cross Validation"
+    ),
+    (
+      new SklearnRidgeOpDesc(),
+      "from sklearn.linear_model import RidgeClassifier",
+      "Ridge Regression"
+    ),
+    (
+      new SklearnSDGOpDesc(),
+      "from sklearn.linear_model import SGDClassifier",
+      "Stochastic Gradient Descent"
+    ),
+    (new SklearnSVMOpDesc(), "from sklearn.svm import SVC", "Support Vector 
Machine")
+  )
+
+  classifierEntries.foreach {
+    case (desc, expectedImport, expectedName) =>
+      val cls = desc.getClass.getSimpleName
+      cls should s"return import statement '$expectedImport'" in {
+        assert(desc.getImportStatements == expectedImport)
+      }
+      it should s"return user-friendly model name '$expectedName'" in {
+        assert(desc.getUserFriendlyModelName == expectedName)
+      }
+  }
+
+  "SklearnClassifierOpDesc base class" should "default to empty strings before 
subclass overrides" in {
+    val anonymous = new SklearnClassifierOpDesc {}
+    assert(anonymous.getImportStatements == "")
+    assert(anonymous.getUserFriendlyModelName == "")
+  }
+
+  it should "embed the import statement into generatePythonCode for a concrete 
subclass" in {
+    val desc = new SklearnLogisticRegressionOpDesc()
+    desc.target = "y"
+    desc.countVectorizer = false
+    // `tfidfTransformer` is a val on the base class, defaults to false.
+    val code = desc.generatePythonCode()
+    assert(code.contains("from sklearn.linear_model import 
LogisticRegression"))
+    // Classifier OpDescs emit a UDFTableOperator pipeline.
+    assert(code.contains("ProcessTableOperator"))
+  }
+
+  // 
---------------------------------------------------------------------------
+  // Training registry (26 concrete SklearnTrainingOpDesc subclasses)
+  // 
---------------------------------------------------------------------------
+
+  private val trainingEntries: List[(SklearnTrainingOpDesc, String, String)] = 
List(
+    (
+      new SklearnTrainingAdaptiveBoostingOpDesc(),
+      "from sklearn.ensemble import AdaBoostClassifier",
+      "Training: Adaptive Boosting"
+    ),
+    (
+      new SklearnTrainingBaggingOpDesc(),
+      "from sklearn.ensemble import BaggingClassifier",
+      "Training: Bagging Training" // current source value (typo-style 
duplication preserved)

Review Comment:
   Done in d37d1ae5e8 — corrected the `getUserFriendlyModelName` to `Training: 
Bagging` to match the rest of the SklearnTraining* registry pattern, and 
updated the spec expectation accordingly.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to