Github user yanboliang commented on a diff in the pull request: https://github.com/apache/spark/pull/19020#discussion_r140439369 --- Diff: mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala --- @@ -69,19 +69,57 @@ private[regression] trait LinearRegressionParams extends PredictorParams "The solver algorithm for optimization. Supported options: " + s"${supportedSolvers.mkString(", ")}. (Default auto)", ParamValidators.inArray[String](supportedSolvers)) + + /** + * The loss function to be optimized. + * Supported options: "leastSquares" and "huber". + * Default: "leastSquares" + * + * @group param + */ + @Since("2.3.0") + final override val loss: Param[String] = new Param[String](this, "loss", "The loss function to" + + s" be optimized. Supported options: ${supportedLosses.mkString(", ")}. (Default leastSquares)", + ParamValidators.inArray[String](supportedLosses)) + + /** + * The shape parameter to control the amount of robustness. Must be > 1.0. + * At larger values of epsilon, the huber criterion becomes more similar to least squares + * regression; for small values of epsilon, the criterion is more similar to L1 regression. + * Default is 1.35 to get as much robustness as possible while retaining + * 95% statistical efficiency for normally distributed data. + * Only valid when "loss" is "huber". + */ + @Since("2.3.0") + final val epsilon = new DoubleParam(this, "epsilon", "The shape parameter to control the " + + "amount of robustness. 
Must be > 1.0.", ParamValidators.gt(1.0)) + + /** @group getParam */ + @Since("2.3.0") + def getEpsilon: Double = $(epsilon) + + override protected def validateAndTransformSchema( + schema: StructType, + fitting: Boolean, + featuresDataType: DataType): StructType = { + if ($(loss) == Huber) { + require($(solver)!= Normal, "LinearRegression with huber loss doesn't support " + + "normal solver, please change solver to auto or l-bfgs.") + require($(elasticNetParam) == 0.0, "LinearRegression with huber loss only supports " + + s"L2 regularization, but got elasticNetParam = $getElasticNetParam.") + + } + super.validateAndTransformSchema(schema, fitting, featuresDataType) + } } /** * Linear regression. * - * The learning objective is to minimize the squared error, with regularization. - * The specific squared error loss function used is: - * - * <blockquote> - * $$ - * L = 1/2n ||A coefficients - y||^2^ - * $$ - * </blockquote> + * The learning objective is to minimize the specified loss function, with regularization. + * This supports two loss functions: + * - leastSquares (a.k.a squared loss) --- End diff -- I agree, and I added math formulas for both the _squaredError_ and _huber_ loss functions.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org