Github user mengxr commented on a diff in the pull request:

    https://github.com/apache/spark/pull/11694#discussion_r56057691
  
    --- Diff: mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala ---
    @@ -633,3 +755,179 @@ object GeneralizedLinearRegressionModel extends 
MLReadable[GeneralizedLinearRegr
         }
       }
     }
    +
    +/**
    + * :: Experimental ::
    + * Summarizing Generalized Linear regression Fits.
    + *
    + * @param predictions predictions outputted by the model's `transform` 
method
    + * @param predictionCol field in "predictions" which gives the prediction 
value of each instance
    + * @param family the family object of the model
    + * @param link the link object of the model
    + * @param model the model that should be summarized
    + * @param diagInvAtWA diagonal of matrix (A^T * W * A)^-1 in the last 
iteration
    + * @param numIterations number of iterations
    + */
    +@Since("2.0.0")
    +@Experimental
    +class GeneralizedLinearRegressionSummary private[regression] (
    +    @transient val predictions: DataFrame,
    +    val predictionCol: String,
    +    val family: GeneralizedLinearRegression.Family,
    +    val link: GeneralizedLinearRegression.Link,
    +    val model: GeneralizedLinearRegressionModel,
    +    private val diagInvAtWA: Array[Double],
    +    val numIterations: Int) extends Serializable {
    +
    +  import GeneralizedLinearRegression._
    +
    +  /** Number of instances in DataFrame predictions */
    +  lazy val numInstances: Long = predictions.count()
    +
    +  /** The numeric rank of the fitted linear model */
    +  lazy val rank: Long = if (model.getFitIntercept) {
    +    model.coefficients.size + 1
    +  } else {
    +    model.coefficients.size
    +  }
    +
    +  /** Degrees of freedom */
    +  lazy val degreesOfFreedom: Long = {
    +    numInstances - rank
    +  }
    +
    +  /** The residual degrees of freedom */
    +  lazy val residualDegreeOfFreedom: Long = degreesOfFreedom
    +
    +  /** The residual degrees of freedom for the null model */
    +  lazy val residualDegreeOfFreedomNull: Long = if (model.getFitIntercept) {
    +    numInstances - 1
    +  } else {
    +    numInstances
    +  }
    +
    +  private lazy val devianceResiduals: DataFrame = {
    +    val drUDF = udf { (y: Double, mu: Double, weight: Double) =>
    +      val r = math.sqrt(math.max(family.deviance(y, mu, weight), 0.0))
    +      if (y > mu) r else -1.0 * r
    +    }
    +    val w = if (model.getWeightCol.isEmpty) lit(1.0) else 
col(model.getWeightCol)
    +    predictions.select(
    +      drUDF(col(model.getLabelCol), col(predictionCol), 
w).as("devianceResiduals"))
    +  }
    +
    +  private lazy val pearsonResiduals: DataFrame = {
    +    val prUDF = udf { mu: Double => family.variance(mu) }
    +    val w = if (model.getWeightCol.isEmpty) lit(1.0) else 
col(model.getWeightCol)
    +    predictions.select(col(model.getLabelCol).minus(col(predictionCol))
    +      .multiply(sqrt(w)).divide(sqrt(prUDF(col(predictionCol)))).as("pearsonResiduals"))
    +  }
    +
    +  private lazy val workingResiduals: DataFrame = {
    +    val wrUDF = udf { (y: Double, mu: Double) => (y - mu) * link.deriv(mu) 
}
    +    predictions.select(wrUDF(col(model.getLabelCol), 
col(predictionCol)).as("workingResiduals"))
    +  }
    +
    +  private lazy val responseResiduals: DataFrame = {
    +    predictions.select(col(model.getLabelCol).minus(col(predictionCol)).as("responseResiduals"))
    +  }
    +
    +  /**
    +   * Get the residuals of the fitted model by type.
    +   * @param residualsType The type of residuals which should be returned.
    +   *                      Supported options: deviance(default), pearson, 
working and response.
    +   */
    +  def residuals(residualsType: String = "deviance"): DataFrame = {
    --- End diff --
    
    We should not use default argument values here, for Java compatibility.
    Overload `residuals` instead.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to