Github user srowen commented on a diff in the pull request: https://github.com/apache/spark/pull/22732#discussion_r225281705 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala --- @@ -2137,36 +2137,27 @@ class Analyzer( case p => p transformExpressionsUp { - case udf@ScalaUDF(func, _, inputs, _, _, _, _, nullableTypes) => - if (nullableTypes.isEmpty) { - // If no nullability info is available, do nothing. No fields will be specially - // checked for null in the plan. If nullability info is incorrect, the results - // of the UDF could be wrong. - udf - } else { - // Otherwise, add special handling of null for fields that can't accept null. - // The result of operations like this, when passed null, is generally to return null. - assert(nullableTypes.length == inputs.length) - - // TODO: skip null handling for not-nullable primitive inputs after we can completely - // trust the `nullable` information. - val needsNullCheck = (nullable: Boolean, expr: Expression) => - nullable && !expr.isInstanceOf[KnownNotNull] - val inputsNullCheck = nullableTypes.zip(inputs) - .filter { case (nullableType, expr) => needsNullCheck(!nullableType, expr) } - .map { case (_, expr) => IsNull(expr) } - .reduceLeftOption[Expression]((e1, e2) => Or(e1, e2)) - // Once we add an `If` check above the udf, it is safe to mark those checked inputs - // as not nullable (i.e., wrap them with `KnownNotNull`), because the null-returning - // branch of `If` will be called if any of these checked inputs is null. Thus we can - // prevent this rule from being applied repeatedly. 
- val newInputs = nullableTypes.zip(inputs).map { case (nullable, expr) => - if (nullable) expr else KnownNotNull(expr) - } - inputsNullCheck - .map(If(_, Literal.create(null, udf.dataType), udf.copy(children = newInputs))) - .getOrElse(udf) - } + case udf @ ScalaUDF(_, _, inputs, handleNullForInputs, _, _, _, _) + if !handleNullForInputs.forall(!_) => + // Otherwise, add special handling of null for fields that can't accept null. + // The result of operations like this, when passed null, is generally to return null. + assert(handleNullForInputs.length == inputs.length) + + // TODO: skip null handling for not-nullable primitive inputs after we can completely + // trust the `nullable` information. + val inputsNullCheck = handleNullForInputs.zip(inputs) --- End diff -- The TODO above seems to apply to the line that was removed? I'm not totally clear. It's OK to not check `!expr.isInstanceOf[KnownNotNull]` now? you know this better than I. Hey if tests pass I think it proves it.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org