GitHub user ueshin commented on a diff in the pull request: https://github.com/apache/spark/pull/18906#discussion_r162857996 --- Diff: python/pyspark/sql/tests.py --- @@ -602,6 +602,30 @@ def test_non_existed_udf(self): self.assertRaisesRegexp(AnalysisException, "Can not load class non_existed_udf", lambda: sqlContext.registerJavaFunction("udf1", "non_existed_udf")) + def test_udf_no_nulls(self): + from pyspark.sql.functions import udf + plus_four = udf(lambda x: x + 4, IntegerType()).asNonNullable() + df = self.spark.range(10) + res = df.select(plus_four(df['id']).alias('plus_four')) + self.assertFalse(plus_four.nullable) + self.assertFalse(res.schema['plus_four'].nullable) + self.assertEqual(res.agg({'plus_four': 'sum'}).collect()[0][0], 85) + + def test_udf_with_callable_no_nulls(self): + df = self.spark.range(10) + + class PlusFour: + def __call__(self, col): + if col is not None: + return col + 4 --- End diff -- Do we need an `else` clause here for the result to be non-nullable? Without one, `__call__` implicitly returns `None` when `col` is `None`.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org