Github user icexelloss commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20142#discussion_r159994714

    --- Diff: python/pyspark/sql/tests.py ---
    @@ -3950,6 +3974,33 @@ def test_vectorized_udf_timestamps_respect_session_timezone(self):
             finally:
                 self.spark.conf.set("spark.sql.session.timeZone", orig_tz)

    +    def test_nondeterministic_udf(self):
    +        # Non-deterministic UDFs should be allowed in select and withColumn
    +        from pyspark.sql.functions import pandas_udf, col
    +
    +        random_udf = self.random_udf
    +        df = self.spark.range(10)
    +
    +        result1 = df.select(random_udf(col('id')).alias('rand')).collect()
    +        result2 = df.withColumn('rand', random_udf(col('id'))).collect()
    +
    +        for row in result1:
    +            self.assertTrue(0.0 <= row.rand < 1.0)
    +        for row in result2:
    +            self.assertTrue(0.0 <= row.rand < 1.0)
    --- End diff --

    I changed the test to be similar to the non-pandas one.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org