GitHub user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/22206#discussion_r212497182 --- Diff: python/pyspark/sql/tests.py --- @@ -6394,6 +6394,17 @@ def test_invalid_args(self): df.withColumn('mean_v', mean_udf(df['v']).over(ow)) +class DataSourceV2Tests(ReusedSQLTestCase): + def test_pyspark_udf_SPARK_25213(self): + from pyspark.sql.functions import udf + + df = self.spark.read.format("org.apache.spark.sql.sources.v2.SimpleDataSourceV2").load() + result = df.withColumn('x', udf(lambda x: x, 'int')(df['i'])) --- End diff -- This only tests Project with a scalar PythonUDF? It might be better to also test the Filter case.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org