Github user BryanCutler commented on a diff in the pull request: https://github.com/apache/spark/pull/21650#discussion_r202865674 --- Diff: python/pyspark/sql/tests.py --- @@ -5471,6 +5598,22 @@ def foo(_): self.assertEqual(r.a, 'hi') self.assertEqual(r.b, 1) + def test_mixed_udf(self): + # Test Pandas UDF and scalar Python UDF followed by groupby apply + from pyspark.sql.functions import udf, pandas_udf, PandasUDFType + import pandas as pd + + df = self.spark.range(0, 10).toDF('v1') + df = df.withColumn('v2', udf(lambda x: x + 1, 'int')(df['v1'])) + df = df.withColumn('v3', pandas_udf(lambda x: x + 2, 'int')(df['v1'])) --- End diff -- could you just chain the `withColumn` calls here? I think it's clearer than reassigning the df each time
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org