Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/18664#discussion_r145024991 --- Diff: python/pyspark/sql/tests.py --- @@ -3383,6 +3403,42 @@ def test_vectorized_udf_varargs(self): res = df.select(f(col('id'))) self.assertEquals(df.collect(), res.collect()) + def test_vectorized_udf_timestamps(self): + from pyspark.sql.functions import pandas_udf, col + from datetime import date, datetime + schema = StructType([ + StructField("idx", LongType(), True), + StructField("date", DateType(), True), + StructField("timestamp", TimestampType(), True)]) + data = [(0, date(1969, 1, 1), datetime(1969, 1, 1, 1, 1, 1)), + (1, date(2012, 2, 2), datetime(2012, 2, 2, 2, 2, 2)), + (2, date(2100, 3, 3), datetime(2100, 3, 3, 3, 3, 3)), + (3, date(2104, 4, 4), datetime(2104, 4, 4, 4, 4, 4))] + + df = self.spark.createDataFrame(data, schema=schema) + + # Check that a timestamp passed through a pandas_udf will not be altered by timezone calc + identity = pandas_udf(lambda t: t, returnType=TimestampType()) + df = df.withColumn("timestamp_copy", identity(col("timestamp"))) + + @pandas_udf(returnType=BooleanType()) + def check_data(idx, date, timestamp, timestamp_copy): + is_equal = timestamp == timestamp_copy + if is_equal.all(): + for i in xrange(len(is_equal)): --- End diff -- Let's just use `range` here. The test seems to have failed under Python 3 because `xrange` does not exist there.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org