Github user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/18664#discussion_r145024991
  
    --- Diff: python/pyspark/sql/tests.py ---
    @@ -3383,6 +3403,42 @@ def test_vectorized_udf_varargs(self):
             res = df.select(f(col('id')))
             self.assertEquals(df.collect(), res.collect())
     
    +    def test_vectorized_udf_timestamps(self):
    +        from pyspark.sql.functions import pandas_udf, col
    +        from datetime import date, datetime
    +        schema = StructType([
    +            StructField("idx", LongType(), True),
    +            StructField("date", DateType(), True),
    +            StructField("timestamp", TimestampType(), True)])
    +        data = [(0, date(1969, 1, 1), datetime(1969, 1, 1, 1, 1, 1)),
    +                (1, date(2012, 2, 2), datetime(2012, 2, 2, 2, 2, 2)),
    +                (2, date(2100, 3, 3), datetime(2100, 3, 3, 3, 3, 3)),
    +                (3, date(2104, 4, 4), datetime(2104, 4, 4, 4, 4, 4))]
    +
    +        df = self.spark.createDataFrame(data, schema=schema)
    +
    +        # Check that a timestamp passed through a pandas_udf will not be altered by timezone calc
    +        identity = pandas_udf(lambda t: t, returnType=TimestampType())
    +        df = df.withColumn("timestamp_copy", identity(col("timestamp")))
    +
    +        @pandas_udf(returnType=BooleanType())
    +        def check_data(idx, date, timestamp, timestamp_copy):
    +            is_equal = timestamp == timestamp_copy
    +            if is_equal.all():
    +                for i in xrange(len(is_equal)):
    --- End diff --
    
    Let's just use `range` here. The test seems to have failed on Python 3 because `xrange` does not exist there.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to