Github user BryanCutler commented on a diff in the pull request:

    https://github.com/apache/spark/pull/18664#discussion_r143885245
  
    --- Diff: python/pyspark/sql/types.py ---
    @@ -1624,6 +1624,40 @@ def to_arrow_type(dt):
         return arrow_type
     
     
    +def _localize_series_timestamps(s):
    +    """ Convert a tz-aware timestamp to local tz-naive
    +    """
    +    return s.dt.tz_localize(None)
    +
    +
    +def _check_localize_series_timestamps(s):
    +    from pandas.types.common import is_datetime64tz_dtype
    +    # TODO: handle nested timestamps?
    +    return _localize_series_timestamps(s) if is_datetime64tz_dtype(s.dtype) else s
    +
    +
    +def _check_localize_dataframe_timestamps(df):
    +    from pandas.types.common import is_datetime64tz_dtype
    +    for column, series in df.iteritems():
    +        # TODO: handle nested timestamps?
    +        if is_datetime64tz_dtype(series.dtype):
    +            df[column] = _localize_series_timestamps(series)
    +    return df
    +
    +
    +def _convert_series_timestamps(s):
    +    """ Convert a tz-naive timestamp in local tz to UTC normalized
    +    """
    +    # TODO: this should be system local tz or SESSION_LOCAL_TIMEZONE?
    +    return s.dt.tz_convert("UTC")
    --- End diff --
    
    This function is called on the returned Series from a `pandas_udf`. I _think_ this will work because internally Spark just wants long values normalized to UTC, and will not do anything with the actual timezone id when read in with `ArrowColumnVector`. What do you think?
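    For context, a rough sketch (not from the PR; the zone ids and sample values below are just illustrative assumptions) of why the zone id shouldn't matter once the values are normalized to UTC: the underlying epoch longs come out the same either way.

        import pandas as pd

        # Hypothetical tz-naive wall-clock timestamps, as a pandas_udf might return them.
        s = pd.Series(pd.to_datetime(["2017-10-10 12:00:00", "2017-10-10 13:00:00"]))

        # Interpret them in an assumed local zone, then normalize to UTC.
        s_utc = s.dt.tz_localize("America/Los_Angeles").dt.tz_convert("UTC")

        # The epoch-nanosecond longs are identical no matter which zone id is attached;
        # those normalized values are all Spark consumes through ArrowColumnVector.
        print(s_utc.values.astype("int64"))
        print(s_utc.dt.tz_convert("Asia/Tokyo").values.astype("int64"))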

