Github user BryanCutler commented on a diff in the pull request:

    https://github.com/apache/spark/pull/18664#discussion_r143885245

    --- Diff: python/pyspark/sql/types.py ---
    @@ -1624,6 +1624,40 @@ def to_arrow_type(dt):
         return arrow_type
     
     
    +def _localize_series_timestamps(s):
    +    """ Convert a tz-aware timestamp to local tz-naive
    +    """
    +    return s.dt.tz_localize(None)
    +
    +
    +def _check_localize_series_timestamps(s):
    +    from pandas.types.common import is_datetime64tz_dtype
    +    # TODO: handle nested timestamps?
    +    return _localize_series_timestamps(s) if is_datetime64tz_dtype(s.dtype) else s
    +
    +
    +def _check_localize_dataframe_timestamps(df):
    +    from pandas.types.common import is_datetime64tz_dtype
    +    for column, series in df.iteritems():
    +        # TODO: handle nested timestamps?
    +        if is_datetime64tz_dtype(series.dtype):
    +            df[column] = _localize_series_timestamps(series)
    +    return df
    +
    +
    +def _convert_series_timestamps(s):
    +    """ Convert a tz-naive timestamp in local tz to UTC normalized
    +    """
    +    # TODO: this should be system local tz or SESSION_LOCAL_TIMEZONE?
    +    return s.dt.tz_convert("UTC")
    --- End diff --
    
    This function is called on the Series returned from a `pandas_udf`. I _think_ this will work because internally Spark just wants long values normalized to UTC, and it will not do anything with the actual timezone id when the values are read back in with `ArrowColumnVector`. What do you think?
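    For readers following along, here is a minimal standalone pandas sketch (not part of the PR) of the semantics the comment relies on: `tz_convert` only re-labels the zone and leaves the underlying UTC-normalized int64 epoch values untouched, which is why Spark can ignore the timezone id, while `tz_localize(None)` keeps the wall-clock time and shifts the stored values. The import path `pandas.api.types` used here is the modern location of `is_datetime64tz_dtype` (the diff uses the older `pandas.types.common`). Note also that `Series.dt.tz_convert` raises a `TypeError` on tz-naive input, so the sketch starts from a tz-aware series.
    
    ```python
    # Hypothetical demo, not from the PR: illustrates the pandas timezone
    # behavior discussed above.
    import pandas as pd
    from pandas.api.types import is_datetime64tz_dtype
    
    # A tz-aware series, as Arrow would hand back for a timestamp column.
    s = pd.Series(pd.date_range("2017-10-10", periods=3, freq="D",
                                tz="US/Pacific"))
    assert is_datetime64tz_dtype(s.dtype)
    
    # tz_convert only swaps the zone label; the underlying int64 epoch
    # values (the longs Spark reads via ArrowColumnVector) are unchanged.
    utc = s.dt.tz_convert("UTC")
    assert (s.astype("int64") == utc.astype("int64")).all()
    
    # tz_localize(None) drops the zone but keeps the wall-clock time, so
    # the stored int64 values shift by the zone's UTC offset.
    naive = s.dt.tz_localize(None)
    assert not is_datetime64tz_dtype(naive.dtype)
    
    # Caveat for _convert_series_timestamps: tz_convert raises TypeError
    # on a tz-naive series; naive data must be tz_localize'd first.
    ```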