Github user BryanCutler commented on a diff in the pull request:

    https://github.com/apache/spark/pull/18664#discussion_r147207020

--- Diff: python/pyspark/sql/types.py ---
@@ -1619,11 +1619,38 @@ def to_arrow_type(dt):
         arrow_type = pa.decimal(dt.precision, dt.scale)
     elif type(dt) == StringType:
         arrow_type = pa.string()
+    elif type(dt) == DateType:
+        arrow_type = pa.date32()
+    elif type(dt) == TimestampType:
+        # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read
+        arrow_type = pa.timestamp('us', tz='UTC')
     else:
         raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
     return arrow_type


+def _check_dataframe_localize_timestamps(df):
+    """ Convert timezone aware timestamps to timezone-naive in local time
+    """
+    from pandas.api.types import is_datetime64tz_dtype
+    for column, series in df.iteritems():
+        # TODO: handle nested timestamps, such as ArrayType(TimestampType())?
+        if is_datetime64tz_dtype(series.dtype):
+            df[column] = series.dt.tz_convert('tzlocal()').dt.tz_localize(None)
+    return df
+
+
+def _check_series_convert_timestamps_internal(s):
+    """ Convert a tz-naive timestamp in local tz to UTC normalized for Spark internal storage
+    """
+    from pandas.api.types import is_datetime64_dtype
+    # TODO: handle nested timestamps, such as ArrayType(TimestampType())?
+    if is_datetime64_dtype(s.dtype):
+        return s.dt.tz_localize('tzlocal()').dt.tz_convert('UTC')
+    else:
+        return s
--- End diff --

Here is what I found: for the actual internal data it doesn't matter. Changing the timezone on a series is just a metadata operation, so the same data will be transferred back to Spark regardless:

```
In [101]: ts = pd.Timestamp(1, unit='D', tz='America/New_York')

In [102]: ts.value
Out[102]: 86400000000000

In [103]: ts.tz_convert('UTC').value
Out[103]: 86400000000000
```

However, to be consistent we should make sure the tz is UTC, so I'll add this along with a test.
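To make the metadata-only point concrete at series granularity, here is a minimal standalone sketch (not part of the PR; it assumes a pandas install with pytz available) checking that `tz_convert` leaves the underlying int64 epoch values of a whole series unchanged:

```
import pandas as pd

# Minimal sketch (illustration only, not PR code). tz_convert rewrites only
# the timezone metadata, so the underlying int64 epoch nanoseconds -- the
# bytes Spark would receive -- are identical before and after conversion.
s = pd.Series(pd.to_datetime(['1970-01-02', '1970-01-03']))
s = s.dt.tz_localize('America/New_York')        # make the series tz-aware

before = s.astype('int64')                      # epoch ns (stored as UTC)
after = s.dt.tz_convert('UTC').astype('int64')  # same ns after tz_convert

assert (before == after).all()
```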