Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20537#discussion_r167133597 --- Diff: python/pyspark/sql/types.py --- @@ -1744,8 +1744,27 @@ def _check_series_convert_timestamps_internal(s, timezone): from pandas.api.types import is_datetime64_dtype, is_datetime64tz_dtype # TODO: handle nested timestamps, such as ArrayType(TimestampType())? if is_datetime64_dtype(s.dtype): + # tz_localize with ambiguous=False has the same behavior of pytz.localize + # >>> import datetime + # >>> import pandas as pd + # >>> import pytz + # >>> + # >>> t = datetime.datetime(2015, 11, 1, 1, 23, 24) + # >>> ts = pd.Series([t]) + # >>> tz = pytz.timezone('America/New_York') + # >>> + # >>> ts.dt.tz_localize(tz, ambiguous=False) + # 0 2015-11-01 01:23:24-05:00 + # dtype: datetime64[ns, America/New_York] + # >>> + # >>> ts.dt.tz_localize(tz, ambiguous=True) + # 0 2015-11-01 01:23:24-04:00 + # dtype: datetime64[ns, America/New_York] + # >>> + # >>> str(tz.localize(t)) + # '2015-11-01 01:23:24-05:00' --- End diff -- @icexelloss, I get that it's good to know, but shall we describe it as prose? This comment looks like the format of a doctest, but these lines are actually just comments. It would be nicer if we just have an explanation in the comments, not in a doctest format.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org