diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index f0a9a0400e392..71559af624f6c 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -1568,6 +1568,18 @@ def test_datetime_at_epoch(self): self.assertEqual(first['date'], epoch) self.assertEqual(first['lit_date'], epoch) + # regression test for SPARK-20787 + def test_datetype_accepts_calendar_dates(self): + df1 = self.spark.createDataFrame(self.sc.parallelize([[datetime.datetime(1899, 12, 31)]])) + df2 = self.spark.createDataFrame(self.sc.parallelize([[datetime.datetime(100, 1, 1)]])) + try: + counted1 = df1.count() + counted2 = df2.count() + self.assertEqual(counted1, 1) + self.assertEqual(counted2, 1) + except Exception: + self.fail("Internal conversion should handle years 100-1899") + def test_decimal(self): from decimal import Decimal schema = StructType([StructField("decimal", DecimalType(10, 5))]) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 26b54a7fb3709..d8e10e4d11289 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -187,8 +187,12 @@ def needConversion(self): def toInternal(self, dt): if dt is not None: - seconds = (calendar.timegm(dt.utctimetuple()) if dt.tzinfo - else time.mktime(dt.timetuple())) + # Avoiding the invalid range of years (100-1899) for mktime in Python < 3 + if dt.year > 1899 or dt.year < 100: + seconds = (calendar.timegm(dt.utctimetuple()) if dt.tzinfo + else time.mktime(dt.timetuple())) + else: + seconds = calendar.timegm(dt.utctimetuple()) return int(seconds) * 1000000 + dt.microsecond def fromInternal(self, ts):
With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org