[ https://issues.apache.org/jira/browse/SPARK-41730?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
ASF GitHub Bot updated SPARK-41730: ----------------------------------- Labels: pull-request-available (was: ) > `min` fails on the minimal timestamp > ------------------------------------ > > Key: SPARK-41730 > URL: https://issues.apache.org/jira/browse/SPARK-41730 > Project: Spark > Issue Type: Bug > Components: PySpark > Affects Versions: 3.4.0 > Reporter: Max Gekk > Assignee: Max Gekk > Priority: Major > Labels: pull-request-available > > The code below demonstrates the issue: > {code:python} > >>> from datetime import datetime, timezone > >>> from pyspark.sql.types import TimestampType > >>> from pyspark.sql import functions as F > >>> ts = spark.createDataFrame([datetime(1, 1, 1, 0, 0, 0, 0, > >>> tzinfo=timezone.utc)], TimestampType()).toDF("test_column") > >>> ts.select(F.min('test_column')).first()[0] > Traceback (most recent call last): > File "<stdin>", line 1, in <module> > File "/Users/maximgekk/proj/apache-spark/python/pyspark/sql/dataframe.py", > line 2762, in first > return self.head() > File "/Users/maximgekk/proj/apache-spark/python/pyspark/sql/dataframe.py", > line 2738, in head > rs = self.head(1) > File "/Users/maximgekk/proj/apache-spark/python/pyspark/sql/dataframe.py", > line 2740, in head > return self.take(n) > File "/Users/maximgekk/proj/apache-spark/python/pyspark/sql/dataframe.py", > line 1297, in take > return self.limit(num).collect() > File "/Users/maximgekk/proj/apache-spark/python/pyspark/sql/dataframe.py", > line 1198, in collect > return list(_load_from_socket(sock_info, > BatchedSerializer(CPickleSerializer()))) > File "/Users/maximgekk/proj/apache-spark/python/pyspark/serializers.py", > line 152, in load_stream > yield self._read_with_length(stream) > File "/Users/maximgekk/proj/apache-spark/python/pyspark/serializers.py", > line 174, in _read_with_length > return self.loads(obj) > File "/Users/maximgekk/proj/apache-spark/python/pyspark/serializers.py", > line 472, in loads > return cloudpickle.loads(obj, encoding=encoding) > 
File "/Users/maximgekk/proj/apache-spark/python/pyspark/sql/types.py", line > 2010, in <lambda> > return lambda *a: dataType.fromInternal(a) > File "/Users/maximgekk/proj/apache-spark/python/pyspark/sql/types.py", line > 1018, in fromInternal > values = [ > File "/Users/maximgekk/proj/apache-spark/python/pyspark/sql/types.py", line > 1019, in <listcomp> > f.fromInternal(v) if c else v > File "/Users/maximgekk/proj/apache-spark/python/pyspark/sql/types.py", line > 667, in fromInternal > return self.dataType.fromInternal(obj) > File "/Users/maximgekk/proj/apache-spark/python/pyspark/sql/types.py", line > 279, in fromInternal > return datetime.datetime.fromtimestamp(ts // > 1000000).replace(microsecond=ts % 1000000) > ValueError: year 0 is out of range > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org For additional commands, e-mail: issues-help@spark.apache.org