[ https://issues.apache.org/jira/browse/SPARK-39209?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
chong updated SPARK-39209: -------------------------- Description: Got an error when casting a big enough long to a timestamp in ANSI mode; the cast should instead return the max timestamp, according to the code in Cast.scala: {code:java} private[this] def longToTimestamp(t: Long): Long = SECONDS.toMicros(t) // the logic of SECONDS.toMicros is: static long x(long d, long m, long over) { if (d > Long.MAX_VALUE / 1000000L) return Long.MAX_VALUE; if (d < -(Long.MAX_VALUE / 1000000L)) return Long.MIN_VALUE; return d * m; }{code} Reproduce steps: {code:java} $SPARK_HOME/bin/spark-shell import spark.implicits._ val df = Seq((Long.MaxValue / 1000000) + 1).toDF("a") df.selectExpr("cast(a as timestamp)").collect() // the result is right Array[org.apache.spark.sql.Row] = Array([294247-01-10 12:00:54.775807]) import org.apache.spark.sql.types._ import org.apache.spark.sql.Row val schema = StructType(Array(StructField("a", LongType))) val data = Seq(Row((Long.MaxValue / 1000000) + 1)) val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema) df.selectExpr("cast(a as timestamp)").collect() // error occurs: java.lang.RuntimeException: Error while decoding: java.lang.ArithmeticException: long overflow createexternalrow(staticinvoke(class org.apache.spark.sql.catalyst.util.DateTimeUtils$, ObjectType(class java.sql.Timestamp), toJavaTimestamp, input[0, timestamp, true], true, false), StructField(a,TimestampType,true)) at org.apache.spark.sql.errors.QueryExecutionErrors$.expressionDecodingError(QueryExecutionErrors.scala:1047) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Deserializer.apply(ExpressionEncoder.scala:184) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Deserializer.apply(ExpressionEncoder.scala:172) at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286) at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36) at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33) at 
scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198) at scala.collection.TraversableLike.map(TraversableLike.scala:286) at scala.collection.TraversableLike.map$(TraversableLike.scala:279) at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:198) at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3715) at org.apache.spark.sql.Dataset.$anonfun$collect$1(Dataset.scala:2971) at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3706) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3704) at org.apache.spark.sql.Dataset.collect(Dataset.scala:2971) ... 51 elided Caused by: java.lang.ArithmeticException: long overflow at java.lang.Math.multiplyExact(Math.java:892) at org.apache.spark.sql.catalyst.util.DateTimeUtils$.millisToMicros(DateTimeUtils.scala:213) at org.apache.spark.sql.catalyst.util.RebaseDateTime$.rebaseGregorianToJulianMicros(RebaseDateTime.scala:362) at org.apache.spark.sql.catalyst.util.RebaseDateTime$.rebaseGregorianToJulianMicros(RebaseDateTime.scala:386) at org.apache.spark.sql.catalyst.util.DateTimeUtils$.toJavaTimestamp(DateTimeUtils.scala:146) at org.apache.spark.sql.catalyst.util.DateTimeUtils.toJavaTimestamp(DateTimeUtils.scala) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Deserializer.apply(ExpressionEncoder.scala:181) ... 
69 more {code} was: Got Error when cast a big enough long to a timestamp in ANSI mode, should get the max timestamp according to the code in Cast.scala: {code:java} private[this] def longToTimestamp(t: Long): Long = SECONDS.toMicros(t) // the logic of SECONDS.toMicros is: static long x(long d, long m, long over) { if (d > Long.MAX_VALUE / 1000000L) return Long.MAX_VALUE; if (d < -(Long.MAX_VALUE / 1000000L)) return Long.MIN_VALUE; return d * m; }{code} Reproduce steps: {code:java} $SPARK_HOME/bin/spark-shell import spark.implicits._ val df = Seq((Long.MaxValue / 1000000) + 1).toDF("a") df.selectExpr("cast(a as timestamp)").collect() // the result is right Array[org.apache.spark.sql.Row] = Array([294247-01-10 12:00:54.775807]) import org.apache.spark.sql.types._ import org.apache.spark.sql.Row val schema = StructType(Array(StructField("a", LongType))) val data = Seq(Row((Long.MaxValue / 1000000) + 1)) val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema) df.selectExpr("cast(a as timestamp)").collect() // error occurs: java.lang.RuntimeException: Error while decoding: java.lang.ArithmeticException: long overflow createexternalrow(staticinvoke(class org.apache.spark.sql.catalyst.util.DateTimeUtils$, ObjectType(class java.sql.Timestamp), toJavaTimestamp, input[0, timestamp, true], true, false, true), StructField(a,TimestampType,true)) at org.apache.spark.sql.errors.QueryExecutionErrors$.expressionDecodingError(QueryExecutionErrors.scala:1157) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Deserializer.apply(ExpressionEncoder.scala:184) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Deserializer.apply(ExpressionEncoder.scala:172) at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286) at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36) at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33) at 
scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198) at scala.collection.TraversableLike.map(TraversableLike.scala:286) at scala.collection.TraversableLike.map$(TraversableLike.scala:279) at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:198) at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3864) at org.apache.spark.sql.Dataset.$anonfun$collect$1(Dataset.scala:3119) at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3855) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:109) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:169) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:95) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3853) at org.apache.spark.sql.Dataset.collect(Dataset.scala:3119) ... 
55 elided Caused by: java.lang.ArithmeticException: long overflow at java.lang.Math.multiplyExact(Math.java:892) at org.apache.spark.sql.catalyst.util.DateTimeUtils$.millisToMicros(DateTimeUtils.scala:240) at org.apache.spark.sql.catalyst.util.RebaseDateTime$.rebaseGregorianToJulianMicros(RebaseDateTime.scala:370) at org.apache.spark.sql.catalyst.util.RebaseDateTime$.rebaseGregorianToJulianMicros(RebaseDateTime.scala:390) at org.apache.spark.sql.catalyst.util.RebaseDateTime$.rebaseGregorianToJulianMicros(RebaseDateTime.scala:411) at org.apache.spark.sql.catalyst.util.DateTimeUtils$.toJavaTimestamp(DateTimeUtils.scala:162) at org.apache.spark.sql.catalyst.util.DateTimeUtils.toJavaTimestamp(DateTimeUtils.scala) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source) at org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Deserializer.apply(ExpressionEncoder.scala:181) ... 73 more {code} > Error occurs when cast a big enough long to timestamp in ANSI mode > -------------------------------------------------------------------- > > Key: SPARK-39209 > URL: https://issues.apache.org/jira/browse/SPARK-39209 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 3.3.0 > Environment: Spark 3.3.0 > Reporter: chong > Priority: Major > > > Got Error when cast a big enough long to a timestamp in ANSI mode, should get > the max timestamp according to the code in Cast.scala: > > {code:java} > private[this] def longToTimestamp(t: Long): Long = SECONDS.toMicros(t) > // the logic of SECONDS.toMicros is: > static long x(long d, long m, long over) { > if (d > Long.MAX_VALUE / 1000000L) return Long.MAX_VALUE; > if (d < -(Long.MAX_VALUE / 1000000L)) return Long.MIN_VALUE; > return d * m; > }{code} > > > Reproduce steps: > {code:java} > $SPARK_HOME/bin/spark-shell > import spark.implicits._ > val df = Seq((Long.MaxValue / 1000000) + 1).toDF("a") > df.selectExpr("cast(a as timestamp)").collect() > // the result is right 
Array[org.apache.spark.sql.Row] = Array([294247-01-10 > 12:00:54.775807]) > > import org.apache.spark.sql.types._ > import org.apache.spark.sql.Row > val schema = StructType(Array(StructField("a", LongType))) > val data = Seq(Row((Long.MaxValue / 1000000) + 1)) > val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema) > df.selectExpr("cast(a as timestamp)").collect() > > // error occurs: > java.lang.RuntimeException: Error while decoding: > java.lang.ArithmeticException: long overflow > createexternalrow(staticinvoke(class > org.apache.spark.sql.catalyst.util.DateTimeUtils$, ObjectType(class > java.sql.Timestamp), toJavaTimestamp, input[0, timestamp, true], true, > false), StructField(a,TimestampType,true)) > at > org.apache.spark.sql.errors.QueryExecutionErrors$.expressionDecodingError(QueryExecutionErrors.scala:1047) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Deserializer.apply(ExpressionEncoder.scala:184) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Deserializer.apply(ExpressionEncoder.scala:172) > at > scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286) > at > scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36) > at > scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33) > at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198) > at scala.collection.TraversableLike.map(TraversableLike.scala:286) > at scala.collection.TraversableLike.map$(TraversableLike.scala:279) > at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:198) > at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3715) > at org.apache.spark.sql.Dataset.$anonfun$collect$1(Dataset.scala:2971) > at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3706) > at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103) > at > 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163) > at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90) > at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64) > at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3704) > at org.apache.spark.sql.Dataset.collect(Dataset.scala:2971) > ... 51 elided > Caused by: java.lang.ArithmeticException: long overflow > at java.lang.Math.multiplyExact(Math.java:892) > at > org.apache.spark.sql.catalyst.util.DateTimeUtils$.millisToMicros(DateTimeUtils.scala:213) > at > org.apache.spark.sql.catalyst.util.RebaseDateTime$.rebaseGregorianToJulianMicros(RebaseDateTime.scala:362) > at > org.apache.spark.sql.catalyst.util.RebaseDateTime$.rebaseGregorianToJulianMicros(RebaseDateTime.scala:386) > at > org.apache.spark.sql.catalyst.util.DateTimeUtils$.toJavaTimestamp(DateTimeUtils.scala:146) > at > org.apache.spark.sql.catalyst.util.DateTimeUtils.toJavaTimestamp(DateTimeUtils.scala) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown > Source) > at > org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Deserializer.apply(ExpressionEncoder.scala:181) > ... 69 more > {code} > -- This message was sent by Atlassian Jira (v8.20.7#820007) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org