[ https://issues.apache.org/jira/browse/SPARK-32226?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17156287#comment-17156287 ]
Chen Zhang commented on SPARK-32226: ------------------------------------ Hello [~thesuperzapper], The dialect of Informix database is not implemented in spark source code. You can run code similar to the following to register a dialect about the Informix database, which may solve your program errors. {code:scala} import java.sql.Timestamp import java.text.SimpleDateFormat import java.util.Locale import org.apache.spark.sql.jdbc.JdbcDialect import org.apache.spark.sql.jdbc.JdbcDialects case object InformixDialect extends JdbcDialect { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:informix") override def compileValue(value: Any): Any = value match { case timestampValue: Timestamp => val dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") val timestampString = dateFormat.format(timestampValue) "'" + timestampString + "'" case v => super.compileValue(v) } } JdbcDialects.registerDialect(InformixDialect) {code} > JDBC TimeStamp predicates always append `.0` > -------------------------------------------- > > Key: SPARK-32226 > URL: https://issues.apache.org/jira/browse/SPARK-32226 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 3.0.0 > Reporter: Mathew Wicks > Priority: Major > > If you have an Informix column with type `DATETIME YEAR TO SECOND`, Informix > will not let you pass a filter of the form `2020-01-01 00:00:00.0` (with the > `.0` at the end). 
> > In Spark 3.0.0, our predicate pushdown will always append this `.0` to the end > of a TimeStamp column filter, even if you don't specify it: > {code:java} > df.where("col1 > '2020-01-01 00:00:00'") > {code} > > I think we should only pass the `.XXX` suffix if the user passes it in the > filter, for example: > {code:java} > df.where("col1 > '2020-01-01 00:00:00.123'") > {code} > > The relevant Spark class is: > {code:java} > org.apache.spark.sql.catalyst.util.DateTimeUtils.timestampToString > {code} > > To aid people searching for this error, here is the error emitted by spark: > {code:java} > Driver stacktrace: > at > org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2023) > at > org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:1972) > at > org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:1971) > at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) > at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1971) > at > org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:950) > at > org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:950) > at scala.Option.foreach(Option.scala:407) > at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:950) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2203) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2152) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2141) > at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) > at 
org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:752) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2093) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2114) > at org.apache.spark.SparkContext.runJob(SparkContext.scala:2133) > at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:467) > at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:420) > at > org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:47) > at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3625) > at org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:2695) > at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3616) > at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100) > at > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160) > at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87) > at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64) > at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3614) > at org.apache.spark.sql.Dataset.head(Dataset.scala:2695) > at org.apache.spark.sql.Dataset.take(Dataset.scala:2902) > at org.apache.spark.sql.Dataset.getRows(Dataset.scala:300) > at org.apache.spark.sql.Dataset.showString(Dataset.scala:337) > at org.apache.spark.sql.Dataset.show(Dataset.scala:824) > at org.apache.spark.sql.Dataset.show(Dataset.scala:783) > at org.apache.spark.sql.Dataset.show(Dataset.scala:792) > ... 47 elided > Caused by: java.sql.SQLException: Extra characters at the end of a datetime > or interval. 
> at com.informix.util.IfxErrMsg.buildExceptionWithMessage(IfxErrMsg.java:416) > at com.informix.util.IfxErrMsg.buildIsamException(IfxErrMsg.java:401) > at com.informix.jdbc.IfxSqli.addException(IfxSqli.java:3096) > at com.informix.jdbc.IfxSqli.receiveError(IfxSqli.java:3368) > at com.informix.jdbc.IfxSqli.dispatchMsg(IfxSqli.java:2292) > at com.informix.jdbc.IfxSqli.receiveMessage(IfxSqli.java:2217) > at com.informix.jdbc.IfxSqli.executePrepare(IfxSqli.java:1213) > at > com.informix.jdbc.IfxPreparedStatement.setupExecutePrepare(IfxPreparedStatement.java:245) > at > com.informix.jdbc.IfxPreparedStatement.processSQL(IfxPreparedStatement.java:229) > at > com.informix.jdbc.IfxPreparedStatement.<init>(IfxPreparedStatement.java:119) > at > com.informix.jdbc.IfxSqliConnect.prepareStatement(IfxSqliConnect.java:5905) > at > com.informix.jdbc.IfxSqliConnect.prepareStatement(IfxSqliConnect.java:2474) > at > org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD.compute(JDBCRDD.scala:301) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:313) > at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:313) > at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:313) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:127) > at > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:444) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:447) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org