I am facing an error while trying to save Dataframe containing datetime field into MySQL table. What I am doing is: 1. Reading data from MySQL table which has fields of type datetime in MySQL. 2. Process Dataframe. 3. Store/Save Dataframe back into another MySQL table.
While creating table, spark is creating table in MySQL with type timestamp for field which were datetime in MySQL. For reference, I am adding dataframe contents: scala> data.select("date_from","date_to").show(5, false) +---------------------+---------------------+ |date_from |date_to | +---------------------+---------------------+ |1899-12-31 18:00:00.0|2199-12-31 23:59:59.0| |1899-12-31 18:00:00.0|2199-12-31 23:59:59.0| |1899-12-31 18:00:00.0|2199-12-31 23:59:59.0| |1899-12-31 18:00:00.0|2199-12-31 23:59:59.0| |1899-12-31 18:00:00.0|2199-12-31 23:59:59.0| +---------------------+---------------------+ And while trying to insert data in new table, I am getting following error: ========================================================================= scala> data.write.mode("overwrite").jdbc(url,"test_service_table", prop) org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 11.0 failed 1 times, most recent failure: Lost task 0.0 in stage 11.0 (TID 11, localhost): java.sql.BatchUpdateException: Data truncation: Incorrect datetime value: '1899-12-31 15:00:00' for column 'date_from' at row 1 at com.mysql.jdbc.PreparedStatement.executeBatchSerially(PreparedStatement.java:2028) at com.mysql.jdbc.PreparedStatement.executeBatch(PreparedStatement.java:1451) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.savePartition(JdbcUtils.scala:215) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$saveTable$1.apply(JdbcUtils.scala:277) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$saveTable$1.apply(JdbcUtils.scala:276) at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$33.apply(RDD.scala:920) at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$33.apply(RDD.scala:920) at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1858) at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1858) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66) at org.apache.spark.scheduler.Task.run(Task.scala:89) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source) at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source) at java.lang.Thread.run(Unknown Source) Caused by: com.mysql.jdbc.MysqlDataTruncation: Data truncation: Incorrect datetime value: '1899-12-31 15:00:00' for column 'date_from' at row 1 at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3607) at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3541) at com.mysql.jdbc.MysqlIO.sendCommand(MysqlIO.java:2002) at com.mysql.jdbc.MysqlIO.sqlQueryDirect(MysqlIO.java:2163) at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2624) at com.mysql.jdbc.PreparedStatement.executeInternal(PreparedStatement.java:2127) at com.mysql.jdbc.PreparedStatement.executeUpdate(PreparedStatement.java:2427) at com.mysql.jdbc.PreparedStatement.executeBatchSerially(PreparedStatement.java:1980) ... 14 more Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org $apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799) at scala.Option.foreach(Option.scala:236) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588) at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1845) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1858) at org.apache.spark.SparkContext.runJob(SparkContext.scala:1929) at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:920) at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:918) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111) at org.apache.spark.rdd.RDD.withScope(RDD.scala:316) at org.apache.spark.rdd.RDD.foreachPartition(RDD.scala:918) at org.apache.spark.sql.DataFrame$$anonfun$foreachPartition$1.apply$mcV$sp(DataFrame.scala:1444) at org.apache.spark.sql.DataFrame$$anonfun$foreachPartition$1.apply(DataFrame.scala:1444) at org.apache.spark.sql.DataFrame$$anonfun$foreachPartition$1.apply(DataFrame.scala:1444) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:56) at org.apache.spark.sql.DataFrame.withNewExecutionId(DataFrame.scala:2086) at org.apache.spark.sql.DataFrame.foreachPartition(DataFrame.scala:1443) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.saveTable(JdbcUtils.scala:276) at org.apache.spark.sql.DataFrameWriter.jdbc(DataFrameWriter.scala:311) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:38) at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:40) at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:42) at $iwC$$iwC$$iwC$$iwC.<init>(<console>:44) at $iwC$$iwC$$iwC.<init>(<console>:46) at $iwC$$iwC.<init>(<console>:48) at $iwC.<init>(<console>:50) at <init>(<console>:52) at .<init>(<console>:56) at .<clinit>(<console>) at .<init>(<console>:7) at .<clinit>(<console>) at $print(<console>) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source) at java.lang.reflect.Method.invoke(Unknown Source) at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065) at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346) at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840) at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871) at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819) at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857) at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902) at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814) at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657) at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665) at org.apache.spark.repl.SparkILoop.org $apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670) at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997) at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945) at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135) at org.apache.spark.repl.SparkILoop.org $apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945) at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059) at org.apache.spark.repl.Main$.main(Main.scala:31) at org.apache.spark.repl.Main.main(Main.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source) at java.lang.reflect.Method.invoke(Unknown Source) at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:742) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181) at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) Caused by: java.sql.BatchUpdateException: Data truncation: Incorrect datetime value: '1899-12-31 15:00:00' for column 'date_from' at row 1 at com.mysql.jdbc.PreparedStatement.executeBatchSerially(PreparedStatement.java:2028) at com.mysql.jdbc.PreparedStatement.executeBatch(PreparedStatement.java:1451) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.savePartition(JdbcUtils.scala:215) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$saveTable$1.apply(JdbcUtils.scala:277) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$saveTable$1.apply(JdbcUtils.scala:276) at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$33.apply(RDD.scala:920) at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$33.apply(RDD.scala:920) at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1858) at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1858) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66) at org.apache.spark.scheduler.Task.run(Task.scala:89) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214) at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source) at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source) at java.lang.Thread.run(Unknown Source) Caused by: com.mysql.jdbc.MysqlDataTruncation: Data truncation: Incorrect datetime value: '1899-12-31 15:00:00' for column 'date_from' at row 1 at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3607) at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3541) at com.mysql.jdbc.MysqlIO.sendCommand(MysqlIO.java:2002) at com.mysql.jdbc.MysqlIO.sqlQueryDirect(MysqlIO.java:2163) at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2624) at com.mysql.jdbc.PreparedStatement.executeInternal(PreparedStatement.java:2127) at com.mysql.jdbc.PreparedStatement.executeUpdate(PreparedStatement.java:2427) at com.mysql.jdbc.PreparedStatement.executeBatchSerially(PreparedStatement.java:1980) ... 14 more ============================================================================ Any help on this is really appreciated. Regards Dhaval