I am facing an error while trying to save Dataframe containing datetime
field into MySQL table.
What I am doing is:
1. Reading data from MySQL table which has fields of type datetime in MySQL.
2. Process Dataframe.
3. Store/Save Dataframe back into another MySQL table.

While creating table, spark is creating table in MySQL with type timestamp
for field which were datetime in MySQL.

For reference, I am adding dataframe contents:
scala> data.select("date_from","date_to").show(5, false)
+---------------------+---------------------+
|date_from            |date_to              |
+---------------------+---------------------+
|1899-12-31 18:00:00.0|2199-12-31 23:59:59.0|
|1899-12-31 18:00:00.0|2199-12-31 23:59:59.0|
|1899-12-31 18:00:00.0|2199-12-31 23:59:59.0|
|1899-12-31 18:00:00.0|2199-12-31 23:59:59.0|
|1899-12-31 18:00:00.0|2199-12-31 23:59:59.0|
+---------------------+---------------------+


And while trying to insert data in new table, I am getting following error:


=========================================================================
scala> data.write.mode("overwrite").jdbc(url,"test_service_table", prop)
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0
in stage 11.0 failed 1 times, most recent failure: Lost task 0.0 in stage
11.0 (TID 11, localhost): java.sql.BatchUpdateException: Data truncation:
Incorrect datetime value: '1899-12-31 15:00:00' for column 'date_from' at
row 1
        at
com.mysql.jdbc.PreparedStatement.executeBatchSerially(PreparedStatement.java:2028)
        at
com.mysql.jdbc.PreparedStatement.executeBatch(PreparedStatement.java:1451)
        at
org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.savePartition(JdbcUtils.scala:215)
        at
org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$saveTable$1.apply(JdbcUtils.scala:277)
        at
org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$saveTable$1.apply(JdbcUtils.scala:276)
        at
org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$33.apply(RDD.scala:920)
        at
org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$33.apply(RDD.scala:920)
        at
org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1858)
        at
org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1858)
        at
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
        at org.apache.spark.scheduler.Task.run(Task.scala:89)
        at
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
Source)
        at java.lang.Thread.run(Unknown Source)
Caused by: com.mysql.jdbc.MysqlDataTruncation: Data truncation: Incorrect
datetime value: '1899-12-31 15:00:00' for column 'date_from' at row 1
        at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3607)
        at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3541)
        at com.mysql.jdbc.MysqlIO.sendCommand(MysqlIO.java:2002)
        at com.mysql.jdbc.MysqlIO.sqlQueryDirect(MysqlIO.java:2163)
        at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2624)
        at
com.mysql.jdbc.PreparedStatement.executeInternal(PreparedStatement.java:2127)
        at
com.mysql.jdbc.PreparedStatement.executeUpdate(PreparedStatement.java:2427)
        at
com.mysql.jdbc.PreparedStatement.executeBatchSerially(PreparedStatement.java:1980)
        ... 14 more

Driver stacktrace:
        at org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
        at
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
        at
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
        at
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        at
scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        at
org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
        at
org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
        at
org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
        at scala.Option.foreach(Option.scala:236)
        at
org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
        at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
        at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
        at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
        at
org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1845)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1858)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1929)
        at
org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:920)
        at
org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:918)
        at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
        at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
        at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
        at org.apache.spark.rdd.RDD.foreachPartition(RDD.scala:918)
        at
org.apache.spark.sql.DataFrame$$anonfun$foreachPartition$1.apply$mcV$sp(DataFrame.scala:1444)
        at
org.apache.spark.sql.DataFrame$$anonfun$foreachPartition$1.apply(DataFrame.scala:1444)
        at
org.apache.spark.sql.DataFrame$$anonfun$foreachPartition$1.apply(DataFrame.scala:1444)
        at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:56)
        at
org.apache.spark.sql.DataFrame.withNewExecutionId(DataFrame.scala:2086)
        at
org.apache.spark.sql.DataFrame.foreachPartition(DataFrame.scala:1443)
        at
org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.saveTable(JdbcUtils.scala:276)
        at
org.apache.spark.sql.DataFrameWriter.jdbc(DataFrameWriter.scala:311)
        at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:33)
        at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:38)
        at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:40)
        at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:42)
        at $iwC$$iwC$$iwC$$iwC.<init>(<console>:44)
        at $iwC$$iwC$$iwC.<init>(<console>:46)
        at $iwC$$iwC.<init>(<console>:48)
        at $iwC.<init>(<console>:50)
        at <init>(<console>:52)
        at .<init>(<console>:56)
        at .<clinit>(<console>)
        at .<init>(<console>:7)
        at .<clinit>(<console>)
        at $print(<console>)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
        at java.lang.reflect.Method.invoke(Unknown Source)
        at
org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
        at
org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346)
        at
org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
        at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
        at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
        at
org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
        at
org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
        at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
        at
org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
        at
org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
        at org.apache.spark.repl.SparkILoop.org
$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
        at
org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
        at
org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
        at
org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:945)
        at
scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
        at org.apache.spark.repl.SparkILoop.org
$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:945)
        at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1059)
        at org.apache.spark.repl.Main$.main(Main.scala:31)
        at org.apache.spark.repl.Main.main(Main.scala)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
        at java.lang.reflect.Method.invoke(Unknown Source)
        at
org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:742)
        at
org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
        at
org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.sql.BatchUpdateException: Data truncation: Incorrect
datetime value: '1899-12-31 15:00:00' for column 'date_from' at row 1
        at
com.mysql.jdbc.PreparedStatement.executeBatchSerially(PreparedStatement.java:2028)
        at
com.mysql.jdbc.PreparedStatement.executeBatch(PreparedStatement.java:1451)
        at
org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.savePartition(JdbcUtils.scala:215)
        at
org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$saveTable$1.apply(JdbcUtils.scala:277)
        at
org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$saveTable$1.apply(JdbcUtils.scala:276)
        at
org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$33.apply(RDD.scala:920)
        at
org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$33.apply(RDD.scala:920)
        at
org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1858)
        at
org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1858)
        at
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
        at org.apache.spark.scheduler.Task.run(Task.scala:89)
        at
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown
Source)
        at java.lang.Thread.run(Unknown Source)
Caused by: com.mysql.jdbc.MysqlDataTruncation: Data truncation: Incorrect
datetime value: '1899-12-31 15:00:00' for column 'date_from' at row 1
        at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3607)
        at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:3541)
        at com.mysql.jdbc.MysqlIO.sendCommand(MysqlIO.java:2002)
        at com.mysql.jdbc.MysqlIO.sqlQueryDirect(MysqlIO.java:2163)
        at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2624)
        at
com.mysql.jdbc.PreparedStatement.executeInternal(PreparedStatement.java:2127)
        at
com.mysql.jdbc.PreparedStatement.executeUpdate(PreparedStatement.java:2427)
        at
com.mysql.jdbc.PreparedStatement.executeBatchSerially(PreparedStatement.java:1980)
        ... 14 more
============================================================================

Any help on this is really appreciated.

Regards
Dhaval

Reply via email to