[ https://issues.apache.org/jira/browse/SPARK-13699?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15181780#comment-15181780 ]
Dhaval Modi edited comment on SPARK-13699 at 3/5/16 5:30 PM: ------------------------------------------------------------- ================== Code Snippet ======================================= val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc); val src=sqlContext.sql("select * from src_table"); val tgt=sqlContext.sql("select * from tgt_table"); var tgtFinal=tgt.filter("currind = 'N'"); //Add to final table val tgtActive=tgt.filter("currind = 'Y'"); #src.select("col1").except(src.select("col1").as('a).join(tgtActive.select("col1").as('b),"col1")) val newTgt1 = tgtActive.as('a).join(src.as('b),$"a.col1" === $"b.col1") #val newTgt2 = tgtActive.except(newTgt1.select("a.*")); tgtFinal = tgtFinal.unionAll(tgtActive.except(newTgt1.select("a.*"))); var srcInsert = src.except(newTgt1.select("b.*")) import org.apache.spark.sql._ val inBatchID = udf((t:String) => "13" ) val inCurrInd = udf((t:String) => "Y" ) val NCurrInd = udf((t:String) => "N" ) val endDate = udf((t:String) => "9999-12-31 23:59:59") tgtFinal = tgtFinal.unionAll(newTgt1.select("a.*").withColumn("currInd", NCurrInd(col("col1"))).withColumn("endDate", current_timestamp()).withColumn("updateDate", current_timestamp())) srcInsert = src.withColumn("batchId", inBatchID(col("col1"))).withColumn("currInd", inCurrInd(col("col1"))).withColumn("startDate", current_timestamp()).withColumn("endDate", date_format(endDate(col("col1")),"yyyy-MM-dd HH:mm:ss")).withColumn("updateDate", current_timestamp()) tgtFinal = tgtFinal.unionAll(srcInsert) tgtFinal.write.mode(SaveMode.Overwrite).saveAsTable("tgt_table") ======================= Code Snippet ================================= was (Author: mysti): ================== Code Snippet ======================================= val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc); val src=sqlContext.sql("select * from src_table"); val tgt=sqlContext.sql("select * from tgt_table"); var tgtFinal=tgt.filter("currind = 'N'"); //Add to final table val 
tgtActive=tgt.filter("currind = 'Y'"); #src.select("col1").except(src.select("col1").as('a).join(tgtActive.select("col1").as('b),"col1")) val newTgt1 = tgtActive.as('a).join(src.as('b),$"a.col1" === $"b.col1") #val newTgt2 = tgtActive.except(newTgt1.select("a.*")); tgtFinal = tgtFinal.unionAll(tgtActive.except(newTgt1.select("a.*"))); var srcInsert = src.except(newTgt1.select("b.*")) import org.apache.spark.sql._ val inBatchID = udf((t:String) => "13" ) val inCurrInd = udf((t:String) => "Y" ) val NCurrInd = udf((t:String) => "N" ) val endDate = udf((t:String) => "9999-12-31 23:59:59") tgtFinal = tgtFinal.unionAll(newTgt1.select("a.*").withColumn("currInd", NCurrInd(col("col1"))).withColumn("endDate", current_timestamp()).withColumn("updateDate", current_timestamp())) srcInsert = src.withColumn("batchId", inBatchID(col("col1"))).withColumn("currInd", inCurrInd(col("col1"))).withColumn("startDate", current_timestamp()).withColumn("endDate", date_format(endDate(col("col1")),"yyyy-MM-dd HH:mm:ss")).withColumn("updateDate", current_timestamp()) tgtFinal = tgtFinal.unionAll(srcInsert) tgtFinal.write.mode(SaveMode.Overwrite).saveAsTable(tgt_table) ======================= Code Snippet ================================= > Spark SQL drops the table in "overwrite" mode while writing into table > ---------------------------------------------------------------------- > > Key: SPARK-13699 > URL: https://issues.apache.org/jira/browse/SPARK-13699 > Project: Spark > Issue Type: Improvement > Components: SQL > Affects Versions: 1.6.0 > Reporter: Dhaval Modi > Attachments: stackTrace.txt > > > Hi, > When writing a DataFrame to a Hive table with the "SaveMode.Overwrite" option, > e.g. > tgtFinal.write.mode(SaveMode.Overwrite).saveAsTable("tgt_table") > sqlContext drops the table instead of truncating it. > This causes an error while overwriting. 
> I am attaching the stack trace and the commands to reproduce the issue. > Thanks & Regards, > Dhaval -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org