[jira] [Resolved] (SPARK-15595) DataFrame.write.mode(SaveMode.Append).insertInto(TABLE) overwrites non-partitioned table
[ https://issues.apache.org/jira/browse/SPARK-15595?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sudarshan Lamkhede resolved SPARK-15595. Resolution: Invalid Seems to be specific to the custom spark distribution I am using. > DataFrame.write.mode(SaveMode.Append).insertInto(TABLE) overwrites > non-partitioned table > > > Key: SPARK-15595 > URL: https://issues.apache.org/jira/browse/SPARK-15595 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 1.5.2 >Reporter: Sudarshan Lamkhede > > See the examples below > {noformat} > scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS noparts (model_name > STRING, dateint INT) STORED AS PARQUET""") > res0: org.apache.spark.sql.DataFrame = [result: string] > scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS parts (model_name STRING) > PARTITIONED BY (dateint INT) STORED AS PARQUET""") > res1: org.apache.spark.sql.DataFrame = [result: string] > scala> sqlContext.sql("select * from noparts").show() > +--+---+ > |model_name|dateint| > +--+---+ > +--+---+ > scala> sqlContext.sql("select * from parts").show() > +--+---+ > |model_name|dateint| > +--+---+ > +--+---+ > scala> import sqlContext.implicits._ > import sqlContext.implicits._ > scala> val df1 = sc.parallelize(Array(("before", 1)), 1).toDF("model_name", > "dateint") > df1: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] > scala> val df2 = sc.parallelize(Array(("after", 2)), 1).toDF("model_name", > "dateint") > df2: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] > scala> import org.apache.spark.sql.SaveMode > import org.apache.spark.sql.SaveMode > scala> df1.write.mode(SaveMode.Append).insertInto("noparts") > {noformat} > This inserts one record > {noformat} > scala> sqlContext.sql("select * from noparts").show() > +--+---+ > |model_name|dateint| > +--+---+ > |before| 1| > +--+---+ > {noformat} > But subsequent writes overwrite it > {noformat} > scala> 
df2.write.mode(SaveMode.Append).insertInto("noparts") > scala> sqlContext.sql("select * from noparts").show() > +--+---+ > |model_name|dateint| > +--+---+ > | after| 2| > +--+---+ > {noformat} > That does not happen with partitioned table > {noformat} > scala> df1.write.mode(SaveMode.Append).insertInto("parts") > scala> sqlContext.sql("select * from parts").show() > +--+---+ > |model_name|dateint| > +--+---+ > |before| 1| > +--+---+ > scala> df2.write.mode(SaveMode.Append).insertInto("parts") > scala> sqlContext.sql("select * from parts").show() > +--+---+ > |model_name|dateint| > +--+---+ > |before| 1| > | after| 2| > +--+---+ > {noformat} -- This message was sent by Atlassian JIRA (v6.3.4#6332) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Updated] (SPARK-15595) DataFrame.write.mode(SaveMode.Append).insertInto(TABLE) overwrites non-partitioned table
[ https://issues.apache.org/jira/browse/SPARK-15595?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sudarshan Lamkhede updated SPARK-15595: --- Description: See the examples below {noformat} scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS noparts (model_name STRING, dateint INT) STORED AS PARQUET""") res0: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS parts (model_name STRING) PARTITIONED BY (dateint INT) STORED AS PARQUET""") res1: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> import sqlContext.implicits._ import sqlContext.implicits._ scala> val df1 = sc.parallelize(Array(("before", 1)), 1).toDF("model_name", "dateint") df1: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> val df2 = sc.parallelize(Array(("after", 2)), 1).toDF("model_name", "dateint") df2: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> import org.apache.spark.sql.SaveMode import org.apache.spark.sql.SaveMode scala> df1.write.mode(SaveMode.Append).insertInto("noparts") {noformat} This inserts one record {noformat} scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ {noformat} But subsequent writes overwrite it {noformat} scala> df2.write.mode(SaveMode.Append).insertInto("noparts") scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ | after| 2| +--+---+ {noformat} That does not happen with partitioned table {noformat} scala> df1.write.mode(SaveMode.Append).insertInto("parts") scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ scala> df2.write.mode(SaveMode.Append).insertInto("parts") scala> 
sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| | after| 2| +--+---+ {noformat} was: See the examples below {noformat} scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS noparts (model_name STRING, dateint INT) STORED AS PARQUET""") res0: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS parts (model_name STRING) PARTITIONED BY (dateint INT) STORED AS PARQUET""") res1: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> import sqlContext.implicits._ import sqlContext.implicits._ scala> val df1 = sc.parallelize(Array(("before", 1)), 1).toDF("model_name", "dateint") df1: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> val df2 = sc.parallelize(Array(("after", 2)), 1).toDF("model_name", "dateint") df2: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> import org.apache.spark.sql.SaveMode import org.apache.spark.sql.SaveMode scala> df1.write.mode(SaveMode.Append).insertInto("noparts") scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ scala> df2.write.mode(SaveMode.Append).insertInto("noparts") scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ | after| 2| +--+---+ scala> df1.write.mode(SaveMode.Append).insertInto("parts") scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ scala> df2.write.mode(SaveMode.Append).insertInto("parts") scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| | after| 2| +--+---+ {noformat} > DataFrame.write.mode(SaveMode.Append).insertInto(TABLE) overwrites > non-partitioned table 
> > > Key: SPARK-15595 > URL: https://issues.apache.org/jira/browse/SPARK-15595 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 1.5.2 > Reporter: Sudarshan Lamkhede > > See the examples below >
[jira] [Updated] (SPARK-15595) DataFrame.write.mode(SaveMode.Append).insertInto(TABLE) overwrites non-partitioned table
[ https://issues.apache.org/jira/browse/SPARK-15595?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sudarshan Lamkhede updated SPARK-15595: --- Description: See the examples below {noformat} scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS noparts (model_name STRING, dateint INT) STORED AS PARQUET""") res0: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS parts (model_name STRING) PARTITIONED BY (dateint INT) STORED AS PARQUET""") res1: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> import sqlContext.implicits._ import sqlContext.implicits._ scala> val df1 = sc.parallelize(Array(("before", 1)), 1).toDF("model_name", "dateint") df1: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> val df2 = sc.parallelize(Array(("after", 2)), 1).toDF("model_name", "dateint") df2: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> import org.apache.spark.sql.SaveMode import org.apache.spark.sql.SaveMode scala> df1.write.mode(SaveMode.Append).insertInto("noparts") scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ scala> df2.write.mode(SaveMode.Append).insertInto("noparts") scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ | after| 2| +--+---+ scala> df1.write.mode(SaveMode.Append).insertInto("parts") scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ scala> df2.write.mode(SaveMode.Append).insertInto("parts") scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| | after| 2| +--+---+ {noformat} was: See the examples below scala> 
sqlContext.sql("""CREATE TABLE IF NOT EXISTS noparts (model_name STRING, dateint INT) STORED AS PARQUET""") res0: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS parts (model_name STRING) PARTITIONED BY (dateint INT) STORED AS PARQUET""") res1: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> import sqlContext.implicits._ import sqlContext.implicits._ scala> val df1 = sc.parallelize(Array(("before", 1)), 1).toDF("model_name", "dateint") df1: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> val df2 = sc.parallelize(Array(("after", 2)), 1).toDF("model_name", "dateint") df2: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> import org.apache.spark.sql.SaveMode import org.apache.spark.sql.SaveMode scala> df1.write.mode(SaveMode.Append).insertInto("noparts") scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ scala> df2.write.mode(SaveMode.Append).insertInto("noparts") scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ | after| 2| +--+---+ scala> df1.write.mode(SaveMode.Append).insertInto("parts") scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ scala> df2.write.mode(SaveMode.Append).insertInto("parts") scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| | after| 2| +--+---+ > DataFrame.write.mode(SaveMode.Append).insertInto(TABLE) overwrites > non-partitioned table > > > Key: SPARK-15595 > URL: https://issues.apache.org/jira/browse/SPARK-15595 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 1.5.2 >Reporter: Sudarshan 
Lamkhede > > See the examples below > {noformat} > scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS noparts (model_name > STRING, dateint INT) STORED AS PARQUET""") > res0: org.apache.spark.sql.DataFrame = [result: str
[jira] [Created] (SPARK-15595) DataFrame.write.mode(SaveMode.Append).insertInto(TABLE) overwrites non-partitioned table
Sudarshan Lamkhede created SPARK-15595: -- Summary: DataFrame.write.mode(SaveMode.Append).insertInto(TABLE) overwrites non-partitioned table Key: SPARK-15595 URL: https://issues.apache.org/jira/browse/SPARK-15595 Project: Spark Issue Type: Bug Components: SQL Affects Versions: 1.5.2 Reporter: Sudarshan Lamkhede See the examples below scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS noparts (model_name STRING, dateint INT) STORED AS PARQUET""") res0: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("""CREATE TABLE IF NOT EXISTS parts (model_name STRING) PARTITIONED BY (dateint INT) STORED AS PARQUET""") res1: org.apache.spark.sql.DataFrame = [result: string] scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ +--+---+ scala> import sqlContext.implicits._ import sqlContext.implicits._ scala> val df1 = sc.parallelize(Array(("before", 1)), 1).toDF("model_name", "dateint") df1: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> val df2 = sc.parallelize(Array(("after", 2)), 1).toDF("model_name", "dateint") df2: org.apache.spark.sql.DataFrame = [model_name: string, dateint: int] scala> import org.apache.spark.sql.SaveMode import org.apache.spark.sql.SaveMode scala> df1.write.mode(SaveMode.Append).insertInto("noparts") scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ scala> df2.write.mode(SaveMode.Append).insertInto("noparts") scala> sqlContext.sql("select * from noparts").show() +--+---+ |model_name|dateint| +--+---+ | after| 2| +--+---+ scala> df1.write.mode(SaveMode.Append).insertInto("parts") scala> sqlContext.sql("select * from parts").show() +--+---+ |model_name|dateint| +--+---+ |before| 1| +--+---+ scala> df2.write.mode(SaveMode.Append).insertInto("parts") scala> sqlContext.sql("select * from parts").show() 
+----------+-------+ |model_name|dateint| +----------+-------+ |    before|      1| |     after|      2| +----------+-------+ -- This message was sent by Atlassian JIRA (v6.3.4#6332) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org