[ 
https://issues.apache.org/jira/browse/SPARK-46617?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Adrian Stratulat updated SPARK-46617:
-------------------------------------
    Description: 
{noformat}
*no* further _formatting_ is done here{noformat}
h2. Description:

*Encountered behavior:* Using "CREATE TABLE IF NOT EXISTS" in a spark session, 
on a table that already exists, while Hive is not enabled, results in the table 
being {*}silently overwritten{*}.

*Expected behavior:* "CREATE TABLE IF NOT EXISTS" on a table that already 
exists results in the statement being skipped. See test-matrix below.
h2. Test matrix & test scenarios:
{noformat}
+----------------------------+-------------------------------------+
| sql statement              | Behavior when overwriting a table   |
|                            +---------------+---------------------+
|                            | hive-enabled  | hive-disabled       |
+----------------------------+---------------+---------------------+
| create-table               | exception (1) | exception (2)       |
| create-table-if-not-exists | skip (3)      | OVERWRITE *BUG* (4) |
+----------------------------+---------------+---------------------+
{noformat}
h3. (1) Create Table - Hive

 
{code:java}
// works_as_expected_hive.sc
import $ivy.`org.apache.spark::spark-core:3.5.0`
import $ivy.`org.apache.spark::spark-sql:3.5.0`
import $ivy.`org.apache.spark::spark-hive:3.5.0`

import org.apache.spark.sql.SparkSession

@main
def main() : Unit = {

  val spark = SparkSession
              .builder()
              .appName("Spark SQL basic example")
              .master("local[*]")
              .enableHiveSupport()
              .getOrCreate()

  spark.sql("""
    CREATE TABLE hello
    USING csv
    OPTIONS (header=true)
    LOCATION 'store/'
    AS (select 1 as col)
  """)
} {code}
h3. (2) Create Table - No Hive
{code:java}
// works_as_expected_nohive.sc
import $ivy.`org.apache.spark::spark-core:3.5.0`
import $ivy.`org.apache.spark::spark-sql:3.5.0`

import org.apache.spark.sql.SparkSession

@main
def main() : Unit = {

  val spark = SparkSession
              .builder()
              .appName("Spark SQL basic example")
              .master("local[*]")
              .getOrCreate()

  spark.sql("""
    CREATE TABLE hello
    USING csv
    OPTIONS (header=true)
    LOCATION 'store/'
    AS (select 1 as col)
  """)
} {code}
h3. (3) Create Table IF NOT EXISTS - Hive
{code:java}
// works_as_expected_if_not_exists_hive.sc
import $ivy.`org.apache.spark::spark-core:3.5.0`
import $ivy.`org.apache.spark::spark-sql:3.5.0`
import $ivy.`org.apache.spark::spark-hive:3.5.0`

import org.apache.spark.sql.SparkSession

@main
def main() : Unit = {

  val spark = SparkSession
              .builder()
              .appName("Spark SQL basic example")
              .master("local[*]")
              .enableHiveSupport()
              .getOrCreate()

  spark.sql("""
    CREATE TABLE IF NOT EXISTS hello
    USING csv
    OPTIONS (header=true)
    LOCATION 'store/'
    AS (select 1 as col)
  """)
}{code}
h3. (4) Create Table IF NOT EXISTS - No Hive

 
{code:java}
//bug1.sc

import $ivy.`org.apache.spark::spark-core:3.5.0`
import $ivy.`org.apache.spark::spark-sql:3.5.0`

import org.apache.spark.sql.SparkSession

@main
def main() : Unit = {

  val spark = SparkSession
              .builder()
              .appName("Spark SQL basic example")
              .master("local[*]")
              .getOrCreate()

  spark.sql("""
    CREATE TABLE IF NOT EXISTS hello
    USING csv
    OPTIONS (header=true)
    LOCATION 'store/'
    AS (select 1 as col)
  """)
}
 {code}
{code:java}
//bug2.sc

import $ivy.`org.apache.spark::spark-core:3.5.0`
import $ivy.`org.apache.spark::spark-sql:3.5.0`

import org.apache.spark.sql.SparkSession

@main
def main() : Unit = {

  val spark = SparkSession
              .builder()
              .appName("Spark SQL basic example")
              .master("local[*]")
              .getOrCreate()

  spark.sql("""
    CREATE TABLE IF NOT EXISTS hello
    USING csv
    OPTIONS (header=true)
    LOCATION 'store/'
    AS (select 2 as col)   -- PAYLOAD BECOMES 2
  """)
} {code}
 

 

 

  was:
h2. Description:

*Encountered behavior:* Using "CREATE TABLE IF NOT EXISTS" in a spark session, 
on a table that already exists, while Hive is not enabled, results in the table 
being {*}silently overwritten{*}.

*Expected behavior:* "CREATE TABLE IF NOT EXISTS" on a table that already 
exists results in the statement being skipped. See test-matrix below.
h2. Test matrix & test scenarios:
{noformat}
+----------------------------+-------------------------------------+
| sql statement              | Behavior when overwriting a table   |
|                            +---------------+---------------------+
|                            | hive-enabled  | hive-disabled       |
+----------------------------+---------------+---------------------+
| create-table               | exception (1) | exception (2)       |
| create-table-if-not-exists | skip (3)      | OVERWRITE *BUG* (4) |
+----------------------------+---------------+---------------------+
{noformat}
 


> Create-table-if-not-exists overwrites tables
> --------------------------------------------
>
>                 Key: SPARK-46617
>                 URL: https://issues.apache.org/jira/browse/SPARK-46617
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 3.5.0
>            Reporter: Adrian Stratulat
>            Priority: Minor
>
> {noformat}
> *no* further _formatting_ is done here{noformat}
> h2. Description:
> *Encountered behavior:* Using "CREATE TABLE IF NOT EXISTS" in a spark 
> session, on a table that already exists, while Hive is not enabled, results 
> in the table being {*}silently overwritten{*}.
> *Expected behavior:* "CREATE TABLE IF NOT EXISTS" on a table that already 
> exists results in the statement being skipped. See test-matrix below.
> h2. Test matrix & test scenarios:
> {noformat}
> +----------------------------+-------------------------------------+
> | sql statement              | Behavior when overwriting a table   |
> |                            +---------------+---------------------+
> |                            | hive-enabled  | hive-disabled       |
> +----------------------------+---------------+---------------------+
> | create-table               | exception (1) | exception (2)       |
> | create-table-if-not-exists | skip (3)      | OVERWRITE *BUG* (4) |
> +----------------------------+---------------+---------------------+
> {noformat}
> h3. (1) Create Table - Hive
>  
> {code:java}
> // works_as_expected_hive.sc
> import $ivy.`org.apache.spark::spark-core:3.5.0`
> import $ivy.`org.apache.spark::spark-sql:3.5.0`
> import $ivy.`org.apache.spark::spark-hive:3.5.0`
> import org.apache.spark.sql.SparkSession
> @main
> def main() : Unit = {
>   val spark = SparkSession
>               .builder()
>               .appName("Spark SQL basic example")
>               .master("local[*]")
>               .enableHiveSupport()
>               .getOrCreate()
>   spark.sql("""
>     CREATE TABLE hello
>     USING csv
>     OPTIONS (header=true)
>     LOCATION 'store/'
>     AS (select 1 as col)
>   """)
> } {code}
> h3. (2) Create Table - No Hive
> {code:java}
> // works_as_expected_nohive.sc
> import $ivy.`org.apache.spark::spark-core:3.5.0`
> import $ivy.`org.apache.spark::spark-sql:3.5.0`
> import org.apache.spark.sql.SparkSession
> @main
> def main() : Unit = {
>   val spark = SparkSession
>               .builder()
>               .appName("Spark SQL basic example")
>               .master("local[*]")
>               .getOrCreate()
>   spark.sql("""
>     CREATE TABLE hello
>     USING csv
>     OPTIONS (header=true)
>     LOCATION 'store/'
>     AS (select 1 as col)
>   """)
> } {code}
> h3. (3) Create Table IF NOT EXISTS - Hive
> {code:java}
> // works_as_expected_if_not_exists_hive.sc
> import $ivy.`org.apache.spark::spark-core:3.5.0`
> import $ivy.`org.apache.spark::spark-sql:3.5.0`
> import $ivy.`org.apache.spark::spark-hive:3.5.0`
> import org.apache.spark.sql.SparkSession
> @main
> def main() : Unit = {
>   val spark = SparkSession
>               .builder()
>               .appName("Spark SQL basic example")
>               .master("local[*]")
>               .enableHiveSupport()
>               .getOrCreate()
>   spark.sql("""
>     CREATE TABLE IF NOT EXISTS hello
>     USING csv
>     OPTIONS (header=true)
>     LOCATION 'store/'
>     AS (select 1 as col)
>   """)
> }{code}
> h3. (4) Create Table IF NOT EXISTS - No Hive
>  
> {code:java}
> //bug1.sc
> import $ivy.`org.apache.spark::spark-core:3.5.0`
> import $ivy.`org.apache.spark::spark-sql:3.5.0`
> import org.apache.spark.sql.SparkSession
> @main
> def main() : Unit = {
>   val spark = SparkSession
>               .builder()
>               .appName("Spark SQL basic example")
>               .master("local[*]")
>               .getOrCreate()
>   spark.sql("""
>     CREATE TABLE IF NOT EXISTS hello
>     USING csv
>     OPTIONS (header=true)
>     LOCATION 'store/'
>     AS (select 1 as col)
>   """)
> }
>  {code}
> {code:java}
> //bug2.sc
> import $ivy.`org.apache.spark::spark-core:3.5.0`
> import $ivy.`org.apache.spark::spark-sql:3.5.0`
> import org.apache.spark.sql.SparkSession
> @main
> def main() : Unit = {
>   val spark = SparkSession
>               .builder()
>               .appName("Spark SQL basic example")
>               .master("local[*]")
>               .getOrCreate()
>   spark.sql("""
>     CREATE TABLE IF NOT EXISTS hello
>     USING csv
>     OPTIONS (header=true)
>     LOCATION 'store/'
>     AS (select 2 as col)   -- PAYLOAD BECOMES 2
>   """)
> } {code}
>  
>  
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to