[ 
https://issues.apache.org/jira/browse/SPARK-26727?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Srinivas Yarra updated SPARK-26727:
-----------------------------------
    Description: 
We experienced that sometimes the Hive query "CREATE OR REPLACE VIEW <view 
name> AS SELECT <columns> FROM <table>" fails with the following exception:
{code:java}
// code placeholder
{code}
org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException: Table or 
view '<view name>' already exists in database 'default'; at 
org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:314)
 at 
org.apache.spark.sql.execution.command.CreateViewCommand.run(views.scala:165) 
at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
 at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
 at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
 at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:195) at 
org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:195) at 
org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3365) at 
org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
 at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
 at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
 at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3364) at 
org.apache.spark.sql.Dataset.<init>(Dataset.scala:195) at 
org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:80) at 
org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642) ... 49 elided
{code:java}
scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy FROM 
ae_dual") res1: org.apache.spark.sql.DataFrame = []

scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy FROM 
ae_dual") res2: org.apache.spark.sql.DataFrame = [] 

scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy FROM 
ae_dual") res3: org.apache.spark.sql.DataFrame = [] 

scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy FROM 
ae_dual") res4: org.apache.spark.sql.DataFrame = [] 

scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy FROM 
ae_dual") res5: org.apache.spark.sql.DataFrame = [] 

scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy FROM 
ae_dual") res6: org.apache.spark.sql.DataFrame = [] 

scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy FROM 
ae_dual") res7: org.apache.spark.sql.DataFrame = []

scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy FROM 
ae_dual") res8: org.apache.spark.sql.DataFrame = [] 

scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy FROM 
ae_dual") res9: org.apache.spark.sql.DataFrame = [] 

scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy FROM 
ae_dual") res10: org.apache.spark.sql.DataFrame = [] 

scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy FROM 
ae_dual") res11: org.apache.spark.sql.DataFrame = [] 

scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy FROM 
ae_dual") org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException: 
Table or view 'testsparkreplace' already exists in database 'default'; at 
org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createTable$1.apply$mcV$sp(HiveExternalCatalog.scala:246)
 at 
org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createTable$1.apply(HiveExternalCatalog.scala:236)
 at 
org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createTable$1.apply(HiveExternalCatalog.scala:236)
 at 
org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
 at 
org.apache.spark.sql.hive.HiveExternalCatalog.createTable(HiveExternalCatalog.scala:236)
 at 
org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener.createTable(ExternalCatalogWithListener.scala:94)
 at 
org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:319)
 at 
org.apache.spark.sql.execution.command.CreateViewCommand.run(views.scala:165) 
at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
 at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
 at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
 at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:195) at 
org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:195) at 
org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3365) at 
org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
 at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
 at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
 at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3364) at 
org.apache.spark.sql.Dataset.<init>(Dataset.scala:195) at 
org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:80) at
org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642) ... 49 elided 
scala> spark.catalog.tableExists("testSparkReplace") res13: Boolean = false 

scala>
{code}
 

As we can see, the failure does not occur for the first 11 executions; on the 
12th, the exception is thrown. Afterwards the table no longer exists, which 
means the view was dropped but not created again.

  was:
We experienced that sometimes the Hive query "CREATE OR REPLACE VIEW <view 
name> AS SELECT <columns> FROM <table>" fails with the following exception:
{code:java}
// code placeholder
{code}
org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException: Table or 
view '<view name>' already exists in database 'default'; at 
org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:314)
 at 
org.apache.spark.sql.execution.command.CreateViewCommand.run(views.scala:165) 
at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
 at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
 at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
 at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:195) at 
org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:195) at 
org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3365) at 
org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
 at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
 at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
 at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3364) at 
org.apache.spark.sql.Dataset.<init>(Dataset.scala:195) at 
org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:80) at 
org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642) ... 49 elided

 

scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy FROM 
ae_dual") res1: org.apache.spark.sql.DataFrame = [] scala> spark.sql("CREATE OR 
REPLACE VIEW testSparkReplace as SELECT dummy FROM ae_dual") res2: 
org.apache.spark.sql.DataFrame = [] scala> spark.sql("CREATE OR REPLACE VIEW 
testSparkReplace as SELECT dummy FROM ae_dual") res3: 
org.apache.spark.sql.DataFrame = [] scala> spark.sql("CREATE OR REPLACE VIEW 
testSparkReplace as SELECT dummy FROM ae_dual") res4: 
org.apache.spark.sql.DataFrame = [] scala> spark.sql("CREATE OR REPLACE VIEW 
testSparkReplace as SELECT dummy FROM ae_dual") res5: 
org.apache.spark.sql.DataFrame = [] scala> spark.sql("CREATE OR REPLACE VIEW 
testSparkReplace as SELECT dummy FROM ae_dual") res6: 
org.apache.spark.sql.DataFrame = [] scala> spark.sql("CREATE OR REPLACE VIEW 
testSparkReplace as SELECT dummy FROM ae_dual") res7: 
org.apache.spark.sql.DataFrame = [] scala> spark.sql("CREATE OR REPLACE VIEW 
testSparkReplace as SELECT dummy FROM ae_dual") res8: 
org.apache.spark.sql.DataFrame = [] scala> spark.sql("CREATE OR REPLACE VIEW 
testSparkReplace as SELECT dummy FROM ae_dual") res9: 
org.apache.spark.sql.DataFrame = [] scala> spark.sql("CREATE OR REPLACE VIEW 
testSparkReplace as SELECT dummy FROM ae_dual") res10: 
org.apache.spark.sql.DataFrame = [] scala> spark.sql("CREATE OR REPLACE VIEW 
testSparkReplace as SELECT dummy FROM ae_dual") res11: 
org.apache.spark.sql.DataFrame = [] scala> spark.sql("CREATE OR REPLACE VIEW 
testSparkReplace as SELECT dummy FROM ae_dual") 
org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException: Table or 
view 'testsparkreplace' already exists in database 'default'; at 
org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createTable$1.apply$mcV$sp(HiveExternalCatalog.scala:246)
 at 
org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createTable$1.apply(HiveExternalCatalog.scala:236)
 at 
org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createTable$1.apply(HiveExternalCatalog.scala:236)
 at 
org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
 at 
org.apache.spark.sql.hive.HiveExternalCatalog.createTable(HiveExternalCatalog.scala:236)
 at 
org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener.createTable(ExternalCatalogWithListener.scala:94)
 at 
org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:319)
 at 
org.apache.spark.sql.execution.command.CreateViewCommand.run(views.scala:165) 
at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
 at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
 at 
org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
 at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:195) at 
org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:195) at 
org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3365) at 
org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
 at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
 at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
 at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3364) at 
org.apache.spark.sql.Dataset.<init>(Dataset.scala:195) at 
org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:80) at 
org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642) ... 49 elided 
scala> spark.catalog.tableExists("testSparkReplace") res13: Boolean = false 
scala>


> CREATE OR REPLACE VIEW query fails with TableAlreadyExistsException
> -------------------------------------------------------------------
>
>                 Key: SPARK-26727
>                 URL: https://issues.apache.org/jira/browse/SPARK-26727
>             Project: Spark
>          Issue Type: Bug
>          Components: Spark Core
>    Affects Versions: 2.4.0
>            Reporter: Srinivas Yarra
>            Priority: Major
>
> We experienced that sometimes the Hive query "CREATE OR REPLACE VIEW <view 
> name> AS SELECT <columns> FROM <table>" fails with the following exception:
> {code:java}
> // code placeholder
> {code}
> org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException: Table or 
> view '<view name>' already exists in database 'default'; at 
> org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:314)
>  at 
> org.apache.spark.sql.execution.command.CreateViewCommand.run(views.scala:165) 
> at 
> org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
>  at 
> org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
>  at 
> org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
>  at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:195) at 
> org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:195) at 
> org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3365) at 
> org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
>  at 
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
>  at 
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
>  at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3364) at 
> org.apache.spark.sql.Dataset.<init>(Dataset.scala:195) at 
> org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:80) at 
> org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642) ... 49 elided
> {code:java}
> scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy 
> FROM ae_dual") res1: org.apache.spark.sql.DataFrame = []
> scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy 
> FROM ae_dual") res2: org.apache.spark.sql.DataFrame = [] 
> scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy 
> FROM ae_dual") res3: org.apache.spark.sql.DataFrame = [] 
> scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy 
> FROM ae_dual") res4: org.apache.spark.sql.DataFrame = [] 
> scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy 
> FROM ae_dual") res5: org.apache.spark.sql.DataFrame = [] 
> scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy 
> FROM ae_dual") res6: org.apache.spark.sql.DataFrame = [] 
> scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy 
> FROM ae_dual") res7: org.apache.spark.sql.DataFrame = []
> scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy 
> FROM ae_dual") res8: org.apache.spark.sql.DataFrame = [] 
> scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy 
> FROM ae_dual") res9: org.apache.spark.sql.DataFrame = [] 
> scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy 
> FROM ae_dual") res10: org.apache.spark.sql.DataFrame = [] 
> scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy 
> FROM ae_dual") res11: org.apache.spark.sql.DataFrame = [] 
> scala> spark.sql("CREATE OR REPLACE VIEW testSparkReplace as SELECT dummy 
> FROM ae_dual") 
> org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException: Table or 
> view 'testsparkreplace' already exists in database 'default'; at 
> org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createTable$1.apply$mcV$sp(HiveExternalCatalog.scala:246)
>  at 
> org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createTable$1.apply(HiveExternalCatalog.scala:236)
>  at 
> org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createTable$1.apply(HiveExternalCatalog.scala:236)
>  at 
> org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
>  at 
> org.apache.spark.sql.hive.HiveExternalCatalog.createTable(HiveExternalCatalog.scala:236)
>  at 
> org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener.createTable(ExternalCatalogWithListener.scala:94)
>  at 
> org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:319)
>  at 
> org.apache.spark.sql.execution.command.CreateViewCommand.run(views.scala:165) 
> at 
> org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
>  at 
> org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
>  at 
> org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79)
>  at org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:195) at 
> org.apache.spark.sql.Dataset$$anonfun$6.apply(Dataset.scala:195) at 
> org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3365) at 
> org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
>  at 
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
>  at 
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
>  at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3364) at 
> org.apache.spark.sql.Dataset.<init>(Dataset.scala:195) at 
> org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:80) at
> org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642) ... 49 elided 
> scala> spark.catalog.tableExists("testSparkReplace") res13: Boolean = false 
> scala>
> {code}
>  
> As we can see, the failure does not occur for the first 11 executions; on the 
> 12th, the exception is thrown. Afterwards the table no longer exists, which 
> means the view was dropped but not created again.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to