This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new ab2862e764d [SPARK-42616][SQL] SparkSQLCLIDriver shall only close 
started hive sessionState
ab2862e764d is described below

commit ab2862e764df01c4a49ee530b407e4a25df879dd
Author: Kent Yao <y...@apache.org>
AuthorDate: Wed Mar 1 09:47:45 2023 +0800

    [SPARK-42616][SQL] SparkSQLCLIDriver shall only close started hive 
sessionState
    
    ### What changes were proposed in this pull request?
    
    The Hive sessionState initiated in SparkSQLCLIDriver will be started later 
by HiveClient while communicating with the HMS, if necessary. There are some 
cases in which it never gets started:
    - the application fails early, before reaching HiveClient
    - HiveClient is not used, e.g., v2 catalog only
    - ...
    
    ### Why are the changes needed?
    
    Bugfix: without this change, an app can end up in an unexpected state, e.g.,
    
    ```java
    bin/spark-sql -c spark.sql.catalogImplementation=in-memory -e "select 1"
    23/02/28 13:40:22 WARN Utils: Your hostname, hulk.local resolves to a 
loopback address: 127.0.0.1; using 10.221.102.180 instead (on interface en0)
    23/02/28 13:40:22 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to 
another address
    Setting default log level to "WARN".
    To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use 
setLogLevel(newLevel).
    23/02/28 13:40:22 WARN NativeCodeLoader: Unable to load native-hadoop 
library for your platform... using builtin-java classes where applicable
    Spark master: local[*], Application Id: local-1677562824027
    1
    Time taken: 2.578 seconds, Fetched 1 row(s)
    23/02/28 13:40:28 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout 
does not exist
    23/02/28 13:40:28 WARN HiveConf: HiveConf of name hive.stats.retries.wait 
does not exist
    23/02/28 13:40:29 WARN Hive: Failed to register all functions.
    java.lang.RuntimeException: Unable to instantiate 
org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
            at 
org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1742)
            at 
org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:83)
            at 
org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:133)
            at 
org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
            at 
org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3607)
            at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3659)
            at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3639)
            at 
org.apache.hadoop.hive.ql.metadata.Hive.getAllFunctions(Hive.java:3901)
            at 
org.apache.hadoop.hive.ql.metadata.Hive.reloadFunctions(Hive.java:248)
            at 
org.apache.hadoop.hive.ql.metadata.Hive.registerAllFunctionsOnce(Hive.java:231)
            at org.apache.hadoop.hive.ql.metadata.Hive.<init>(Hive.java:395)
            at org.apache.hadoop.hive.ql.metadata.Hive.create(Hive.java:339)
            at 
org.apache.hadoop.hive.ql.metadata.Hive.getInternal(Hive.java:319)
            at org.apache.hadoop.hive.ql.metadata.Hive.get(Hive.java:288)
            at 
org.apache.hadoop.hive.ql.session.SessionState.unCacheDataNucleusClassLoaders(SessionState.java:1596)
            at 
org.apache.hadoop.hive.ql.session.SessionState.close(SessionState.java:1586)
            at 
org.apache.hadoop.hive.cli.CliSessionState.close(CliSessionState.java:66)
            at 
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.$anonfun$main$2(SparkSQLCLIDriver.scala:153)
            at 
org.apache.spark.util.SparkShutdownHook.run(ShutdownHookManager.scala:214)
            at 
org.apache.spark.util.SparkShutdownHookManager.$anonfun$runAll$2(ShutdownHookManager.scala:188)
            at 
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
            at 
org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:2079)
            at 
org.apache.spark.util.SparkShutdownHookManager.$anonfun$runAll$1(ShutdownHookManager.scala:188)
            at 
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
            at scala.util.Try$.apply(Try.scala:213)
            at 
org.apache.spark.util.SparkShutdownHookManager.runAll(ShutdownHookManager.scala:188)
            at 
org.apache.spark.util.SparkShutdownHookManager$$anon$2.run(ShutdownHookManager.scala:178)
            at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
            at java.util.concurrent.FutureTask.run(FutureTask.java:266)
            at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
            at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
            at java.lang.Thread.run(Thread.java:750)
    Caused by: java.lang.reflect.InvocationTargetException
            at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native 
Method)
            at 
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
            at 
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
            at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
            at 
org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1740)
            ... 31 more
    Caused by: MetaException(message:Version information not found in 
metastore. )
            at 
org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:83)
            at 
org.apache.hadoop.hive.metastore.RetryingHMSHandler.getProxy(RetryingHMSHandler.java:92)
            at 
org.apache.hadoop.hive.metastore.HiveMetaStore.newRetryingHMSHandler(HiveMetaStore.java:6902)
            at 
org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:162)
            at 
org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:70)
            ... 36 more
    Caused by: MetaException(message:Version information not found in 
metastore. )
            at 
org.apache.hadoop.hive.metastore.ObjectStore.checkSchema(ObjectStore.java:7810)
            at 
org.apache.hadoop.hive.metastore.ObjectStore.verifySchema(ObjectStore.java:7788)
            at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
            at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
            at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
            at java.lang.reflect.Method.invoke(Method.java:498)
            at 
org.apache.hadoop.hive.metastore.RawStoreProxy.invoke(RawStoreProxy.java:101)
            at com.sun.proxy.$Proxy37.verifySchema(Unknown Source)
            at 
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMSForConf(HiveMetaStore.java:595)
            at 
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMS(HiveMetaStore.java:588)
            at 
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultDB(HiveMetaStore.java:655)
            at 
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.init(HiveMetaStore.java:431)
            at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
            at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
            at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
            at java.lang.reflect.Method.invoke(Method.java:498)
            at 
org.apache.hadoop.hive.metastore.RetryingHMSHandler.invokeInternal(RetryingHMSHandler.java:148)
            at 
org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:107)
            at 
org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:79)
            ... 40 more
    ```
    ### Does this PR introduce _any_ user-facing change?
    
    ### How was this patch tested?
    
    locally verified
    
    Closes #40211 from yaooqinn/SPARK-42616.
    
    Authored-by: Kent Yao <y...@apache.org>
    Signed-off-by: Kent Yao <y...@apache.org>
    (cherry picked from commit 41c26b61e64ba260915d2d271cb49ad6d82d4213)
    Signed-off-by: Kent Yao <y...@apache.org>
---
 .../spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala  | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index 7ed5e7a7611..51b314ad2c1 100644
--- 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -47,6 +47,7 @@ import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.hive.HiveUtils
 import org.apache.spark.sql.hive.client.HiveClientImpl
 import org.apache.spark.sql.hive.security.HiveDelegationTokenProvider
+import 
org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.closeHiveSessionStateIfStarted
 import org.apache.spark.sql.internal.SharedState
 import org.apache.spark.sql.internal.SQLConf.LEGACY_EMPTY_CURRENT_DB_IN_CLI
 import org.apache.spark.util.ShutdownHookManager
@@ -111,12 +112,12 @@ private[hive] object SparkSQLCLIDriver extends Logging {
       sessionState.err = new PrintStream(System.err, true, UTF_8.name())
     } catch {
       case e: UnsupportedEncodingException =>
-        sessionState.close()
+        closeHiveSessionStateIfStarted(sessionState)
         exit(ERROR_PATH_NOT_FOUND)
     }
 
     if (!oproc.process_stage2(sessionState)) {
-      sessionState.close()
+      closeHiveSessionStateIfStarted(sessionState)
       exit(ERROR_MISUSE_SHELL_BUILTIN)
     }
 
@@ -151,7 +152,7 @@ private[hive] object SparkSQLCLIDriver extends Logging {
 
     // Clean up after we exit
     ShutdownHookManager.addShutdownHook { () =>
-      sessionState.close()
+      closeHiveSessionStateIfStarted(sessionState)
       SparkSQLEnv.stop(exitCode)
     }
 
@@ -315,7 +316,7 @@ private[hive] object SparkSQLCLIDriver extends Logging {
       line = reader.readLine(currentPrompt + "> ")
     }
 
-    sessionState.close()
+    closeHiveSessionStateIfStarted(sessionState)
 
     exit(ret)
   }
@@ -331,6 +332,11 @@ private[hive] object SparkSQLCLIDriver extends Logging {
     ReflectionUtils.invoke(classOf[OptionsProcessor], processor, "printUsage")
   }
 
+  private def closeHiveSessionStateIfStarted(state: SessionState): Unit = {
+    if (ReflectionUtils.getSuperField(state, 
"isStarted").asInstanceOf[Boolean]) {
+      state.close()
+    }
+  }
 }
 
 private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
@@ -376,7 +382,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver 
with Logging {
     val cmd_1: String = cmd_trimmed.substring(tokens(0).length()).trim()
     if (cmd_lower.equals("quit") ||
       cmd_lower.equals("exit")) {
-      sessionState.close()
+      closeHiveSessionStateIfStarted(sessionState)
       SparkSQLCLIDriver.exit(EXIT_SUCCESS)
     }
     if (tokens(0).toLowerCase(Locale.ROOT).equals("source") ||


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to