This is an automated email from the ASF dual-hosted git repository. yao pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new ab2862e764d [SPARK-42616][SQL] SparkSQLCLIDriver shall only close started hive sessionState ab2862e764d is described below commit ab2862e764df01c4a49ee530b407e4a25df879dd Author: Kent Yao <y...@apache.org> AuthorDate: Wed Mar 1 09:47:45 2023 +0800 [SPARK-42616][SQL] SparkSQLCLIDriver shall only close started hive sessionState ### What changes were proposed in this pull request? The Hive sessionState initialized in SparkSQLCLIDriver is only started later, in HiveClient, when communication with the Hive Metastore (HMS) is actually needed. There are cases in which it never gets started: - the application fails early, before reaching HiveClient - HiveClient is not used, e.g., v2 catalog only - ... With this change, SparkSQLCLIDriver closes the sessionState only if it has been started. ### Why are the changes needed? Bugfix: closing a sessionState that was never started can leave an app in an unexpected state at exit, e.g., ```java bin/spark-sql -c spark.sql.catalogImplementation=in-memory -e "select 1" 23/02/28 13:40:22 WARN Utils: Your hostname, hulk.local resolves to a loopback address: 127.0.0.1; using 10.221.102.180 instead (on interface en0) 23/02/28 13:40:22 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 23/02/28 13:40:22 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Spark master: local[*], Application Id: local-1677562824027 1 Time taken: 2.578 seconds, Fetched 1 row(s) 23/02/28 13:40:28 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout does not exist 23/02/28 13:40:28 WARN HiveConf: HiveConf of name hive.stats.retries.wait does not exist 23/02/28 13:40:29 WARN Hive: Failed to register all functions. 
java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1742) at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:83) at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:133) at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104) at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3607) at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3659) at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3639) at org.apache.hadoop.hive.ql.metadata.Hive.getAllFunctions(Hive.java:3901) at org.apache.hadoop.hive.ql.metadata.Hive.reloadFunctions(Hive.java:248) at org.apache.hadoop.hive.ql.metadata.Hive.registerAllFunctionsOnce(Hive.java:231) at org.apache.hadoop.hive.ql.metadata.Hive.<init>(Hive.java:395) at org.apache.hadoop.hive.ql.metadata.Hive.create(Hive.java:339) at org.apache.hadoop.hive.ql.metadata.Hive.getInternal(Hive.java:319) at org.apache.hadoop.hive.ql.metadata.Hive.get(Hive.java:288) at org.apache.hadoop.hive.ql.session.SessionState.unCacheDataNucleusClassLoaders(SessionState.java:1596) at org.apache.hadoop.hive.ql.session.SessionState.close(SessionState.java:1586) at org.apache.hadoop.hive.cli.CliSessionState.close(CliSessionState.java:66) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.$anonfun$main$2(SparkSQLCLIDriver.scala:153) at org.apache.spark.util.SparkShutdownHook.run(ShutdownHookManager.scala:214) at org.apache.spark.util.SparkShutdownHookManager.$anonfun$runAll$2(ShutdownHookManager.scala:188) at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:2079) at 
org.apache.spark.util.SparkShutdownHookManager.$anonfun$runAll$1(ShutdownHookManager.scala:188) at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) at scala.util.Try$.apply(Try.scala:213) at org.apache.spark.util.SparkShutdownHookManager.runAll(ShutdownHookManager.scala:188) at org.apache.spark.util.SparkShutdownHookManager$$anon$2.run(ShutdownHookManager.scala:178) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:750) Caused by: java.lang.reflect.InvocationTargetException at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) at java.lang.reflect.Constructor.newInstance(Constructor.java:423) at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1740) ... 31 more Caused by: MetaException(message:Version information not found in metastore. ) at org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:83) at org.apache.hadoop.hive.metastore.RetryingHMSHandler.getProxy(RetryingHMSHandler.java:92) at org.apache.hadoop.hive.metastore.HiveMetaStore.newRetryingHMSHandler(HiveMetaStore.java:6902) at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:162) at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:70) ... 36 more Caused by: MetaException(message:Version information not found in metastore. 
) at org.apache.hadoop.hive.metastore.ObjectStore.checkSchema(ObjectStore.java:7810) at org.apache.hadoop.hive.metastore.ObjectStore.verifySchema(ObjectStore.java:7788) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.hive.metastore.RawStoreProxy.invoke(RawStoreProxy.java:101) at com.sun.proxy.$Proxy37.verifySchema(Unknown Source) at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMSForConf(HiveMetaStore.java:595) at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMS(HiveMetaStore.java:588) at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultDB(HiveMetaStore.java:655) at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.init(HiveMetaStore.java:431) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.hive.metastore.RetryingHMSHandler.invokeInternal(RetryingHMSHandler.java:148) at org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:107) at org.apache.hadoop.hive.metastore.RetryingHMSHandler.<init>(RetryingHMSHandler.java:79) ... 40 more ``` ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? locally verified Closes #40211 from yaooqinn/SPARK-42616. 
Authored-by: Kent Yao <y...@apache.org> Signed-off-by: Kent Yao <y...@apache.org> (cherry picked from commit 41c26b61e64ba260915d2d271cb49ad6d82d4213) Signed-off-by: Kent Yao <y...@apache.org> --- .../spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 7ed5e7a7611..51b314ad2c1 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -47,6 +47,7 @@ import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.client.HiveClientImpl import org.apache.spark.sql.hive.security.HiveDelegationTokenProvider +import org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.closeHiveSessionStateIfStarted import org.apache.spark.sql.internal.SharedState import org.apache.spark.sql.internal.SQLConf.LEGACY_EMPTY_CURRENT_DB_IN_CLI import org.apache.spark.util.ShutdownHookManager @@ -111,12 +112,12 @@ private[hive] object SparkSQLCLIDriver extends Logging { sessionState.err = new PrintStream(System.err, true, UTF_8.name()) } catch { case e: UnsupportedEncodingException => - sessionState.close() + closeHiveSessionStateIfStarted(sessionState) exit(ERROR_PATH_NOT_FOUND) } if (!oproc.process_stage2(sessionState)) { - sessionState.close() + closeHiveSessionStateIfStarted(sessionState) exit(ERROR_MISUSE_SHELL_BUILTIN) } @@ -151,7 +152,7 @@ private[hive] object SparkSQLCLIDriver extends Logging { // Clean up after we exit ShutdownHookManager.addShutdownHook { () => - sessionState.close() + closeHiveSessionStateIfStarted(sessionState) SparkSQLEnv.stop(exitCode) } @@ -315,7 
+316,7 @@ private[hive] object SparkSQLCLIDriver extends Logging { line = reader.readLine(currentPrompt + "> ") } - sessionState.close() + closeHiveSessionStateIfStarted(sessionState) exit(ret) } @@ -331,6 +332,11 @@ private[hive] object SparkSQLCLIDriver extends Logging { ReflectionUtils.invoke(classOf[OptionsProcessor], processor, "printUsage") } + private def closeHiveSessionStateIfStarted(state: SessionState): Unit = { + if (ReflectionUtils.getSuperField(state, "isStarted").asInstanceOf[Boolean]) { + state.close() + } + } } private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { @@ -376,7 +382,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { val cmd_1: String = cmd_trimmed.substring(tokens(0).length()).trim() if (cmd_lower.equals("quit") || cmd_lower.equals("exit")) { - sessionState.close() + closeHiveSessionStateIfStarted(sessionState) SparkSQLCLIDriver.exit(EXIT_SUCCESS) } if (tokens(0).toLowerCase(Locale.ROOT).equals("source") || --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org