This is an automated email from the ASF dual-hosted git repository.
yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-kyuubi.git
The following commit(s) were added to refs/heads/master by this push:
new 2911ac2 [KYUUBI #2024] Hive Backend Engine - ProcBuilder for
HiveEngine
2911ac2 is described below
commit 2911ac26c7f80395b02d621cee6caa7fbd11063f
Author: Min Zhao <[email protected]>
AuthorDate: Sat Mar 19 14:56:57 2022 +0800
[KYUUBI #2024] Hive Backend Engine - ProcBuilder for HiveEngine
### _Why are the changes needed?_
Add ProcBuilder for HiveEngine.
### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly including
negative and positive cases if possible
- [ ] Add screenshots for manual tests if appropriate
- [ ] [Run test](https://kyuubi.apache.org/docs/latest/develop_tools/testing.html#running-tests) locally before making a pull request
Closes #2146 from zhaomin1423/hive_process_builder.
Closes #2024
1baf1397 [Min Zhao] [KYUUBI #2024] Hive Backend Engine - ProcBuilder for
HiveEngine
23fdc3b7 [Min Zhao] [KYUUBI #2024] Hive Backend Engine - ProcBuilder for
HiveEngine
Authored-by: Min Zhao <[email protected]>
Signed-off-by: Kent Yao <[email protected]>
---
bin/load-kyuubi-env.sh | 3 +
docs/deployment/settings.md | 1 +
.../kyuubi-hive-sql-engine/bin/hive-sql-engine.sh | 46 ++++++++++
.../org/apache/kyuubi/config/KyuubiConf.scala | 8 ++
.../org/apache/kyuubi/engine/EngineType.scala | 2 +-
.../scala/org/apache/kyuubi/engine/EngineRef.scala | 5 +-
.../kyuubi/engine/hive/HiveProcessBuilder.scala | 100 +++++++++++++++++++++
.../engine/hive/HiveProcessBuilderSuite.scala | 19 ++--
8 files changed, 175 insertions(+), 9 deletions(-)
diff --git a/bin/load-kyuubi-env.sh b/bin/load-kyuubi-env.sh
index 087eb21..bb1b62a 100755
--- a/bin/load-kyuubi-env.sh
+++ b/bin/load-kyuubi-env.sh
@@ -81,6 +81,7 @@ fi
export FLINK_HOME="${FLINK_HOME:-"${FLINK_BUILTIN}"}"
export FLINK_ENGINE_HOME="${KYUUBI_HOME}/externals/engines/flink"
export TRINO_ENGINE_HOME="${KYUUBI_HOME}/externals/engines/trino"
+export HIVE_ENGINE_HOME="${KYUUBI_HOME}/externals/engines/hive"
export SPARK_HOME="${SPARK_HOME:-"${SPARK_BUILTIN}"}"
# Print essential environment variables to console
@@ -101,6 +102,8 @@ if [ $silent -eq 0 ]; then
echo "TRINO_ENGINE_HOME: ${TRINO_ENGINE_HOME}"
+ echo "HIVE_ENGINE_HOME: ${HIVE_ENGINE_HOME}"
+
echo "HADOOP_CONF_DIR: ${HADOOP_CONF_DIR}"
echo "YARN_CONF_DIR: ${YARN_CONF_DIR}"
diff --git a/docs/deployment/settings.md b/docs/deployment/settings.md
index 51b2fd8..3e1ec78 100644
--- a/docs/deployment/settings.md
+++ b/docs/deployment/settings.md
@@ -324,6 +324,7 @@ Key | Default | Meaning | Type | Since
<code>kyuubi.session.engine.check.interval</code>|<div style='width:
65pt;word-wrap: break-word;white-space: normal'>PT1M</div>|<div style='width:
170pt;word-wrap: break-word;white-space: normal'>The check interval for engine
timeout</div>|<div style='width: 30pt'>duration</div>|<div style='width:
20pt'>1.0.0</div>
<code>kyuubi.session.engine.flink.main.resource</code>|<div style='width:
65pt;word-wrap: break-word;white-space: normal'><undefined></div>|<div
style='width: 170pt;word-wrap: break-word;white-space: normal'>The package used
to create Flink SQL engine remote job. If it is undefined, Kyuubi will use the
default</div>|<div style='width: 30pt'>string</div>|<div style='width:
20pt'>1.4.0</div>
<code>kyuubi.session.engine.flink.max.rows</code>|<div style='width:
65pt;word-wrap: break-word;white-space: normal'>1000000</div>|<div
style='width: 170pt;word-wrap: break-word;white-space: normal'>Max rows of
Flink query results. For batch queries, rows that exceeds the limit would be
ignored. For streaming queries, the query would be canceled if the limit is
reached.</div>|<div style='width: 30pt'>int</div>|<div style='width:
20pt'>1.5.0</div>
+<code>kyuubi.session.engine.hive.main.resource</code>|<div style='width:
65pt;word-wrap: break-word;white-space: normal'><undefined></div>|<div
style='width: 170pt;word-wrap: break-word;white-space: normal'>The package used
to create Hive engine remote job. If it is undefined, Kyuubi will use the
default</div>|<div style='width: 30pt'>string</div>|<div style='width:
20pt'>1.6.0</div>
<code>kyuubi.session.engine.idle.timeout</code>|<div style='width:
65pt;word-wrap: break-word;white-space: normal'>PT30M</div>|<div style='width:
170pt;word-wrap: break-word;white-space: normal'>engine timeout, the engine
will self-terminate when it's not accessed for this duration. 0 or negative
means not to self-terminate.</div>|<div style='width: 30pt'>duration</div>|<div
style='width: 20pt'>1.0.0</div>
<code>kyuubi.session.engine.initialize.timeout</code>|<div style='width:
65pt;word-wrap: break-word;white-space: normal'>PT3M</div>|<div style='width:
170pt;word-wrap: break-word;white-space: normal'>Timeout for starting the
background engine, e.g. SparkSQLEngine.</div>|<div style='width:
30pt'>duration</div>|<div style='width: 20pt'>1.0.0</div>
<code>kyuubi.session.engine.launch.async</code>|<div style='width:
65pt;word-wrap: break-word;white-space: normal'>true</div>|<div style='width:
170pt;word-wrap: break-word;white-space: normal'>When opening kyuubi session,
whether to launch backend engine asynchronously. When true, the Kyuubi server
will set up the connection with the client without delay as the backend engine
will be created asynchronously.</div>|<div style='width:
30pt'>boolean</div>|<div style='width: 20pt'>1.4.0</div>
diff --git a/externals/kyuubi-hive-sql-engine/bin/hive-sql-engine.sh
b/externals/kyuubi-hive-sql-engine/bin/hive-sql-engine.sh
new file mode 100755
index 0000000..b7d80fc
--- /dev/null
+++ b/externals/kyuubi-hive-sql-engine/bin/hive-sql-engine.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+if [[ -z ${JAVA_HOME} ]]; then
+ echo "[ERROR] JAVA_HOME IS NOT SET! CANNOT PROCEED."
+ exit 1
+fi
+
+RUNNER="${JAVA_HOME}/bin/java"
+
+if [[ "$HIVE_ENGINE_HOME" == "$KYUUBI_HOME/externals/engines/hive" ]]; then
+ HIVE_CLIENT_JAR="$HIVE_ENGINE_JAR"
+ HIVE_CLIENT_JARS_DIR="$HIVE_ENGINE_HOME/jars"
+else
+ echo "\nHIVE_ENGINE_HOME $HIVE_ENGINE_HOME doesn't match production directory, assuming in development environment..."
+ HIVE_CLIENT_JAR=$(find $HIVE_ENGINE_HOME/target -regex '.*/kyuubi-hive-sql-engine_.*.jar$' | grep -v '\-sources.jar$' | grep -v '\-javadoc.jar$' | grep -v '\-tests.jar$')
+ HIVE_CLIENT_JARS_DIR=$(find $HIVE_ENGINE_HOME/target -regex '.*/jars')
+fi
+
+HIVE_CLIENT_CLASSPATH="$HIVE_CLIENT_JARS_DIR/*"
+if [[ -z ${YARN_CONF_DIR} ]]; then
+ FULL_CLASSPATH="$HIVE_CLIENT_CLASSPATH:$HIVE_CLIENT_JAR:$HADOOP_CONF_DIR:$HIVE_CONF_DIR"
+else
+ FULL_CLASSPATH="$HIVE_CLIENT_CLASSPATH:$HIVE_CLIENT_JAR:$HADOOP_CONF_DIR:$HIVE_CONF_DIR:$YARN_CONF_DIR"
+fi
+
+if [ -n "$HIVE_CLIENT_JAR" ]; then
+ exec $RUNNER ${HIVE_ENGINE_DYNAMIC_ARGS} -cp ${FULL_CLASSPATH} org.apache.kyuubi.engine.hive.HiveSQLEngine "$@"
+else
+ (>&2 echo "[ERROR] HIVE Engine JAR file 'kyuubi-hive-sql-engine*.jar' should be located in $HIVE_ENGINE_HOME/jars.")
+ exit 1
+fi
diff --git
a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
index ebf84bc..7c0a084 100644
--- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
+++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala
@@ -648,6 +648,14 @@ object KyuubiConf {
.stringConf
.createOptional
+ val ENGINE_HIVE_MAIN_RESOURCE: OptionalConfigEntry[String] =
+ buildConf("session.engine.hive.main.resource")
+ .doc("The package used to create Hive engine remote job. If it is undefined," +
+ " Kyuubi will use the default")
+ .version("1.6.0")
+ .stringConf
+ .createOptional
+
val ENGINE_LOGIN_TIMEOUT: ConfigEntry[Long] =
buildConf("session.engine.login.timeout")
.doc("The timeout of creating the connection to remote sql query engine")
.version("1.0.0")
diff --git
a/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/EngineType.scala
b/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/EngineType.scala
index 849cc14..9fab460 100644
--- a/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/EngineType.scala
+++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/EngineType.scala
@@ -23,5 +23,5 @@ package org.apache.kyuubi.engine
object EngineType extends Enumeration {
type EngineType = Value
- val SPARK_SQL, FLINK_SQL, TRINO = Value
+ val SPARK_SQL, FLINK_SQL, TRINO, HIVE_SQL = Value
}
diff --git
a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
index 65133fb..9294257 100644
--- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
+++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
@@ -33,9 +33,10 @@ import org.apache.kyuubi.{KYUUBI_VERSION,
KyuubiSQLException, Logging, Utils}
import org.apache.kyuubi.config.KyuubiConf
import org.apache.kyuubi.config.KyuubiConf._
import org.apache.kyuubi.config.KyuubiReservedKeys.KYUUBI_ENGINE_SUBMIT_TIME_KEY
-import org.apache.kyuubi.engine.EngineType.{EngineType, FLINK_SQL, SPARK_SQL, TRINO}
+import org.apache.kyuubi.engine.EngineType.{EngineType, FLINK_SQL, HIVE_SQL, SPARK_SQL, TRINO}
import org.apache.kyuubi.engine.ShareLevel.{CONNECTION, GROUP, SERVER, ShareLevel}
import org.apache.kyuubi.engine.flink.FlinkProcessBuilder
+import org.apache.kyuubi.engine.hive.HiveProcessBuilder
import org.apache.kyuubi.engine.spark.SparkProcessBuilder
import org.apache.kyuubi.engine.trino.TrinoProcessBuilder
import org.apache.kyuubi.ha.HighAvailabilityConf.HA_ZK_ENGINE_REF_ID
@@ -198,6 +199,8 @@ private[kyuubi] class EngineRef(
new FlinkProcessBuilder(appUser, conf, extraEngineLog)
case TRINO =>
new TrinoProcessBuilder(appUser, conf, extraEngineLog)
+ case HIVE_SQL =>
+ new HiveProcessBuilder(appUser, conf, extraEngineLog)
}
MetricsSystem.tracing(_.incCount(ENGINE_TOTAL))
diff --git
a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilder.scala
b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilder.scala
new file mode 100644
index 0000000..4dd3085
--- /dev/null
+++
b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilder.scala
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kyuubi.engine.hive
+
+import java.net.URI
+import java.nio.file.{Files, Paths}
+
+import org.apache.kyuubi.{KYUUBI_VERSION, KyuubiSQLException, Logging, SCALA_COMPILE_VERSION}
+import org.apache.kyuubi.config.KyuubiConf
+import org.apache.kyuubi.config.KyuubiConf.ENGINE_HIVE_MAIN_RESOURCE
+import org.apache.kyuubi.engine.ProcBuilder
+import org.apache.kyuubi.engine.hive.HiveProcessBuilder.HIVE_ENGINE_BINARY_FILE
+import org.apache.kyuubi.operation.log.OperationLog
+
+class HiveProcessBuilder(
+ override val proxyUser: String,
+ override val conf: KyuubiConf,
+ val extraEngineLog: Option[OperationLog] = None)
+ extends ProcBuilder with Logging {
+
+ override protected def executable: String = {
+ val hiveHomeOpt = env.get("HIVE_ENGINE_HOME").orElse {
+ val cwd = getClass.getProtectionDomain.getCodeSource.getLocation.getPath
+ .split("kyuubi-server")
+ assert(cwd.length > 1)
+ Option(
+ Paths.get(cwd.head)
+ .resolve("externals")
+ .resolve(module)
+ .toFile)
+ .map(_.getAbsolutePath)
+ }
+
+ hiveHomeOpt.map { dir =>
+ Paths.get(dir, "bin", HIVE_ENGINE_BINARY_FILE).toAbsolutePath.toFile.getCanonicalPath
+ }.getOrElse {
+ throw KyuubiSQLException("HIVE_ENGINE_HOME is not set! " +
+ "For more detail information on installing and configuring Hive, please visit " +
+ "https://kyuubi.apache.org/docs/stable/deployment/settings.html#environments")
+ }
+ }
+
+ override protected def mainResource: Option[String] = {
+ val jarName = s"${module}_$SCALA_COMPILE_VERSION-$KYUUBI_VERSION.jar"
+ // 1. get the main resource jar for user specified config first
+ conf.get(ENGINE_HIVE_MAIN_RESOURCE).filter { userSpecified =>
+ // skip check exist if not local file.
+ val uri = new URI(userSpecified)
+ val schema = if (uri.getScheme != null) uri.getScheme else "file"
+ schema match {
+ case "file" => Files.exists(Paths.get(userSpecified))
+ case _ => true
+ }
+ }.orElse {
+ // 2. get the main resource jar from system build default
+ env.get(KyuubiConf.KYUUBI_HOME)
+ .map { Paths.get(_, "externals", "engines", "hive", "jars", jarName) }
+ .filter(Files.exists(_)).map(_.toAbsolutePath.toFile.getCanonicalPath)
+ }.orElse {
+ // 3. get the main resource from dev environment
+ Option(Paths.get("externals", module, "target", jarName))
+ .filter(Files.exists(_)).orElse {
+ Some(Paths.get("..", "externals", module, "target", jarName))
+ }.map(_.toAbsolutePath.toFile.getCanonicalPath)
+ }
+ }
+
+ override protected def childProcEnv: Map[String, String] = conf.getEnvs +
+ ("HIVE_ENGINE_JAR" -> mainResource.get) +
+ ("HIVE_ENGINE_DYNAMIC_ARGS" ->
+ conf.getAll.map { case (k, v) => s"-D$k=$v" }.mkString(" "))
+
+ override protected def module: String = "kyuubi-hive-sql-engine"
+
+ override protected def mainClass: String = "org.apache.kyuubi.engine.hive.HiveSQLEngine"
+
+ override protected def commands: Array[String] = Array(executable)
+
+}
+
+object HiveProcessBuilder {
+
+ val HIVE_ENGINE_BINARY_FILE = "hive-sql-engine.sh"
+
+}
diff --git
a/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/EngineType.scala
b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilderSuite.scala
similarity index 64%
copy from kyuubi-common/src/main/scala/org/apache/kyuubi/engine/EngineType.scala
copy to
kyuubi-server/src/test/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilderSuite.scala
index 849cc14..42aaeda 100644
--- a/kyuubi-common/src/main/scala/org/apache/kyuubi/engine/EngineType.scala
+++
b/kyuubi-server/src/test/scala/org/apache/kyuubi/engine/hive/HiveProcessBuilderSuite.scala
@@ -15,13 +15,18 @@
* limitations under the License.
*/
-package org.apache.kyuubi.engine
+package org.apache.kyuubi.engine.hive
-/**
- * Defines different engine types supported by Kyuubi.
- */
-object EngineType extends Enumeration {
- type EngineType = Value
+import org.apache.kyuubi.KyuubiFunSuite
+import org.apache.kyuubi.config.KyuubiConf
+
+class HiveProcessBuilderSuite extends KyuubiFunSuite {
+
+ private def conf = KyuubiConf().set("kyuubi.on", "off")
- val SPARK_SQL, FLINK_SQL, TRINO = Value
+ test("hive process builder") {
+ val builder = new HiveProcessBuilder("kyuubi", conf)
+ val commands = builder.toString.split(' ')
+ assert(commands.exists(_.endsWith("hive-sql-engine.sh")))
+ }
}