KYLIN-3125 Support SparkSql in Cube building step 'Create Intermediate Flat Hive Table'
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/acc9d0c8 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/acc9d0c8 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/acc9d0c8 Branch: refs/heads/master Commit: acc9d0c834bf48ede6c57e2cc7aee73b76531fa2 Parents: bcda327 Author: Li Yang <liy...@apache.org> Authored: Thu Dec 21 16:39:56 2017 +0800 Committer: æ <cheng.w...@kyligence.io> Committed: Sat Dec 23 22:22:21 2017 +0800 ---------------------------------------------------------------------- build/bin/find-hive-dependency.sh | 3 ++- .../apache/kylin/common/KylinConfigBase.java | 16 ++++++++++++++ .../kylin/common/util/HiveCmdBuilder.java | 19 ++++++++++++---- .../main/resources/kylin-defaults.properties | 9 ++++++++ .../kylin/common/util/HiveCmdBuilderTest.java | 23 +++++++++++++++++++- .../apache/kylin/source/hive/HiveTableMeta.java | 4 ++-- 6 files changed, 66 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/acc9d0c8/build/bin/find-hive-dependency.sh ---------------------------------------------------------------------- diff --git a/build/bin/find-hive-dependency.sh b/build/bin/find-hive-dependency.sh index cb9c8f5..653db83 100755 --- a/build/bin/find-hive-dependency.sh +++ b/build/bin/find-hive-dependency.sh @@ -29,8 +29,9 @@ hive_env= if [ "${client_mode}" == "beeline" ] then + beeline_shell=`$KYLIN_HOME/bin/get-properties.sh kylin.source.hive.beeline-shell` beeline_params=`bash ${KYLIN_HOME}/bin/get-properties.sh kylin.source.hive.beeline-params` - hive_env=`beeline ${hive_conf_properties} ${beeline_params} --outputformat=dsv -e set 2>&1 | grep 'env:CLASSPATH' ` + hive_env=`${beeline_shell} ${hive_conf_properties} ${beeline_params} --outputformat=dsv -e set 2>&1 | grep 'env:CLASSPATH' ` else hive_env=`hive ${hive_conf_properties} -e set 2>&1 | grep 'env:CLASSPATH'` fi http://git-wip-us.apache.org/repos/asf/kylin/blob/acc9d0c8/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java ---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 5620ac0..93fbc23 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -691,10 +691,26 @@ abstract public class KylinConfigBase implements Serializable { return getOptional("kylin.source.hive.client", "cli"); } + public String getHiveBeelineShell() { + return getOptional("kylin.source.hive.beeline-shell", "beeline"); + } + public String getHiveBeelineParams() { return getOptional("kylin.source.hive.beeline-params", ""); } + public boolean getEnableSparkSqlForTableOps() { + return Boolean.parseBoolean(getOptional("kylin.source.hive.enable-sparksql-for-table-ops", "false")); + } + + public String getSparkSqlBeelineShell() { + return getOptional("kylin.source.hive.sparksql-beeline-shell", ""); + } + + public String getSparkSqlBeelineParams() { + return getOptional("kylin.source.hive.sparksql-beeline-params", ""); + } + public String getFlatHiveTableClusterByDictColumn() { return getOptional("kylin.source.hive.flat-table-cluster-by-dict-column"); } http://git-wip-us.apache.org/repos/asf/kylin/blob/acc9d0c8/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java ---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java b/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java index 77dc579..add53db 100644 --- a/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java +++ b/core-common/src/main/java/org/apache/kylin/common/util/HiveCmdBuilder.java @@ -50,18 +50,28 @@ public class HiveCmdBuilder { CLI, BEELINE } - private HiveClientMode clientMode; private KylinConfig kylinConfig; final private Map<String, String> hiveConfProps = new HashMap<>(); final private ArrayList<String> statements = Lists.newArrayList(); public HiveCmdBuilder() { kylinConfig = KylinConfig.getInstanceFromEnv(); - clientMode = HiveClientMode.valueOf(kylinConfig.getHiveClientMode().toUpperCase()); loadHiveConfiguration(); } public String build() { + HiveClientMode clientMode = HiveClientMode.valueOf(kylinConfig.getHiveClientMode().toUpperCase()); + String beelineShell = kylinConfig.getHiveBeelineShell(); + String beelineParams = kylinConfig.getHiveBeelineParams(); + if (kylinConfig.getEnableSparkSqlForTableOps()) { + clientMode = HiveClientMode.BEELINE; + beelineShell = kylinConfig.getSparkSqlBeelineShell(); + beelineParams = kylinConfig.getSparkSqlBeelineParams(); + if (StringUtils.isBlank(beelineShell)) { + throw new IllegalStateException("Missing config 'kylin.source.hive.sparksql-beeline-shell', please check kylin.properties"); + } + } + StringBuffer buf = new StringBuffer(); switch (clientMode) { @@ -83,8 +93,9 @@ public class HiveCmdBuilder { bw.write(statement); bw.newLine(); } - buf.append("beeline "); - buf.append(kylinConfig.getHiveBeelineParams()); + buf.append(beelineShell); + buf.append(" "); + buf.append(beelineParams); buf.append(parseProps()); buf.append(" -f "); buf.append(tmpHql.getAbsolutePath()); http://git-wip-us.apache.org/repos/asf/kylin/blob/acc9d0c8/core-common/src/main/resources/kylin-defaults.properties ---------------------------------------------------------------------- diff --git a/core-common/src/main/resources/kylin-defaults.properties b/core-common/src/main/resources/kylin-defaults.properties index e19ff2e..d8b2d6a 100644 --- a/core-common/src/main/resources/kylin-defaults.properties +++ b/core-common/src/main/resources/kylin-defaults.properties @@ -61,9 +61,18 @@ kylin.restclient.connection.max-total=200 # Hive client, valid value [cli, beeline] kylin.source.hive.client=cli +# Absolute path to beeline shell, can be set to spark beeline instead of the default hive beeline on PATH +#kylin.source.hive.beeline-shell=beeline + # Parameters for beeline client, only necessary if hive client is beeline #kylin.source.hive.beeline-params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u jdbc:hive2://localhost:10000 +# While hive client uses above settings to read hive table metadata, +# table operations can go through a separate SparkSQL command line, given SparkSQL connects to the same Hive metastore. +kylin.source.hive.enable-sparksql-for-table-ops=false +#kylin.source.hive.sparksql-beeline-shell=/path/to/spark-client/bin/beeline +#kylin.source.hive.sparksql-beeline-params=-n root --hiveconf hive.security.authorization.sqlstd.confwhitelist.append='mapreduce.job.*|dfs.*' -u jdbc:hive2://localhost:10000 + kylin.source.hive.keep-flat-table=false # Hive database name for putting the intermediate flat tables http://git-wip-us.apache.org/repos/asf/kylin/blob/acc9d0c8/core-common/src/test/java/org/apache/kylin/common/util/HiveCmdBuilderTest.java ---------------------------------------------------------------------- diff --git a/core-common/src/test/java/org/apache/kylin/common/util/HiveCmdBuilderTest.java b/core-common/src/test/java/org/apache/kylin/common/util/HiveCmdBuilderTest.java index d69d4d2..4262722 100644 --- a/core-common/src/test/java/org/apache/kylin/common/util/HiveCmdBuilderTest.java +++ b/core-common/src/test/java/org/apache/kylin/common/util/HiveCmdBuilderTest.java @@ -43,7 +43,12 @@ public class HiveCmdBuilderTest { @After public void after() throws Exception { System.clearProperty("kylin.source.hive.client"); + System.clearProperty("kylin.source.hive.beeline-shell"); System.clearProperty("kylin.source.hive.beeline-params"); + + System.clearProperty("kylin.source.hive.enable-sparksql-for-table-ops"); + System.clearProperty("kylin.source.hive.sparksql-beeline-shell"); + System.clearProperty("kylin.source.hive.sparksql-beeline-params"); } @Test @@ -67,6 +72,7 @@ public class HiveCmdBuilderTest { public void testBeeline() throws IOException { String lineSeparator = java.security.AccessController.doPrivileged(new sun.security.action.GetPropertyAction("line.separator")); System.setProperty("kylin.source.hive.client", "beeline"); + System.setProperty("kylin.source.hive.beeline-shell", "/spark-client/bin/beeline"); System.setProperty("kylin.source.hive.beeline-params", "-u jdbc_url"); HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder(); @@ -75,7 +81,7 @@ public class HiveCmdBuilderTest { hiveCmdBuilder.addStatement("SHOW\n TABLES;"); String cmd = hiveCmdBuilder.build(); - assertTrue(cmd.startsWith("beeline -u jdbc_url")); + assertTrue(cmd.startsWith("/spark-client/bin/beeline -u jdbc_url")); String hqlFile = cmd.substring(cmd.lastIndexOf("-f ") + 3).trim(); hqlFile = hqlFile.substring(0, hqlFile.length() - ";exit $ret_code".length()); @@ -85,4 +91,19 @@ public class HiveCmdBuilderTest { FileUtils.forceDelete(new File(hqlFile)); } + + @Test + public void testSparkSqlForTableOps() throws IOException { + System.setProperty("kylin.source.hive.enable-sparksql-for-table-ops", "true"); + System.setProperty("kylin.source.hive.sparksql-beeline-shell", "/spark-client/bin/beeline"); + System.setProperty("kylin.source.hive.sparksql-beeline-params", "-u jdbc_url"); + + HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder(); + hiveCmdBuilder.addStatement("USE default;"); + hiveCmdBuilder.addStatement("DROP TABLE test;"); + hiveCmdBuilder.addStatement("SHOW\n TABLES;"); + + String cmd = hiveCmdBuilder.build(); + assertTrue(cmd.startsWith("/spark-client/bin/beeline -u jdbc_url")); + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/acc9d0c8/source-hive/src/main/java/org/apache/kylin/source/hive/HiveTableMeta.java ---------------------------------------------------------------------- diff --git a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveTableMeta.java b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveTableMeta.java index fa9eb29..9a26c14 100644 --- a/source-hive/src/main/java/org/apache/kylin/source/hive/HiveTableMeta.java +++ b/source-hive/src/main/java/org/apache/kylin/source/hive/HiveTableMeta.java @@ -20,8 +20,8 @@ package org.apache.kylin.source.hive; import java.util.List; -class HiveTableMeta { - static class HiveTableColumnMeta { +public class HiveTableMeta { + public static class HiveTableColumnMeta { String name; String dataType; String comment;