HIVE-17111: Add TestLocalSparkCliDriver (Sahil Takiar, reviewed by Aihua Xu, Peter Vary, Xuefu Zhang)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c2545574 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c2545574 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c2545574 Branch: refs/heads/hive-14535 Commit: c25455746ae46af61e44591ba3ee4833f0b4b8d0 Parents: 842d4df Author: Sahil Takiar <takiar.sa...@gmail.com> Authored: Mon Oct 9 16:05:16 2017 -0700 Committer: Sahil Takiar <stak...@cloudera.com> Committed: Mon Oct 9 16:05:16 2017 -0700 ---------------------------------------------------------------------- data/conf/spark/local/hive-site.xml | 258 +++++++++++++++++++ .../hive/cli/TestLocalSparkCliDriver.java | 62 +++++ .../test/resources/testconfiguration.properties | 1 + .../hadoop/hive/cli/control/CliConfigs.java | 22 ++ pom.xml | 2 + .../clientpositive/spark_local_queries.q | 16 ++ .../spark/spark_local_queries.q.out | 131 ++++++++++ 7 files changed, 492 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/c2545574/data/conf/spark/local/hive-site.xml ---------------------------------------------------------------------- diff --git a/data/conf/spark/local/hive-site.xml b/data/conf/spark/local/hive-site.xml new file mode 100644 index 0000000..8bade0f --- /dev/null +++ b/data/conf/spark/local/hive-site.xml @@ -0,0 +1,258 @@ +<?xml version="1.0"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<configuration> + +<!-- Hive Configuration can either be stored in this file or in the hadoop configuration files --> +<!-- that are implied by Hadoop setup variables. --> +<!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive --> +<!-- users do not have to edit hadoop configuration files (that may be managed as a centralized --> +<!-- resource). --> + +<!-- Hive Execution Parameters --> +<property> + <name>hadoop.tmp.dir</name> + <value>${test.tmp.dir}/hadoop-tmp</value> + <description>A base for other temporary directories.</description> +</property> + +<property> + <name>hive.exec.scratchdir</name> + <value>${test.tmp.dir}/scratchdir</value> + <description>Scratch space for Hive jobs</description> +</property> + +<property> + <name>hive.exec.local.scratchdir</name> + <value>${test.tmp.dir}/localscratchdir/</value> + <description>Local scratch space for Hive jobs</description> +</property> + +<property> + <name>datanucleus.schema.autoCreateAll</name> + <value>true</value> +</property> + +<property> + <name>hive.metastore.schema.verification</name> + <value>false</value> +</property> + +<property> + <name>javax.jdo.option.ConnectionURL</name> + <value>jdbc:derby:;databaseName=${test.tmp.dir}/junit_metastore_db;create=true</value> +</property> + +<property> + <name>javax.jdo.option.ConnectionDriverName</name> + <value>org.apache.derby.jdbc.EmbeddedDriver</value> +</property> + +<property> + <name>javax.jdo.option.ConnectionUserName</name> + <value>APP</value> +</property> + +<property> + 
<name>javax.jdo.option.ConnectionPassword</name> + <value>mine</value> +</property> + +<property> + <!-- this should eventually be deprecated since the metastore should supply this --> + <name>hive.metastore.warehouse.dir</name> + <value>${test.warehouse.dir}</value> + <description></description> +</property> + +<property> + <name>hive.metastore.metadb.dir</name> + <value>file://${test.tmp.dir}/metadb/</value> + <description> + Required by metastore server or if the uris argument below is not supplied + </description> +</property> + +<property> + <name>test.log.dir</name> + <value>${test.tmp.dir}/log/</value> + <description></description> +</property> + +<property> + <name>test.data.files</name> + <value>${hive.root}/data/files</value> + <description></description> +</property> + +<property> + <name>hive.jar.path</name> + <value>${maven.local.repository}/org/apache/hive/hive-exec/${hive.version}/hive-exec-${hive.version}.jar</value> + <description></description> +</property> + +<property> + <name>test.data.scripts</name> + <value>${hive.root}/data/scripts</value> + <description></description> +</property> + +<property> + <name>hive.metastore.rawstore.impl</name> + <value>org.apache.hadoop.hive.metastore.ObjectStore</value> + <description>Name of the class that implements org.apache.hadoop.hive.metastore.rawstore interface. 
This class is used for storage and retrieval of raw metadata objects such as tables and databases</description> +</property> + +<property> + <name>hive.querylog.location</name> + <value>${test.tmp.dir}/tmp</value> + <description>Location of the structured hive logs</description> +</property> + +<property> + <name>hive.exec.pre.hooks</name> + <value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value> + <description>Pre Execute Hook for Tests</description> +</property> + +<property> + <name>hive.exec.post.hooks</name> + <value>org.apache.hadoop.hive.ql.hooks.PostExecutePrinter</value> + <description>Post Execute Hook for Tests</description> +</property> + +<property> + <name>hive.support.concurrency</name> + <value>false</value> + <description>Whether hive supports concurrency or not. A zookeeper instance must be up and running for the default hive lock manager to support read-write locks.</description> +</property> + +<property> + <name>fs.pfile.impl</name> + <value>org.apache.hadoop.fs.ProxyLocalFileSystem</value> + <description>A proxy for local file system used for cross file system testing</description> +</property> + +<property> + <name>hive.exec.mode.local.auto</name> + <value>false</value> + <description> + Let hive determine whether to run in local mode automatically + Disabling this for tests so that minimr is not affected + </description> +</property> + +<property> + <name>hive.auto.convert.join</name> + <value>false</value> + <description>Whether Hive enables the optimization of converting common join into mapjoin based on the input file size</description> +</property> + +<property> + <name>hive.ignore.mapjoin.hint</name> + <value>false</value> + <description>Whether Hive ignores the mapjoin hint</description> +</property> + +<property> + <name>io.sort.mb</name> + <value>10</value> +</property> + +<property> + <name>hive.input.format</name> + 
<value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value> + <description>The default input format, if it is not specified, the system assigns it. It is set to HiveInputFormat for hadoop versions 17, 18 and 19, whereas it is set to CombineHiveInputFormat for hadoop 20. The user can always overwrite it - if there is a bug in CombineHiveInputFormat, it can always be manually set to HiveInputFormat. </description> +</property> + +<property> + <name>hive.default.rcfile.serde</name> + <value>org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe</value> + <description>The default SerDe hive will use for the rcfile format</description> +</property> + +<property> + <name>hive.stats.dbclass</name> + <value>fs</value> + <description>The default storage that stores temporary hive statistics. Currently, fs type is supported</description> +</property> + +<property> + <name>hive.execution.engine</name> + <value>spark</value> + <description>Chooses execution engine. Options are: mr (Map reduce, default), tez (hadoop 2 only), spark</description> +</property> + +<property> + <name>spark.master</name> + <value>local[*]</value> +</property> + +<property> + <name>hive.prewarm.enabled</name> + <value>true</value> +</property> + +<property> + <name>hive.prewarm.numcontainers</name> + <value>1</value> +</property> + +<property> + <name>hive.prewarm.spark.timeout</name> + <value>30s</value> +</property> + +<property> + <name>spark.serializer</name> + <value>org.apache.spark.serializer.KryoSerializer</value> +</property> + +<property> + <name>spark.akka.logLifecycleEvents</name> + <value>true</value> +</property> + +<property> + <name>hive.spark.log.dir</name> + <value>${spark.home}/logs/</value> +</property> + +<property> + <name>spark.driver.extraClassPath</name> + 
<value>${maven.local.repository}/org/apache/hive/hive-it-util/${hive.version}/hive-it-util-${hive.version}.jar:${maven.local.repository}/org/apache/hive/hive-exec/${hive.version}/hive-exec-${hive.version}.jar:${maven.local.repository}/org/antlr/antlr-runtime/${antlr.version}/antlr-runtime-${antlr.version}.jar</value> +</property> + +<property> + <name>hive.aux.jars.path</name> + <value>${maven.local.repository}/org/apache/hive/hive-it-util/${hive.version}/hive-it-util-${hive.version}.jar</value> +</property> + +<property> + <name>hive.users.in.admin.role</name> + <value>hive_admin_user</value> +</property> + +<property> + <name>hive.in.test</name> + <value>true</value> + <description>Internal marker for test. Used for masking env-dependent values</description> +</property> + +</configuration> http://git-wip-us.apache.org/repos/asf/hive/blob/c2545574/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestLocalSparkCliDriver.java ---------------------------------------------------------------------- diff --git a/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestLocalSparkCliDriver.java b/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestLocalSparkCliDriver.java new file mode 100644 index 0000000..603a492 --- /dev/null +++ b/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestLocalSparkCliDriver.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.cli; + +import java.io.File; +import java.util.List; + +import org.apache.hadoop.hive.cli.control.CliAdapter; +import org.apache.hadoop.hive.cli.control.CliConfigs; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestRule; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) +public class TestLocalSparkCliDriver { + + static CliAdapter adapter = new CliConfigs.LocalSparkCliConfig().getCliAdapter(); + + @Parameters(name = "{0}") + public static List<Object[]> getParameters() throws Exception { + return adapter.getParameters(); + } + + @ClassRule + public static TestRule cliClassRule = adapter.buildClassRule(); + + @Rule + public TestRule cliTestRule = adapter.buildTestRule(); + + private String name; + private File qfile; + + public TestLocalSparkCliDriver(String name, File qfile) { + this.name = name; + this.qfile = qfile; + } + + @Test + public void testCliDriver() throws Exception { + adapter.runTest(name, qfile); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/c2545574/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 6b23739..264c2b2 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ 
b/itests/src/test/resources/testconfiguration.properties @@ -1496,6 +1496,7 @@ miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\ # ql_rewrite_gbtoidx_cbo_1.q,\ # smb_mapjoin_8.q,\ +localSpark.only.query.files=spark_local_queries.q spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\ groupby2_multi_distinct.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/c2545574/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java ---------------------------------------------------------------------- diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java index 3d8ef0d..c9e1543 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java @@ -494,6 +494,28 @@ public class CliConfigs { } } + public static class LocalSparkCliConfig extends AbstractCliConfig { + public LocalSparkCliConfig() { + super(CoreCliDriver.class); + try { + setQueryDir("ql/src/test/queries/clientpositive"); + + includesFrom(testConfigProps, "localSpark.only.query.files"); + + setResultsDir("ql/src/test/results/clientpositive/spark"); + setLogDir("itests/qtest-spark/target/qfile-results/clientpositive/spark"); + + setInitScript("q_test_init.sql"); + setCleanupScript("q_test_cleanup.sql"); + + setHiveConfDir("data/conf/spark/local"); + setClusterType(MiniClusterType.spark); + } catch (Exception e) { + throw new RuntimeException("can't construct cliconfig", e); + } + } + } + public static class SparkOnYarnCliConfig extends AbstractCliConfig { public SparkOnYarnCliConfig() { super(CoreCliDriver.class); http://git-wip-us.apache.org/repos/asf/hive/blob/c2545574/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 52e5301..62b95a9 100644 --- a/pom.xml +++ b/pom.xml @@ -1143,6 +1143,8 @@ 
<HIVE_HADOOP_TEST_CLASSPATH>${test.hive.hadoop.classpath}</HIVE_HADOOP_TEST_CLASSPATH> <SPARK_SUBMIT_CLASSPATH>${spark.home}/lib/spark-assembly-${spark.version}-hadoop2.4.0.jar:${test.hive.hadoop.classpath}</SPARK_SUBMIT_CLASSPATH> <SPARK_OSX_TEST_OPTS>-Dorg.xerial.snappy.tempdir=/tmp -Dorg.xerial.snappy.lib.name=libsnappyjava.jnilib</SPARK_OSX_TEST_OPTS> + <SPARK_SCALA_VERSION>2.11</SPARK_SCALA_VERSION> + <SPARK_HOME>${spark.home}</SPARK_HOME> <PATH>${env.PATH}${test.extra.path}</PATH> </environmentVariables> <systemPropertyVariables> http://git-wip-us.apache.org/repos/asf/hive/blob/c2545574/ql/src/test/queries/clientpositive/spark_local_queries.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/spark_local_queries.q b/ql/src/test/queries/clientpositive/spark_local_queries.q new file mode 100644 index 0000000..265d922 --- /dev/null +++ b/ql/src/test/queries/clientpositive/spark_local_queries.q @@ -0,0 +1,16 @@ +-- Some basic tests to test HoS works with spark.master = local + +-- Test that a basic explain plan can be generated +explain select * from src order by key limit 10; + +-- Test order by +select * from src order by key limit 10; + +-- Test join +select * from src join src1 on src.key = src1.key limit 10; + +-- Test filter on partitioned table +select * from srcpart where ds = "2008-04-08" limit 10; + +-- Test group by +select key, count(*) from src group by key limit 10; http://git-wip-us.apache.org/repos/asf/hive/blob/c2545574/ql/src/test/results/clientpositive/spark/spark_local_queries.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/spark_local_queries.q.out b/ql/src/test/results/clientpositive/spark/spark_local_queries.q.out new file mode 100644 index 0000000..770369c --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/spark_local_queries.q.out @@ -0,0 +1,131 @@ +PREHOOK: query: explain select * 
from src order by key limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from src order by key limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select * from src order by key limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src order by key limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 
val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +PREHOOK: query: select * from src join src1 on src.key = src1.key limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select * from src join src1 on src.key = src1.key limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +213 val_213 213 val_213 +213 val_213 213 val_213 +150 val_150 150 val_150 +238 val_238 238 val_238 +238 val_238 238 val_238 +146 val_146 146 val_146 +146 val_146 146 val_146 +255 val_255 255 val_255 +255 val_255 255 val_255 +401 val_401 401 val_401 +PREHOOK: query: select * from srcpart where ds = "2008-04-08" limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select * from srcpart where ds = "2008-04-08" limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +238 val_238 2008-04-08 11 +86 val_86 2008-04-08 11 +311 val_311 2008-04-08 11 +27 val_27 2008-04-08 11 +165 val_165 2008-04-08 11 +409 val_409 2008-04-08 11 +255 val_255 2008-04-08 11 +278 val_278 2008-04-08 11 +98 val_98 2008-04-08 11 +484 val_484 2008-04-08 11 +PREHOOK: query: select key, count(*) from src group by key limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) from src group by key limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +114 1 +111 1 +113 2 +103 2 +10 1 +100 2 +104 2 +11 1 +105 1 +0 3