[58/61] [abbrv] hive git commit: HIVE-17111: Add TestLocalSparkCliDriver (Sahil Takiar, reviewed by Aihua Xu, Peter Vary, Xuefu Zhang)

sershe Mon, 09 Oct 2017 17:52:13 -0700

HIVE-17111: Add TestLocalSparkCliDriver (Sahil Takiar, reviewed by Aihua Xu, 
Peter Vary, Xuefu Zhang)



Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c2545574
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c2545574
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c2545574

Branch: refs/heads/hive-14535
Commit: c25455746ae46af61e44591ba3ee4833f0b4b8d0
Parents: 842d4df
Author: Sahil Takiar <takiar.sa...@gmail.com>
Authored: Mon Oct 9 16:05:16 2017 -0700
Committer: Sahil Takiar <stak...@cloudera.com>
Committed: Mon Oct 9 16:05:16 2017 -0700

----------------------------------------------------------------------
 data/conf/spark/local/hive-site.xml             | 258 +++++++++++++++++++
 .../hive/cli/TestLocalSparkCliDriver.java       |  62 +++++
 .../test/resources/testconfiguration.properties |   1 +
 .../hadoop/hive/cli/control/CliConfigs.java     |  22 ++
 pom.xml                                         |   2 +
 .../clientpositive/spark_local_queries.q        |  16 ++
 .../spark/spark_local_queries.q.out             | 131 ++++++++++
 7 files changed, 492 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/c2545574/data/conf/spark/local/hive-site.xml
----------------------------------------------------------------------
diff --git a/data/conf/spark/local/hive-site.xml 
b/data/conf/spark/local/hive-site.xml
new file mode 100644
index 0000000..8bade0f
--- /dev/null
+++ b/data/conf/spark/local/hive-site.xml
@@ -0,0 +1,258 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<configuration>
+
+<!-- Hive Configuration can either be stored in this file or in the hadoop 
configuration files  -->
+<!-- that are implied by Hadoop setup variables.                               
                 -->
+<!-- Aside from Hadoop setup variables - this file is provided as a 
convenience so that Hive    -->
+<!-- users do not have to edit hadoop configuration files (that may be managed 
as a centralized -->
+<!-- resource).                                                                
                 -->
+
+<!-- Hive Execution Parameters -->
+<property>
+  <name>hadoop.tmp.dir</name>
+  <value>${test.tmp.dir}/hadoop-tmp</value>
+  <description>A base for other temporary directories.</description>
+</property>
+
+<property>
+  <name>hive.exec.scratchdir</name>
+  <value>${test.tmp.dir}/scratchdir</value>
+  <description>Scratch space for Hive jobs</description>
+</property>
+
+<property>
+  <name>hive.exec.local.scratchdir</name>
+  <value>${test.tmp.dir}/localscratchdir/</value>
+  <description>Local scratch space for Hive jobs</description>
+</property>
+
+<property>
+  <name>datanucleus.schema.autoCreateAll</name>
+  <value>true</value>
+</property>
+
+<property>
+  <name>hive.metastore.schema.verification</name>
+  <value>false</value>
+</property>
+
+<property>
+  <name>javax.jdo.option.ConnectionURL</name>
+  
<value>jdbc:derby:;databaseName=${test.tmp.dir}/junit_metastore_db;create=true</value>
+</property>
+
+<property>
+  <name>javax.jdo.option.ConnectionDriverName</name>
+  <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+</property>
+
+<property>
+  <name>javax.jdo.option.ConnectionUserName</name>
+  <value>APP</value>
+</property>
+
+<property>
+  <name>javax.jdo.option.ConnectionPassword</name>
+  <value>mine</value>
+</property>
+
+<property>
+  <!--  this should eventually be deprecated since the metastore should supply 
this -->
+  <name>hive.metastore.warehouse.dir</name>
+  <value>${test.warehouse.dir}</value>
+  <description></description>
+</property>
+
+<property>
+  <name>hive.metastore.metadb.dir</name>
+  <value>file://${test.tmp.dir}/metadb/</value>
+  <description>
+  Required by metastore server or if the uris argument below is not supplied
+  </description>
+</property>
+
+<property>
+  <name>test.log.dir</name>
+  <value>${test.tmp.dir}/log/</value>
+  <description></description>
+</property>
+
+<property>
+  <name>test.data.files</name>
+  <value>${hive.root}/data/files</value>
+  <description></description>
+</property>
+
+<property>
+  <name>hive.jar.path</name>
+  
<value>${maven.local.repository}/org/apache/hive/hive-exec/${hive.version}/hive-exec-${hive.version}.jar</value>
+  <description></description>
+</property>
+
+<property>
+  <name>test.data.scripts</name>
+  <value>${hive.root}/data/scripts</value>
+  <description></description>
+</property>
+
+<property>
+  <name>hive.metastore.rawstore.impl</name>
+  <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
+  <description>Name of the class that implements 
org.apache.hadoop.hive.metastore.rawstore interface. This class is used to 
store and retrieval of raw metadata objects such as table, 
database</description>
+</property>
+
+<property>
+  <name>hive.querylog.location</name>
+  <value>${test.tmp.dir}/tmp</value>
+  <description>Location of the structured hive logs</description>
+</property>
+
+<property>
+  <name>hive.exec.pre.hooks</name>
+  <value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, 
org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
+  <description>Pre Execute Hook for Tests</description>
+</property>
+
+<property>
+  <name>hive.exec.post.hooks</name>
+  <value>org.apache.hadoop.hive.ql.hooks.PostExecutePrinter</value>
+  <description>Post Execute Hook for Tests</description>
+</property>
+
+<property>
+  <name>hive.support.concurrency</name>
+  <value>false</value>
+  <description>Whether hive supports concurrency or not. A zookeeper instance 
must be up and running for the default hive lock manager to support read-write 
locks.</description>
+</property>
+
+<property>
+  <name>fs.pfile.impl</name>
+  <value>org.apache.hadoop.fs.ProxyLocalFileSystem</value>
+  <description>A proxy for local file system used for cross file system 
testing</description>
+</property>
+
+<property>
+  <name>hive.exec.mode.local.auto</name>
+  <value>false</value>
+  <description>
+    Let hive determine whether to run in local mode automatically
+    Disabling this for tests so that minimr is not affected
+  </description>
+</property>
+
+<property>
+  <name>hive.auto.convert.join</name>
+  <value>false</value>
+  <description>Whether Hive enable the optimization about converting common 
join into mapjoin based on the input file size</description>
+</property>
+
+<property>
+  <name>hive.ignore.mapjoin.hint</name>
+  <value>false</value>
+  <description>Whether Hive ignores the mapjoin hint</description>
+</property>
+
+<property>
+  <name>io.sort.mb</name>
+  <value>10</value>
+</property>
+
+<property>
+  <name>hive.input.format</name>
+  <value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
+  <description>The default input format, if it is not specified, the system 
assigns it. It is set to HiveInputFormat for hadoop versions 17, 18 and 19, 
whereas it is set to CombineHiveInputFormat for hadoop 20. The user can always 
overwrite it - if there is a bug in CombineHiveInputFormat, it can always be 
manually set to HiveInputFormat. </description>
+</property>
+
+<property>
+  <name>hive.default.rcfile.serde</name>
+  <value>org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe</value>
+  <description>The default SerDe hive will use for the rcfile 
format</description>
+</property>
+
+<property>
+  <name>hive.stats.dbclass</name>
+  <value>fs</value>
+  <description>The default storatge that stores temporary hive statistics. 
Currently, fs type is supported</description>
+</property>
+
+<property>
+  <name>hive.execution.engine</name>
+  <value>spark</value>
+  <description>Chooses execution engine. Options are: mr (Map reduce, 
default), tez (hadoop 2 only), spark</description>
+</property>
+
+<property>
+  <name>spark.master</name>
+  <value>local[*]</value>
+</property>
+
+<property>
+  <name>hive.prewarm.enabled</name>
+  <value>true</value>
+</property>
+
+<property>
+  <name>hive.prewarm.numcontainers</name>
+  <value>1</value>
+</property>
+
+<property>
+  <name>hive.prewarm.spark.timeout</name>
+  <value>30s</value>
+</property>
+
+<property>
+  <name>spark.serializer</name>
+  <value>org.apache.spark.serializer.KryoSerializer</value>
+</property>
+
+<property>
+  <name>spark.akka.logLifecycleEvents</name>
+  <value>true</value>
+</property>
+
+<property>
+  <name>hive.spark.log.dir</name>
+  <value>${spark.home}/logs/</value>
+</property>
+
+<property>
+  <name>spark.driver.extraClassPath</name>
+  
<value>${maven.local.repository}/org/apache/hive/hive-it-util/${hive.version}/hive-it-util-${hive.version}.jar:${maven.local.repository}/org/apache/hive/hive-exec/${hive.version}/hive-exec-${hive.version}.jar:${maven.local.repository}/org/antlr/antlr-runtime/${antlr.version}/antlr-runtime-${antlr.version}.jar</value>
+</property>
+
+<property>
+  <name>hive.aux.jars.path</name>
+  
<value>${maven.local.repository}/org/apache/hive/hive-it-util/${hive.version}/hive-it-util-${hive.version}.jar</value>
+</property>
+
+<property>
+  <name>hive.users.in.admin.role</name>
+  <value>hive_admin_user</value>
+</property>
+
+<property>
+  <name>hive.in.test</name>
+  <value>true</value>
+  <description>Internal marker for test. Used for masking env-dependent 
values</description>
+</property>
+
+</configuration>

http://git-wip-us.apache.org/repos/asf/hive/blob/c2545574/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestLocalSparkCliDriver.java
----------------------------------------------------------------------
diff --git 
a/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestLocalSparkCliDriver.java
 
b/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestLocalSparkCliDriver.java
new file mode 100644
index 0000000..603a492
--- /dev/null
+++ 
b/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestLocalSparkCliDriver.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.cli;
+
+import java.io.File;
+import java.util.List;
+
+import org.apache.hadoop.hive.cli.control.CliAdapter;
+import org.apache.hadoop.hive.cli.control.CliConfigs;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestRule;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+@RunWith(Parameterized.class)
+public class TestLocalSparkCliDriver {
+
+  static CliAdapter adapter = new 
CliConfigs.LocalSparkCliConfig().getCliAdapter();
+
+  @Parameters(name = "{0}")
+  public static List<Object[]> getParameters() throws Exception {
+    return adapter.getParameters();
+  }
+
+  @ClassRule
+  public static TestRule cliClassRule = adapter.buildClassRule();
+
+  @Rule
+  public TestRule cliTestRule = adapter.buildTestRule();
+
+  private String name;
+  private File qfile;
+
+  public TestLocalSparkCliDriver(String name, File qfile) {
+    this.name = name;
+    this.qfile = qfile;
+  }
+
+  @Test
+  public void testCliDriver() throws Exception {
+    adapter.runTest(name, qfile);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/c2545574/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index 6b23739..264c2b2 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1496,6 +1496,7 @@ miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\
 #  ql_rewrite_gbtoidx_cbo_1.q,\
 #  smb_mapjoin_8.q,\
 
+localSpark.only.query.files=spark_local_queries.q
 
 spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\
   groupby2_multi_distinct.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/c2545574/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
----------------------------------------------------------------------
diff --git 
a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java 
b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
index 3d8ef0d..c9e1543 100644
--- 
a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
+++ 
b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
@@ -494,6 +494,28 @@ public class CliConfigs {
     }
   }
 
+  public static class LocalSparkCliConfig extends AbstractCliConfig {
+    public LocalSparkCliConfig() {
+      super(CoreCliDriver.class);
+      try {
+        setQueryDir("ql/src/test/queries/clientpositive");
+
+        includesFrom(testConfigProps, "localSpark.only.query.files");
+
+        setResultsDir("ql/src/test/results/clientpositive/spark");
+        
setLogDir("itests/qtest-spark/target/qfile-results/clientpositive/spark");
+
+        setInitScript("q_test_init.sql");
+        setCleanupScript("q_test_cleanup.sql");
+
+        setHiveConfDir("data/conf/spark/local");
+        setClusterType(MiniClusterType.spark);
+      } catch (Exception e) {
+        throw new RuntimeException("can't construct cliconfig", e);
+      }
+    }
+  }
+
   public static class SparkOnYarnCliConfig extends AbstractCliConfig {
     public SparkOnYarnCliConfig() {
       super(CoreCliDriver.class);

http://git-wip-us.apache.org/repos/asf/hive/blob/c2545574/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 52e5301..62b95a9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1143,6 +1143,8 @@
             
<HIVE_HADOOP_TEST_CLASSPATH>${test.hive.hadoop.classpath}</HIVE_HADOOP_TEST_CLASSPATH>
             
<SPARK_SUBMIT_CLASSPATH>${spark.home}/lib/spark-assembly-${spark.version}-hadoop2.4.0.jar:${test.hive.hadoop.classpath}</SPARK_SUBMIT_CLASSPATH>
             <SPARK_OSX_TEST_OPTS>-Dorg.xerial.snappy.tempdir=/tmp 
-Dorg.xerial.snappy.lib.name=libsnappyjava.jnilib</SPARK_OSX_TEST_OPTS>
+            <SPARK_SCALA_VERSION>2.11</SPARK_SCALA_VERSION>
+            <SPARK_HOME>${spark.home}</SPARK_HOME>
             <PATH>${env.PATH}${test.extra.path}</PATH>
           </environmentVariables>
           <systemPropertyVariables>

http://git-wip-us.apache.org/repos/asf/hive/blob/c2545574/ql/src/test/queries/clientpositive/spark_local_queries.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/spark_local_queries.q 
b/ql/src/test/queries/clientpositive/spark_local_queries.q
new file mode 100644
index 0000000..265d922
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/spark_local_queries.q
@@ -0,0 +1,16 @@
+-- Some basic tests to test HoS works with spark.master = local
+
+-- Test that a basic explain plan can be generated
+explain select * from src order by key limit 10;
+
+-- Test order by
+select * from src order by key limit 10;
+
+-- Test join
+select * from src join src1 on src.key = src1.key limit 10;
+
+-- Test filer on partitioned table
+select * from srcpart where ds = "2008-04-08" limit 10;
+
+-- Test group by
+select key, count(*) from src group by key limit 10;

http://git-wip-us.apache.org/repos/asf/hive/blob/c2545574/ql/src/test/results/clientpositive/spark/spark_local_queries.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_local_queries.q.out 
b/ql/src/test/results/clientpositive/spark/spark_local_queries.q.out
new file mode 100644
index 0000000..770369c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/spark_local_queries.q.out
@@ -0,0 +1,131 @@
+PREHOOK: query: explain select * from src order by key limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from src order by key limit 10
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                      TopN Hash Memory Usage: 0.1
+                      value expressions: _col1 (type: string)
+        Reducer 2 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 
(type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 10
+                  Statistics: Num rows: 10 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 10 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from src order by key limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src order by key limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0      val_0
+0      val_0
+0      val_0
+10     val_10
+100    val_100
+100    val_100
+103    val_103
+103    val_103
+104    val_104
+104    val_104
+PREHOOK: query: select * from src join src1 on src.key = src1.key limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from src join src1 on src.key = src1.key limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+213    val_213 213     val_213
+213    val_213 213     val_213
+150    val_150 150     val_150
+238    val_238 238     val_238
+238    val_238 238     val_238
+146    val_146 146     val_146
+146    val_146 146     val_146
+255    val_255 255     val_255
+255    val_255 255     val_255
+401    val_401 401     val_401
+PREHOOK: query: select * from srcpart where ds = "2008-04-08" limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: select * from srcpart where ds = "2008-04-08" limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+#### A masked pattern was here ####
+238    val_238 2008-04-08      11
+86     val_86  2008-04-08      11
+311    val_311 2008-04-08      11
+27     val_27  2008-04-08      11
+165    val_165 2008-04-08      11
+409    val_409 2008-04-08      11
+255    val_255 2008-04-08      11
+278    val_278 2008-04-08      11
+98     val_98  2008-04-08      11
+484    val_484 2008-04-08      11
+PREHOOK: query: select key, count(*) from src group by key limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(*) from src group by key limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+114    1
+111    1
+113    2
+103    2
+10     1
+100    2
+104    2
+11     1
+105    1
+0      3

[58/61] [abbrv] hive git commit: HIVE-17111: Add TestLocalSparkCliDriver (Sahil Takiar, reviewed by Aihua Xu, Peter Vary, Xuefu Zhang)

Reply via email to