HIVE-17543: Enable PerfCliDriver for HoS (Sahil Takiar, reviewed by Peter Vary)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bd371246
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bd371246
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bd371246

Branch: refs/heads/master
Commit: bd371246e2cb15f076574925b02bb31a43ac94e7
Parents: 365c031
Author: Sahil Takiar <takiar.sa...@gmail.com>
Authored: Thu Sep 28 10:01:10 2017 -0700
Committer: Sahil Takiar <stak...@cloudera.com>
Committed: Thu Sep 28 11:11:42 2017 -0700

----------------------------------------------------------------------
 data/conf/perf-reg/spark/hive-site.xml          |  268 ++
 .../hadoop/hive/cli/TestSparkPerfCliDriver.java |   62 +
 .../hadoop/hive/cli/TestPerfCliDriver.java      |   62 -
 .../hadoop/hive/cli/TestTezPerfCliDriver.java   |   62 +
 .../test/resources/testconfiguration.properties |    2 +
 .../hadoop/hive/cli/control/CliConfigs.java     |   32 +-
 .../physical/SparkCrossProductCheck.java        |    2 +-
 .../apache/hadoop/hive/ql/plan/SparkWork.java   |    4 +-
 .../clientpositive/perf/spark/query1.q.out      |  340 ++
 .../clientpositive/perf/spark/query10.q.out     |  533 +++
 .../clientpositive/perf/spark/query11.q.out     |  683 ++++
 .../clientpositive/perf/spark/query12.q.out     |  249 ++
 .../clientpositive/perf/spark/query13.q.out     |  339 ++
 .../clientpositive/perf/spark/query14.q.out     | 3237 ++++++++++++++++++
 .../clientpositive/perf/spark/query15.q.out     |  223 ++
 .../clientpositive/perf/spark/query16.q.out     |  363 ++
 .../clientpositive/perf/spark/query17.q.out     |  409 +++
 .../clientpositive/perf/spark/query18.q.out     |  360 ++
 .../clientpositive/perf/spark/query19.q.out     |  306 ++
 .../clientpositive/perf/spark/query2.q.out      |  429 +++
 .../clientpositive/perf/spark/query20.q.out     |  241 ++
 .../clientpositive/perf/spark/query21.q.out     |  245 ++
 .../clientpositive/perf/spark/query22.q.out     |  218 ++
 .../clientpositive/perf/spark/query23.q.out     | 1110 ++++++
 .../clientpositive/perf/spark/query24.q.out     |  610 ++++
 .../clientpositive/perf/spark/query25.q.out     |  415 +++
 .../clientpositive/perf/spark/query26.q.out     |  253 ++
 .../clientpositive/perf/spark/query27.q.out     |  265 ++
 .../clientpositive/perf/spark/query28.q.out     |  517 +++
 .../clientpositive/perf/spark/query29.q.out     |  415 +++
 .../clientpositive/perf/spark/query3.q.out      |  183 +
 .../clientpositive/perf/spark/query30.q.out     |  424 +++
 .../clientpositive/perf/spark/query31.q.out     |  815 +++++
 .../clientpositive/perf/spark/query32.q.out     |  284 ++
 .../clientpositive/perf/spark/query33.q.out     |  683 ++++
 .../clientpositive/perf/spark/query34.q.out     |  273 ++
 .../clientpositive/perf/spark/query35.q.out     |  524 +++
 .../clientpositive/perf/spark/query36.q.out     |  282 ++
 .../clientpositive/perf/spark/query37.q.out     |  192 ++
 .../clientpositive/perf/spark/query38.q.out     |  458 +++
 .../clientpositive/perf/spark/query39.q.out     |  473 +++
 .../clientpositive/perf/spark/query4.q.out      |  986 ++++++
 .../clientpositive/perf/spark/query40.q.out     |  274 ++
 .../clientpositive/perf/spark/query42.q.out     |  191 ++
 .../clientpositive/perf/spark/query43.q.out     |  184 +
 .../clientpositive/perf/spark/query44.q.out     |  486 +++
 .../clientpositive/perf/spark/query45.q.out     |  374 ++
 .../clientpositive/perf/spark/query46.q.out     |  355 ++
 .../clientpositive/perf/spark/query47.q.out     |  771 +++++
 .../clientpositive/perf/spark/query48.q.out     |  329 ++
 .../clientpositive/perf/spark/query49.q.out     |  876 +++++
 .../clientpositive/perf/spark/query5.q.out      |  740 ++++
 .../clientpositive/perf/spark/query50.q.out     |  333 ++
 .../clientpositive/perf/spark/query51.q.out     |  383 +++
 .../clientpositive/perf/spark/query52.q.out     |  188 +
 .../clientpositive/perf/spark/query53.q.out     |  267 ++
 .../clientpositive/perf/spark/query54.q.out     |  742 ++++
 .../clientpositive/perf/spark/query55.q.out     |  168 +
 .../clientpositive/perf/spark/query56.q.out     |  669 ++++
 .../clientpositive/perf/spark/query57.q.out     |  765 +++++
 .../clientpositive/perf/spark/query58.q.out     |  944 +++++
 .../clientpositive/perf/spark/query59.q.out     |  445 +++
 .../clientpositive/perf/spark/query6.q.out      |  459 +++
 .../clientpositive/perf/spark/query60.q.out     |  691 ++++
 .../clientpositive/perf/spark/query61.q.out     |  586 ++++
 .../clientpositive/perf/spark/query63.q.out     |  269 ++
 .../clientpositive/perf/spark/query65.q.out     |  333 ++
 .../clientpositive/perf/spark/query66.q.out     |  873 +++++
 .../clientpositive/perf/spark/query67.q.out     |  315 ++
 .../clientpositive/perf/spark/query68.q.out     |  363 ++
 .../clientpositive/perf/spark/query69.q.out     |  516 +++
 .../clientpositive/perf/spark/query7.q.out      |  253 ++
 .../clientpositive/perf/spark/query70.q.out     |  429 +++
 .../clientpositive/perf/spark/query71.q.out     |  343 ++
 .../clientpositive/perf/spark/query72.q.out     |  489 +++
 .../clientpositive/perf/spark/query73.q.out     |  261 ++
 .../clientpositive/perf/spark/query74.q.out     |  639 ++++
 .../clientpositive/perf/spark/query75.q.out     |  898 +++++
 .../clientpositive/perf/spark/query76.q.out     |  383 +++
 .../clientpositive/perf/spark/query77.q.out     |  912 +++++
 .../clientpositive/perf/spark/query78.q.out     |  523 +++
 .../clientpositive/perf/spark/query79.q.out     |  258 ++
 .../clientpositive/perf/spark/query8.q.out      |  549 +++
 .../clientpositive/perf/spark/query80.q.out     |  903 +++++
 .../clientpositive/perf/spark/query81.q.out     |  429 +++
 .../clientpositive/perf/spark/query82.q.out     |  192 ++
 .../clientpositive/perf/spark/query83.q.out     |  742 ++++
 .../clientpositive/perf/spark/query84.q.out     |  253 ++
 .../clientpositive/perf/spark/query85.q.out     |  493 +++
 .../clientpositive/perf/spark/query86.q.out     |  240 ++
 .../clientpositive/perf/spark/query87.q.out     |  500 +++
 .../clientpositive/perf/spark/query88.q.out     | 1294 +++++++
 .../clientpositive/perf/spark/query89.q.out     |  281 ++
 .../clientpositive/perf/spark/query9.q.out      |  824 +++++
 .../clientpositive/perf/spark/query90.q.out     |  351 ++
 .../clientpositive/perf/spark/query91.q.out     |  347 ++
 .../clientpositive/perf/spark/query92.q.out     |  303 ++
 .../clientpositive/perf/spark/query93.q.out     |  181 +
 .../clientpositive/perf/spark/query94.q.out     |  359 ++
 .../clientpositive/perf/spark/query95.q.out     |  468 +++
 .../clientpositive/perf/spark/query96.q.out     |  193 ++
 .../clientpositive/perf/spark/query97.q.out     |  250 ++
 .../clientpositive/perf/spark/query98.q.out     |  243 ++
 .../clientpositive/perf/spark/query99.q.out     |  270 ++
 104 files changed, 46703 insertions(+), 70 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/bd371246/data/conf/perf-reg/spark/hive-site.xml
----------------------------------------------------------------------
diff --git a/data/conf/perf-reg/spark/hive-site.xml b/data/conf/perf-reg/spark/hive-site.xml
new file mode 100644
index 0000000..497a61f
--- /dev/null
+++ b/data/conf/perf-reg/spark/hive-site.xml
@@ -0,0 +1,268 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<configuration>
+
+<!-- Hive Configuration can either be stored in this file or in the hadoop configuration files  -->
+<!-- that are implied by Hadoop setup variables.                                                -->
+<!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive    -->
+<!-- users do not have to edit hadoop configuration files (that may be managed as a centralized -->
+<!-- resource).                                                                                 -->
+
+<!-- Hive Execution Parameters -->
+<property>
+  <name>hadoop.tmp.dir</name>
+  <value>${test.tmp.dir}/hadoop-tmp</value>
+  <description>A base for other temporary directories.</description>
+</property>
+
+<property>
+  <name>hive.exec.scratchdir</name>
+  <value>${test.tmp.dir}/scratchdir</value>
+  <description>Scratch space for Hive jobs</description>
+</property>
+
+<property>
+  <name>datanucleus.autoCreateSchema</name>
+  <value>true</value>
+</property>
+
+<property>
+  <name>datanucleus.fixedDatastore</name>
+  <value>false</value>
+</property>
+
+<property>
+  <name>hive.metastore.schema.verification</name>
+  <value>false</value>
+</property>
+
+<property>
+  <name>hive.exec.local.scratchdir</name>
+  <value>${test.tmp.dir}/localscratchdir/</value>
+  <description>Local scratch space for Hive jobs</description>
+</property>
+
+<property>
+  <name>javax.jdo.option.ConnectionURL</name>
+  <value>jdbc:derby:;databaseName=${test.tmp.dir}/junit_metastore_db;create=true</value>
+</property>
+
+<property>
+  <name>javax.jdo.option.ConnectionDriverName</name>
+  <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+</property>
+
+<property>
+  <name>javax.jdo.option.ConnectionUserName</name>
+  <value>APP</value>
+</property>
+
+<property>
+  <name>javax.jdo.option.ConnectionPassword</name>
+  <value>mine</value>
+</property>
+
+<property>
+  <!--  this should eventually be deprecated since the metastore should supply this -->
+  <name>hive.metastore.warehouse.dir</name>
+  <value>${test.warehouse.dir}</value>
+  <description></description>
+</property>
+
+<property>
+  <name>hive.metastore.metadb.dir</name>
+  <value>file://${test.tmp.dir}/metadb/</value>
+  <description>
+  Required by metastore server or if the uris argument below is not supplied
+  </description>
+</property>
+
+<property>
+  <name>test.log.dir</name>
+  <value>${test.tmp.dir}/log/</value>
+  <description></description>
+</property>
+
+<property>
+  <name>test.data.files</name>
+  <value>${hive.root}/data/files</value>
+  <description></description>
+</property>
+
+<property>
+  <name>test.data.scripts</name>
+  <value>${hive.root}/data/scripts</value>
+  <description></description>
+</property>
+
+<property>
+  <name>hive.jar.path</name>
+  <value>${maven.local.repository}/org/apache/hive/hive-exec/${hive.version}/hive-exec-${hive.version}.jar</value>
+  <description></description>
+</property>
+
+<property>
+  <name>hive.metastore.rawstore.impl</name>
+  <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
+  <description>Name of the class that implements the org.apache.hadoop.hive.metastore.rawstore interface. This class is used to store and retrieve raw metadata objects such as table and database</description>
+</property>
+
+<property>
+  <name>hive.querylog.location</name>
+  <value>${test.tmp.dir}/tmp</value>
+  <description>Location of the structured hive logs</description>
+</property>
+
+<property>
+  <name>hive.exec.pre.hooks</name>
+  <value>org.apache.hadoop.hive.ql.hooks.PreExecutePrinter, org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables</value>
+  <description>Pre Execute Hook for Tests</description>
+</property>
+
+<property>
+  <name>hive.exec.post.hooks</name>
+  <value>org.apache.hadoop.hive.ql.hooks.PostExecutePrinter</value>
+  <description>Post Execute Hook for Tests</description>
+</property>
+
+<property>
+  <name>hive.support.concurrency</name>
+  <value>false</value>
+  <description>Whether hive supports concurrency or not. A zookeeper instance must be up and running for the default hive lock manager to support read-write locks.</description>
+</property>
+
+<property>
+  <name>fs.pfile.impl</name>
+  <value>org.apache.hadoop.fs.ProxyLocalFileSystem</value>
+  <description>A proxy for local file system used for cross file system testing</description>
+</property>
+
+<property>
+  <name>hive.exec.mode.local.auto</name>
+  <value>false</value>
+  <description>
+    Let hive determine whether to run in local mode automatically
+    Disabling this for tests so that minimr is not affected
+  </description>
+</property>
+
+<property>
+  <name>hive.auto.convert.join</name>
+  <value>true</value>
+  <description>Whether Hive enables the optimization of converting a common join into a mapjoin based on the input file size</description>
+</property>
+
+<property>
+  <name>hive.ignore.mapjoin.hint</name>
+  <value>true</value>
+  <description>Whether Hive ignores the mapjoin hint</description>
+</property>
+
+<property>
+  <name>io.sort.mb</name>
+  <value>10</value>
+</property>
+
+<property>
+  <name>hive.input.format</name>
+  <value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
+  <description>The default input format; if it is not specified, the system assigns it. It is set to HiveInputFormat for Hadoop versions 17, 18 and 19, and to CombineHiveInputFormat for Hadoop 20. The user can always override it: if there is a bug in CombineHiveInputFormat, it can always be manually set to HiveInputFormat.</description>
+</property>
+
+<property>
+  <name>hive.default.rcfile.serde</name>
+  <value>org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe</value>
+  <description>The default SerDe hive will use for the rcfile format</description>
+</property>
+
+<property>
+  <name>hive.stats.dbclass</name>
+  <value>fs</value>
+  <description>The default storage that stores temporary hive statistics. Currently, the fs type is supported</description>
+</property>
+
+<property>
+  <name>hive.execution.engine</name>
+  <value>spark</value>
+  <description>Which execution engine to use: mr, tez, or spark</description>
+</property>
+
+<property>
+  <name>hive.prewarm.enabled</name>
+  <value>true</value>
+</property>
+
+<property>
+  <name>hive.prewarm.numcontainers</name>
+  <value>1</value>
+</property>
+
+<property>
+  <name>spark.master</name>
+  <value>local-cluster[1,2,1024]</value>
+</property>
+
+<property>
+  <name>hive.prewarm.spark.timeout</name>
+  <value>30s</value>
+</property>
+
+<property>
+  <name>spark.serializer</name>
+  <value>org.apache.spark.serializer.KryoSerializer</value>
+</property>
+
+<property>
+  <name>spark.akka.logLifecycleEvents</name>
+  <value>true</value>
+</property>
+
+<property>
+  <name>hive.spark.log.dir</name>
+  <value>${spark.home}/logs/</value>
+</property>
+
+<property>
+  <name>spark.driver.extraClassPath</name>
+  <value>${maven.local.repository}/org/apache/hive/hive-it-util/${hive.version}/hive-it-util-${hive.version}.jar:${maven.local.repository}/org/apache/hive/hive-exec/${hive.version}/hive-exec-${hive.version}.jar:${maven.local.repository}/org/antlr/antlr-runtime/${antlr.version}/antlr-runtime-${antlr.version}.jar</value>
+</property>
+
+<property>
+  <name>hive.aux.jars.path</name>
+  <value>${maven.local.repository}/org/apache/hive/hive-it-util/${hive.version}/hive-it-util-${hive.version}.jar</value>
+</property>
+
+<property>
+  <name>hive.users.in.admin.role</name>
+  <value>hive_admin_user</value>
+</property>
+
+<property>
+  <name>hive.in.test</name>
+  <value>true</value>
+  <description>Internal marker for test. Used for masking env-dependent values</description>
+</property>
+
+<property>
+  <name>hive.metastore.rawstore.impl</name>
+  <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
+</property>
+
+</configuration>

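A note on the Spark settings above: spark.master=local-cluster[1,2,1024] brings up a
pseudo-distributed standalone Spark cluster with one worker, two cores per worker, and
1024 MB of worker memory, so the perf driver exercises real task scheduling rather than
pure local mode. A minimal sketch of how a test could confirm the values from this file
are picked up (the class name here is hypothetical; HiveConf loads hive-site.xml from
the classpath, which the harness points at data/conf/perf-reg/spark):

    import org.apache.hadoop.hive.conf.HiveConf;

    public class SparkPerfConfCheck {
      public static void main(String[] args) {
        // HiveConf picks up hive-site.xml from the classpath.
        HiveConf conf = new HiveConf();
        // Set to "spark" by data/conf/perf-reg/spark/hive-site.xml.
        assert "spark".equals(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE));
        // local-cluster[workers, coresPerWorker, memoryPerWorkerMB]
        assert "local-cluster[1,2,1024]".equals(conf.get("spark.master"));
      }
    }
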
http://git-wip-us.apache.org/repos/asf/hive/blob/bd371246/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestSparkPerfCliDriver.java
----------------------------------------------------------------------
diff --git a/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestSparkPerfCliDriver.java b/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestSparkPerfCliDriver.java
new file mode 100644
index 0000000..fe786f5
--- /dev/null
+++ b/itests/qtest-spark/src/test/java/org/apache/hadoop/hive/cli/TestSparkPerfCliDriver.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.cli;
+
+import java.io.File;
+import java.util.List;
+
+import org.apache.hadoop.hive.cli.control.CliAdapter;
+import org.apache.hadoop.hive.cli.control.CliConfigs;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestRule;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+@RunWith(Parameterized.class)
+public class TestSparkPerfCliDriver {
+
+  static CliAdapter adapter = new CliConfigs.SparkPerfCliConfig().getCliAdapter();
+
+  @Parameters(name = "{0}")
+  public static List<Object[]> getParameters() throws Exception {
+    return adapter.getParameters();
+  }
+
+  @ClassRule
+  public static TestRule cliClassRule = adapter.buildClassRule();
+
+  @Rule
+  public TestRule cliTestRule = adapter.buildTestRule();
+
+  private String name;
+  private File qfile;
+
+  public TestSparkPerfCliDriver(String name, File qfile) {
+    this.name = name;
+    this.qfile = qfile;
+  }
+
+  @Test
+  public void testCliDriver() throws Exception {
+    adapter.runTest(name, qfile);
+  }
+
+}

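The driver is a thin shell over the shared CliAdapter: getParameters() yields one
(name, qfile) pair per TPC-DS query under ql/src/test/queries/clientpositive/perf, and
JUnit's Parameterized runner constructs the class once per pair. Conceptually (ignoring
the class/test rules, which handle cluster setup and per-query teardown), the run
amounts to the loop below; this is an illustration, not how JUnit is actually invoked:

    // Illustrative expansion of the Parameterized runner's behavior.
    for (Object[] params : TestSparkPerfCliDriver.getParameters()) {
      TestSparkPerfCliDriver t =
          new TestSparkPerfCliDriver((String) params[0], (java.io.File) params[1]);
      t.testCliDriver();  // runs one perf query through Hive-on-Spark
    }
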
http://git-wip-us.apache.org/repos/asf/hive/blob/bd371246/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestPerfCliDriver.java
----------------------------------------------------------------------
diff --git a/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestPerfCliDriver.java b/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestPerfCliDriver.java
deleted file mode 100644
index 4df4eeb..0000000
--- a/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestPerfCliDriver.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.cli;
-
-import java.io.File;
-import java.util.List;
-
-import org.apache.hadoop.hive.cli.control.CliAdapter;
-import org.apache.hadoop.hive.cli.control.CliConfigs;
-import org.junit.ClassRule;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestRule;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameters;
-
-@RunWith(Parameterized.class)
-public class TestPerfCliDriver {
-
-  static CliAdapter adapter = new CliConfigs.PerfCliConfig().getCliAdapter();
-
-  @Parameters(name = "{0}")
-  public static List<Object[]> getParameters() throws Exception {
-    return adapter.getParameters();
-  }
-
-  @ClassRule
-  public static TestRule cliClassRule = adapter.buildClassRule();
-
-  @Rule
-  public TestRule cliTestRule = adapter.buildTestRule();
-
-  private String name;
-  private File qfile;
-
-  public TestPerfCliDriver(String name, File qfile) {
-    this.name = name;
-    this.qfile = qfile;
-  }
-
-  @Test
-  public void testCliDriver() throws Exception {
-    adapter.runTest(name, qfile);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/bd371246/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTezPerfCliDriver.java
----------------------------------------------------------------------
diff --git a/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTezPerfCliDriver.java b/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTezPerfCliDriver.java
new file mode 100644
index 0000000..0c9b2ba
--- /dev/null
+++ b/itests/qtest/src/test/java/org/apache/hadoop/hive/cli/TestTezPerfCliDriver.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.cli;
+
+import java.io.File;
+import java.util.List;
+
+import org.apache.hadoop.hive.cli.control.CliAdapter;
+import org.apache.hadoop.hive.cli.control.CliConfigs;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestRule;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+@RunWith(Parameterized.class)
+public class TestTezPerfCliDriver {
+
+  static CliAdapter adapter = new CliConfigs.TezPerfCliConfig().getCliAdapter();
+
+  @Parameters(name = "{0}")
+  public static List<Object[]> getParameters() throws Exception {
+    return adapter.getParameters();
+  }
+
+  @ClassRule
+  public static TestRule cliClassRule = adapter.buildClassRule();
+
+  @Rule
+  public TestRule cliTestRule = adapter.buildTestRule();
+
+  private String name;
+  private File qfile;
+
+  public TestTezPerfCliDriver(String name, File qfile) {
+    this.name = name;
+    this.qfile = qfile;
+  }
+
+  @Test
+  public void testCliDriver() throws Exception {
+    adapter.runTest(name, qfile);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/bd371246/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 52161ff..2df49a7 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -1497,3 +1497,5 @@ spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\
   spark_job_max_tasks.q,\
   spark_stage_max_tasks.q
 
+spark.perf.disabled.query.files=query14.q,\
+  query64.q

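The new property is consumed by the excludesFrom(testConfigProps,
"spark.perf.disabled.query.files") call in SparkPerfCliConfig below, so query14.q and
query64.q never reach the Spark perf driver. A rough sketch of the exclusion mechanics
(illustrative only; the real logic lives in AbstractCliConfig, and the class name here
is hypothetical):

    import java.io.File;
    import java.io.FileInputStream;
    import java.util.HashSet;
    import java.util.Properties;
    import java.util.Set;

    public class DisabledQueryFiles {
      public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        // Properties.load() folds the trailing-backslash line continuations.
        props.load(new FileInputStream(
            "itests/src/test/resources/testconfiguration.properties"));
        Set<String> disabled = new HashSet<>();
        for (String q : props.getProperty("spark.perf.disabled.query.files").split(",")) {
          disabled.add(q.trim());
        }
        // The Spark perf driver then runs every perf query except these.
        File[] qfiles = new File("ql/src/test/queries/clientpositive/perf")
            .listFiles(f -> f.getName().endsWith(".q") && !disabled.contains(f.getName()));
        System.out.println(qfiles.length + " queries selected");
      }
    }
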
http://git-wip-us.apache.org/repos/asf/hive/blob/bd371246/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
----------------------------------------------------------------------
diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
index 569f48f..3d8ef0d 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
@@ -236,8 +236,8 @@ public class CliConfigs {
     }
   }
 
-  public static class PerfCliConfig extends AbstractCliConfig {
-    public PerfCliConfig() {
+  public static class TezPerfCliConfig extends AbstractCliConfig {
+    public TezPerfCliConfig() {
       super(CorePerfCliDriver.class);
       try {
         setQueryDir("ql/src/test/queries/clientpositive/perf");
@@ -246,13 +246,13 @@ public class CliConfigs {
         excludesFrom(testConfigProps, "minitez.query.files");
         excludesFrom(testConfigProps, "encrypted.query.files");
 
-        setResultsDir("ql/src/test/results/clientpositive/perf/");
-        setLogDir("itests/qtest/target/qfile-results/clientpositive/");
+        setResultsDir("ql/src/test/results/clientpositive/perf/tez");
+        setLogDir("itests/qtest/target/qfile-results/clientpositive/tez");
 
         setInitScript("q_perf_test_init.sql");
         setCleanupScript("q_perf_test_cleanup.sql");
 
-        setHiveConfDir("data/conf/perf-reg/");
+        setHiveConfDir("data/conf/perf-reg/tez");
         setClusterType(MiniClusterType.tez);
       } catch (Exception e) {
         throw new RuntimeException("can't construct cliconfig", e);
@@ -260,6 +260,28 @@ public class CliConfigs {
     }
   }
 
+  public static class SparkPerfCliConfig extends AbstractCliConfig {
+    public SparkPerfCliConfig() {
+      super(CorePerfCliDriver.class);
+      try {
+        setQueryDir("ql/src/test/queries/clientpositive/perf");
+
+        excludesFrom(testConfigProps, "spark.perf.disabled.query.files");
+
+        setResultsDir("ql/src/test/results/clientpositive/perf/spark");
+        setLogDir("itests/qtest/target/qfile-results/clientpositive/spark");
+
+        setInitScript("q_perf_test_init.sql");
+        setCleanupScript("q_perf_test_cleanup.sql");
+
+        setHiveConfDir("data/conf/perf-reg/spark");
+        setClusterType(MiniClusterType.spark);
+      } catch (Exception e) {
+        throw new RuntimeException("can't construct cliconfig", e);
+      }
+    }
+  }
+
   public static class CompareCliConfig extends AbstractCliConfig {
     public CompareCliConfig() {
       super(CoreCompareCliDriver.class);

http://git-wip-us.apache.org/repos/asf/hive/blob/bd371246/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java
index 3e8727c..f904451 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkCrossProductCheck.java
@@ -104,7 +104,7 @@ public class SparkCrossProductCheck implements PhysicalPlanResolver, Dispatcher
 
   private void checkMapJoin(SparkTask sparkTask) throws SemanticException {
     SparkWork sparkWork = sparkTask.getWork();
-    for (BaseWork baseWork : sparkWork.getAllWorkUnsorted()) {
+    for (BaseWork baseWork : sparkWork.getAllWork()) {
       List<String> warnings =
           new CrossProductCheck.MapJoinCheck(sparkTask.toString()).analyze(baseWork);
       for (String w : warnings) {

http://git-wip-us.apache.org/repos/asf/hive/blob/bd371246/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
index 9ca5544..fda7080 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java
@@ -56,9 +56,9 @@ public class SparkWork extends AbstractOperatorDesc {
   private final Set<BaseWork> leaves = new LinkedHashSet<>();
 
   protected final Map<BaseWork, List<BaseWork>> workGraph =
-      new HashMap<BaseWork, List<BaseWork>>();
+      new LinkedHashMap<BaseWork, List<BaseWork>>();
   protected final Map<BaseWork, List<BaseWork>> invertedWorkGraph =
-      new HashMap<BaseWork, List<BaseWork>>();
+      new LinkedHashMap<BaseWork, List<BaseWork>>();
   protected final Map<Pair<BaseWork, BaseWork>, SparkEdgeProperty> edgeProperties =
       new HashMap<Pair<BaseWork, BaseWork>, SparkEdgeProperty>();
 
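The two ql changes above are what make the new spark .q.out golden files reproducible:
SparkCrossProductCheck now walks the work graph through getAllWork(), which, as the
contrast with getAllWorkUnsorted() suggests, returns vertices in a sorted traversal
order, and SparkWork's graph maps switch from HashMap to LinkedHashMap so iteration
follows insertion order rather than hash order. A self-contained illustration of why
the map swap stabilizes printed plans:

    import java.util.HashMap;
    import java.util.LinkedHashMap;
    import java.util.Map;

    public class PlanOrderDemo {
      public static void main(String[] args) {
        Map<String, Integer> hashed = new HashMap<>();
        Map<String, Integer> linked = new LinkedHashMap<>();
        for (String v : new String[] {"Map 1", "Reducer 2", "Map 7", "Reducer 3"}) {
          hashed.put(v, v.length());
          linked.put(v, v.length());
        }
        // HashMap order is an artifact of hashing and capacity: not guaranteed.
        System.out.println(hashed.keySet());
        // LinkedHashMap preserves insertion order on every run:
        System.out.println(linked.keySet());  // [Map 1, Reducer 2, Map 7, Reducer 3]
      }
    }
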

http://git-wip-us.apache.org/repos/asf/hive/blob/bd371246/ql/src/test/results/clientpositive/perf/spark/query1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query1.q.out b/ql/src/test/results/clientpositive/perf/spark/query1.q.out
new file mode 100644
index 0000000..58a833b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query1.q.out
@@ -0,0 +1,340 @@
+PREHOOK: query: explain
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select  c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select  c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 8 
+            Map Operator Tree:
+                TableScan
+                  alias: store
+                  Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((s_state = 'NM') and s_store_sk is not null) (type: boolean)
+                    Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: s_store_sk (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 852 Data size: 1628138 Basic stats: COMPLETE Column stats: NONE
+                      Spark HashTable Sink Operator
+                        keys:
+                          0 _col1 (type: int)
+                          1 _col0 (type: int)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 36), Map 13 (PARTITION-LEVEL SORT, 36)
+        Reducer 12 <- Reducer 11 (GROUP PARTITION-LEVEL SORT, 39)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 36), Map 7 (PARTITION-LEVEL SORT, 36)
+        Reducer 3 <- Reducer 2 (GROUP, 39)
+        Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 559), Reducer 3 (PARTITION-LEVEL SORT, 559)
+        Reducer 5 <- Reducer 12 (PARTITION-LEVEL SORT, 601), Reducer 4 (PARTITION-LEVEL SORT, 601)
+        Reducer 6 <- Reducer 5 (SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: store_returns
+                  Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (sr_customer_sk is not null and sr_returned_date_sk is not null and sr_store_sk is not null) (type: boolean)
+                    Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: sr_returned_date_sk (type: int), sr_customer_sk (type: int), sr_store_sk (type: int), sr_fee (type: decimal(7,2))
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+        Map 10 
+            Map Operator Tree:
+                TableScan
+                  alias: store_returns
+                  Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (sr_returned_date_sk is not null and sr_store_sk is not null) (type: boolean)
+                    Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: sr_returned_date_sk (type: int), sr_customer_sk (type: int), sr_store_sk (type: int), sr_fee (type: decimal(7,2))
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 57591150 Data size: 4462194832 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
+        Map 13 
+            Map Operator Tree:
+                TableScan
+                  alias: date_dim
+                  Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean)
+                    Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: d_date_sk (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: date_dim
+                  Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean)
+                    Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: d_date_sk (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+        Map 9 
+            Map Operator Tree:
+                TableScan
+                  alias: customer
+                  Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: c_customer_sk is not null (type: boolean)
+                    Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: c_customer_sk (type: int), c_customer_id (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: string)
+        Reducer 11 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1, _col2, _col3
+                Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(_col3)
+                  keys: _col2 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: decimal(17,2))
+        Reducer 12 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 31675133 Data size: 2454207210 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col2 (type: decimal(17,2))
+                  outputColumnNames: _col1, _col2
+                  Statistics: Num rows: 31675133 Data size: 2454207210 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: avg(_col2)
+                    keys: _col1 (type: int)
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 15837566 Data size: 1227103566 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: (_col1 * 1.2) (type: decimal(24,7)), true (type: boolean), _col0 (type: int)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 15837566 Data size: 1227103566 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col2 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col2 (type: int)
+                        Statistics: Num rows: 15837566 Data size: 1227103566 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: decimal(24,7)), _col1 (type: boolean)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1, _col2, _col3
+                Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(_col3)
+                  keys: _col2 (type: int), _col1 (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: int)
+                    sort order: ++
+                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                    Statistics: Num rows: 63350266 Data size: 4908414421 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col2 (type: decimal(17,2))
+        Reducer 3 
+            Local Work:
+              Map Reduce Local Work
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 31675133 Data size: 2454207210 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: int), _col0 (type: int), _col2 (type: decimal(17,2))
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 31675133 Data size: 2454207210 Basic stats: COMPLETE Column stats: NONE
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    keys:
+                      0 _col1 (type: int)
+                      1 _col0 (type: int)
+                    outputColumnNames: _col0, _col1, _col2
+                    input vertices:
+                      1 Map 8
+                    Statistics: Num rows: 34842647 Data size: 2699627989 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 34842647 Data size: 2699627989 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: int), _col2 (type: decimal(17,2))
+        Reducer 4 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1, _col2, _col6
+                Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col1 (type: int)
+                  Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: decimal(17,2)), _col6 (type: string)
+        Reducer 5 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col1 (type: int)
+                  1 _col2 (type: int)
+                outputColumnNames: _col2, _col6, _col7, _col8
+                Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col2 > CASE WHEN (_col8 is null) THEN (null) ELSE (_col7) END) (type: boolean)
+                  Statistics: Num rows: 32266667 Data size: 27749985689 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col6 (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 32266667 Data size: 27749985689 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 32266667 Data size: 27749985689 Basic stats: COMPLETE Column stats: NONE
+                      TopN Hash Memory Usage: 0.1
+        Reducer 6 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 32266667 Data size: 27749985689 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 100
+                  Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 100 Data size: 86000 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 100
+      Processor Tree:
+        ListSink
+

http://git-wip-us.apache.org/repos/asf/hive/blob/bd371246/ql/src/test/results/clientpositive/perf/spark/query10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query10.q.out b/ql/src/test/results/clientpositive/perf/spark/query10.q.out
new file mode 100644
index 0000000..eb3a2f6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/spark/query10.q.out
@@ -0,0 +1,533 @@
+PREHOOK: query: explain
+select  
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  count(*) cnt1,
+  cd_purchase_estimate,
+  count(*) cnt2,
+  cd_credit_rating,
+  count(*) cnt3,
+  cd_dep_count,
+  count(*) cnt4,
+  cd_dep_employed_count,
+  count(*) cnt5,
+  cd_dep_college_count,
+  count(*) cnt6
+ from
+  customer c,customer_address ca,customer_demographics
+ where
+  c.c_current_addr_sk = ca.ca_address_sk and
+  ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and
+  cd_demo_sk = c.c_current_cdemo_sk and 
+  exists (select *
+          from store_sales,date_dim
+          where c.c_customer_sk = ss_customer_sk and
+                ss_sold_date_sk = d_date_sk and
+                d_year = 2002 and
+                d_moy between 4 and 4+3) and
+   (exists (select *
+            from web_sales,date_dim
+            where c.c_customer_sk = ws_bill_customer_sk and
+                  ws_sold_date_sk = d_date_sk and
+                  d_year = 2002 and
+                  d_moy between 4 ANd 4+3) or 
+    exists (select * 
+            from catalog_sales,date_dim
+            where c.c_customer_sk = cs_ship_customer_sk and
+                  cs_sold_date_sk = d_date_sk and
+                  d_year = 2002 and
+                  d_moy between 4 and 4+3))
+ group by cd_gender,
+          cd_marital_status,
+          cd_education_status,
+          cd_purchase_estimate,
+          cd_credit_rating,
+          cd_dep_count,
+          cd_dep_employed_count,
+          cd_dep_college_count
+ order by cd_gender,
+          cd_marital_status,
+          cd_education_status,
+          cd_purchase_estimate,
+          cd_credit_rating,
+          cd_dep_count,
+          cd_dep_employed_count,
+          cd_dep_college_count
+limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select  
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  count(*) cnt1,
+  cd_purchase_estimate,
+  count(*) cnt2,
+  cd_credit_rating,
+  count(*) cnt3,
+  cd_dep_count,
+  count(*) cnt4,
+  cd_dep_employed_count,
+  count(*) cnt5,
+  cd_dep_college_count,
+  count(*) cnt6
+ from
+  customer c,customer_address ca,customer_demographics
+ where
+  c.c_current_addr_sk = ca.ca_address_sk and
+  ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and
+  cd_demo_sk = c.c_current_cdemo_sk and 
+  exists (select *
+          from store_sales,date_dim
+          where c.c_customer_sk = ss_customer_sk and
+                ss_sold_date_sk = d_date_sk and
+                d_year = 2002 and
+                d_moy between 4 and 4+3) and
+   (exists (select *
+            from web_sales,date_dim
+            where c.c_customer_sk = ws_bill_customer_sk and
+                  ws_sold_date_sk = d_date_sk and
+                  d_year = 2002 and
+                  d_moy between 4 ANd 4+3) or 
+    exists (select * 
+            from catalog_sales,date_dim
+            where c.c_customer_sk = cs_ship_customer_sk and
+                  cs_sold_date_sk = d_date_sk and
+                  d_year = 2002 and
+                  d_moy between 4 and 4+3))
+ group by cd_gender,
+          cd_marital_status,
+          cd_education_status,
+          cd_purchase_estimate,
+          cd_credit_rating,
+          cd_dep_count,
+          cd_dep_employed_count,
+          cd_dep_college_count
+ order by cd_gender,
+          cd_marital_status,
+          cd_education_status,
+          cd_purchase_estimate,
+          cd_credit_rating,
+          cd_dep_count,
+          cd_dep_employed_count,
+          cd_dep_college_count
+limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-3 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 12 
+            Map Operator Tree:
+                TableScan
+                  alias: date_dim
+                  Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean)
+                    Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: d_date_sk (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+                      Spark HashTable Sink Operator
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-3
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 15 
+            Map Operator Tree:
+                TableScan
+                  alias: date_dim
+                  Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean)
+                    Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: d_date_sk (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+                      Spark HashTable Sink Operator
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-4
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 18 
+            Map Operator Tree:
+                TableScan
+                  alias: date_dim
+                  Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean)
+                    Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: d_date_sk (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE
+                      Spark HashTable Sink Operator
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 11 <- Map 10 (GROUP, 169)
+        Reducer 14 <- Map 13 (GROUP, 437)
+        Reducer 17 <- Map 16 (GROUP, 336)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 8 (PARTITION-LEVEL SORT, 697)
+        Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597)
+        Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 953), Reducer 14 (PARTITION-LEVEL SORT, 953), Reducer 3 (PARTITION-LEVEL SORT, 953)
+        Reducer 5 <- Reducer 17 (PARTITION-LEVEL SORT, 648), Reducer 4 (PARTITION-LEVEL SORT, 648)
+        Reducer 6 <- Reducer 5 (GROUP, 529)
+        Reducer 7 <- Reducer 6 (SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: c
+                  Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null) (type: boolean)
+                    Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col2 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col2 (type: int)
+                        Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int), _col1 (type: int)
+        Map 10 
+            Map Operator Tree:
+                TableScan
+                  alias: web_sales
+                  Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
+                    Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col1
+                        input vertices:
+                          1 Map 12
+                        Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          keys: _col1 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
+                            Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
+            Local Work:
+              Map Reduce Local Work
+        Map 13 
+            Map Operator Tree:
+                TableScan
+                  alias: store_sales
+                  Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
+                    Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col1
+                        input vertices:
+                          1 Map 15
+                        Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          keys: _col1 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
+                            Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
+            Local Work:
+              Map Reduce Local Work
+        Map 16 
+            Map Operator Tree:
+                TableScan
+                  alias: catalog_sales
+                  Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (cs_ship_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean)
+                    Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: cs_sold_date_sk (type: int), cs_ship_customer_sk (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col1
+                        input vertices:
+                          1 Map 18
+                        Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          keys: _col1 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
+                            Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
+            Local Work:
+              Map Reduce Local Work
+        Map 8 
+            Map Operator Tree:
+                TableScan
+                  alias: ca
+                  Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null) (type: boolean)
+                    Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ca_address_sk (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
+        Map 9 
+            Map Operator Tree:
+                TableScan
+                  alias: customer_demographics
+                  Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: cd_demo_sk is not null (type: boolean)
+                    Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: cd_demo_sk (type: int), cd_gender (type: string), cd_marital_status (type: string), cd_education_status (type: string), cd_purchase_estimate (type: int), cd_credit_rating (type: string), cd_dep_count (type: int), cd_dep_employed_count (type: int), cd_dep_college_count (type: int)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                      Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int)
+        Reducer 11 
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), true (type: boolean)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: boolean)
+        Reducer 14 
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE
+        Reducer 17 
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), true (type: boolean)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: boolean)
+        Reducer 2 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col2 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col1 (type: int)
+                  Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+        Reducer 3 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col1 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int)
+        Reducer 4 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
+                     Inner Join 0 to 2
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                  2 _col0 (type: int)
+                outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col15
+                Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int), _col15 (type: boolean)
+                  outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col16
+                  Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int), _col16 (type: boolean)
+        Reducer 5 
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col16, _col18
+                Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (_col16 is not null or _col18 is not null) (type: boolean)
+                  Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int)
+                    outputColumnNames: _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                    Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                      Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int)
+                        sort order: ++++++++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int)
+                        Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
+                        TopN Hash Memory Usage: 0.1
+                        value expressions: _col8 (type: bigint)
+        Reducer 6 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: string), KEY._col5 (type: int), KEY._col6 (type: int), KEY._col7 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col8 (type: bigint), _col3 (type: int), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col8, _col10, _col12
+                  Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: int), _col6 (type: string), _col8 (type: int), _col10 (type: int), _col12 (type: int)
+                    sort order: ++++++++
+                    Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+                    TopN Hash Memory Usage: 0.1
+                    value expressions: _col3 (type: bigint)
+        Reducer 7 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: bigint), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: bigint), KEY.reducesinkkey4 (type: string), VALUE._col0 (type: bigint), KEY.reducesinkkey5 (type: int), VALUE._col0 (type: bigint), KEY.reducesinkkey6 (type: int), VALUE._col0 (type: bigint), KEY.reducesinkkey7 (type: int), VALUE._col0 (type: bigint)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
+                Limit
+                  Number of rows: 100
+                  Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 100
+      Processor Tree:
+        ListSink
+
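
For reference, the plan above is the generated Spark EXPLAIN output for one of the TPC-DS-derived perf queries (the ca_county list, d_year = 2002, and d_moy BETWEEN 4 AND 7 filters visible in the plan match query10). Below is a sketch of the query shape being planned, reconstructed from the operators above rather than taken from the .q file itself, so treat the exact aliases and formatting as approximate:

    -- Count customers in the listed counties, grouped by demographics,
    -- keeping only customers with store sales in Apr-Jul 2002 and with
    -- either web or catalog sales in the same window.
    select cd_gender, cd_marital_status, cd_education_status, count(*) cnt1,
           cd_purchase_estimate, count(*) cnt2, cd_credit_rating, count(*) cnt3,
           cd_dep_count, count(*) cnt4, cd_dep_employed_count, count(*) cnt5,
           cd_dep_college_count, count(*) cnt6
    from customer c
    join customer_address ca on (c.c_current_addr_sk = ca.ca_address_sk)
    join customer_demographics on (cd_demo_sk = c.c_current_cdemo_sk)
    where ca_county in ('Walker County', 'Richland County', 'Gaines County',
                        'Douglas County', 'Dona Ana County')
      and exists (select 1 from store_sales, date_dim
                  where c.c_customer_sk = ss_customer_sk
                    and ss_sold_date_sk = d_date_sk
                    and d_year = 2002 and d_moy between 4 and 7)
      and (exists (select 1 from web_sales, date_dim
                   where c.c_customer_sk = ws_bill_customer_sk
                     and ws_sold_date_sk = d_date_sk
                     and d_year = 2002 and d_moy between 4 and 7)
        or exists (select 1 from catalog_sales, date_dim
                   where c.c_customer_sk = cs_ship_customer_sk
                     and cs_sold_date_sk = d_date_sk
                     and d_year = 2002 and d_moy between 4 and 7))
    group by cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate,
             cd_credit_rating, cd_dep_count, cd_dep_employed_count, cd_dep_college_count
    order by cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate,
             cd_credit_rating, cd_dep_count, cd_dep_employed_count, cd_dep_college_count
    limit 100;

In the plan, the inner join with the distinct store_sales customer set (Reducer 14) implements the mandatory EXISTS, while the two left outer joins against the flagged web_sales and catalog_sales sets (Reducers 11 and 17), combined with the (_col16 is not null or _col18 is not null) filter in Reducer 5, implement the OR of the remaining two EXISTS clauses.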
