Repository: hive Updated Branches: refs/heads/master e091bc271 -> 09b6f9a36
http://git-wip-us.apache.org/repos/asf/hive/blob/09b6f9a3/ql/src/test/results/clientpositive/perf/query98.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query98.q.out b/ql/src/test/results/clientpositive/perf/query98.q.out new file mode 100644 index 0000000..ccd5874 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/query98.q.out @@ -0,0 +1,135 @@ +PREHOOK: query: explain select i_item_desc ,i_category ,i_class ,i_current_price ,i_item_id ,sum(ss_ext_sales_price) as itemrevenue ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over (partition by i_class) as revenueratio from store_sales ,item ,date_dim where store_sales.ss_item_sk = item.i_item_sk and i_category in ('Jewelry', 'Sports', 'Books') and store_sales.ss_sold_date_sk = date_dim.d_date_sk and d_date between cast('2001-01-12' as date) and (cast('2001-02-11' as date)) group by i_item_id ,i_item_desc ,i_category ,i_class ,i_current_price order by i_category ,i_class ,i_item_id ,i_item_desc ,revenueratio +PREHOOK: type: QUERY +POSTHOOK: query: explain select i_item_desc ,i_category ,i_class ,i_current_price ,i_item_id ,sum(ss_ext_sales_price) as itemrevenue ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over (partition by i_class) as revenueratio from store_sales ,item ,date_dim where store_sales.ss_item_sk = item.i_item_sk and i_category in ('Jewelry', 'Sports', 'Books') and store_sales.ss_sold_date_sk = date_dim.d_date_sk and d_date between cast('2001-01-12' as date) and (cast('2001-02-11' as date)) group by i_item_id ,i_item_desc ,i_category ,i_class ,i_current_price order by i_category ,i_class ,i_item_id ,i_item_desc ,revenueratio +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 6 + File Output Operator [FS_31] + compressed:false + Statistics:Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} + Select Operator [SEL_30] + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + | Statistics:Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_29] + key expressions:_col1 (type: string), _col2 (type: string), _col4 (type: string), _col0 (type: string), _col6 (type: decimal(38,23)) + sort order:+++++ + Statistics:Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + value expressions:_col3 (type: decimal(7,2)), _col5 (type: decimal(17,2)) + Select Operator [SEL_27] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Statistics:Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + PTF Operator [PTF_26] + Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"name:":"windowingtablefunction","order by:":"_col3","partition by:":"_col3"}] + Statistics:Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_25] + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | Statistics:Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_24] + key expressions:_col3 (type: string) + Map-reduce partition columns:_col3 (type: string) + sort order:+ + Statistics:Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)) + Select Operator [SEL_23] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + Group By Operator [GBY_22] + | aggregations:["sum(VALUE._col0)"] + | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: decimal(7,2)), KEY._col3 (type: string), KEY._col4 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | Statistics:Num rows: 139755 Data size: 200727046 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_21] + key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: decimal(7,2)), _col3 (type: string), _col4 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: decimal(7,2)), _col3 (type: string), _col4 (type: string) + sort order:+++++ + Statistics:Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + value expressions:_col5 (type: decimal(17,2)) + Group By Operator [GBY_20] + aggregations:["sum(_col2)"] + keys:_col4 (type: string), _col5 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_19] + outputColumnNames:["_col4","_col5","_col6","_col7","_col8","_col2"] + Statistics:Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_41] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"} + | outputColumnNames:["_col2","_col4","_col5","_col6","_col7","_col8"] + | Statistics:Num rows: 279510 Data size: 401454092 Basic stats: COMPLETE Column stats: NONE + |<-Map 8 [SIMPLE_EDGE] + | Reduce Output Operator [RS_17] + | key expressions:_col0 (type: int) + | Map-reduce partition columns:_col0 (type: int) + | sort order:+ + | Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_8] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_39] + | predicate:(d_date BETWEEN 2001-01-12 AND 2001-02-11 and d_date_sk is not null) (type: boolean) + | Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_6] + | alias:date_dim + | Statistics:Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_15] + key expressions:_col0 (type: int) + Map-reduce partition columns:_col0 (type: int) + sort order:+ + Statistics:Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + value expressions:_col2 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(7,2)), _col7 (type: string), _col8 (type: string) + Merge Join Operator [MERGEJOIN_40] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"0":"_col1 (type: int)","1":"_col0 (type: int)"} + | outputColumnNames:["_col0","_col2","_col4","_col5","_col6","_col7","_col8"] + | Statistics:Num rows: 254100 Data size: 364958258 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_10] + | key expressions:_col1 (type: int) + | Map-reduce partition columns:_col1 (type: int) + | sort order:+ + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | value expressions:_col0 (type: int), _col2 (type: decimal(7,2)) + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | Filter Operator [FIL_37] + | predicate:(ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean) + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | TableScan [TS_0] + | alias:store_sales + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + |<-Map 7 [SIMPLE_EDGE] + Reduce Output Operator [RS_12] + key expressions:_col0 (type: int) + Map-reduce partition columns:_col0 (type: int) + sort order:+ + Statistics:Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string) + Select Operator [SEL_5] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_38] + predicate:((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) (type: boolean) + Statistics:Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_3] + alias:item + Statistics:Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + http://git-wip-us.apache.org/repos/asf/hive/blob/09b6f9a3/ql/src/test/templates/TestPerfCliDriver.vm ---------------------------------------------------------------------- diff --git a/ql/src/test/templates/TestPerfCliDriver.vm b/ql/src/test/templates/TestPerfCliDriver.vm new file mode 100644 index 0000000..fb17e96 --- /dev/null +++ b/ql/src/test/templates/TestPerfCliDriver.vm @@ -0,0 +1,175 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.cli; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +import java.io.*; +import java.util.*; +import java.nio.*; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; + +import org.apache.hadoop.hive.ql.QTestUtil; +import org.apache.hadoop.hive.ql.QTestUtil.MiniClusterType; +import org.apache.hadoop.hive.ql.session.SessionState; + +/** + This is the TestPerformance Cli Driver for integrating performance regression tests + as part of the Hive Unit tests. + Currently this includes support for : + 1. Running explain plans for TPCDS workload (non-partitioned dataset) on 30TB scaleset. + TODO : + 1. Support for partitioned data set + 2. Use HBase Metastore instead of Derby + +This suite differs from TestCliDriver w.r.t the fact that we modify the underlying metastore +database to reflect the dataset before running the queries. +*/ +public class $className extends TestCase { + + private static final String HIVE_ROOT = QTestUtil.ensurePathEndsInSlash(System.getProperty("hive.root")); + private static QTestUtil qt; + + public static class TestPerfCliDriverAddTestFromQFiles implements QTestUtil.SuiteAddTestFunctor { + public void addTestToSuite(TestSuite suite, Object setup, String tName) { + suite.addTest(new $className("testPerfCliDriver_"+tName)); + } + } + + static { + MiniClusterType miniMR = MiniClusterType.valueForString("$clusterMode"); + String hiveConfDir = "$hiveConfDir"; + String initScript = "$initScript"; + String cleanupScript = "$cleanupScript"; + try { + String hadoopVer = "$hadoopVersion"; + if (!hiveConfDir.isEmpty()) { + hiveConfDir = HIVE_ROOT + hiveConfDir; + } + qt = new QTestUtil((HIVE_ROOT + "$resultsDir"), (HIVE_ROOT + "$logDir"), miniMR, + hiveConfDir, hadoopVer, initScript, cleanupScript, false, false); + + // do a one time initialization + qt.cleanUp(); + qt.createSources(); + // Manually modify the underlying metastore db to reflect statistics corresponding to + // the 30TB TPCDS scale set. This way the optimizer will generate plans for a 30 TB set. + QTestUtil.setupMetaStoreTableColumnStatsFor30TBTPCDSWorkload(qt.getConf()); + } catch (Exception e) { + System.err.println("Exception: " + e.getMessage()); + e.printStackTrace(); + System.err.flush(); + fail("Unexpected exception in static initialization: "+e.getMessage()); + } + } + + public $className(String name) { + super(name); + } + + /** + * Dummy last test. This is only meant to shutdown qt + */ + public void testPerfCliDriver_shutdown() { + System.err.println ("Cleaning up " + "$className"); + } + + @Override + protected void tearDown() { + try { + qt.clearPostTestEffects(); + if (getName().equals("testPerfCliDriver_shutdown")) + qt.shutdown(); + } + catch (Exception e) { + System.err.println("Exception: " + e.getMessage()); + e.printStackTrace(); + System.err.flush(); + fail("Unexpected exception in tearDown"); + } + } + + public static Test suite() { + Set<String> qFilesToExecute = new HashSet<String>(); + String qFiles = System.getProperty("qfile", "").trim(); + if(!qFiles.isEmpty()) { + for(String qFile : qFiles.split(",")) { + qFile = qFile.trim(); + if(!qFile.isEmpty()) { + qFilesToExecute.add(qFile); + } + } + } + + TestSuite suite = new TestSuite(); + + QTestUtil.addTestsToSuiteFromQfileNames("$qFileNamesFile", qFilesToExecute, + suite, null, new TestPerfCliDriverAddTestFromQFiles()); + suite.addTest(new $className("testPerfCliDriver_shutdown")); + return suite; + } + + static String debugHint = "\nSee ./ql/target/tmp/log/hive.log or ./itests/qtest/target/tmp/log/hive.log, " + + "or check ./ql/target/surefire-reports or ./itests/qtest/target/surefire-reports/ for specific test cases logs."; + +#foreach ($qf in $qfiles) + #set ($fname = $qf.getName()) + #set ($eidx = $fname.indexOf('.')) + #set ($tname = $fname.substring(0, $eidx)) + #set ($fpath = $qfilesMap.get($fname)) + public void testPerfCliDriver_$tname() throws Exception { + runTest("$tname", "$fname", (HIVE_ROOT + "$fpath")); + } + +#end + + private void runTest(String tname, String fname, String fpath) throws Exception { + long startTime = System.currentTimeMillis(); + try { + System.err.println("Begin query: " + fname); + + qt.addFile(fpath); + + if (qt.shouldBeSkipped(fname)) { + return; + } + + qt.cliInit(fname, false); + + int ecode = qt.executeClient(fname); + if (ecode != 0) { + qt.failed(ecode, fname, debugHint); + } + ecode = qt.checkCliDriverResults(fname); + if (ecode != 0) { + qt.failedDiff(ecode, fname, debugHint); + } + } + catch (Throwable e) { + qt.failed(e, fname, debugHint); + } + + long elapsedTime = System.currentTimeMillis() - startTime; + System.err.println("Done query: " + fname + " elapsedTime=" + elapsedTime/1000 + "s"); + assertTrue("Test passed", true); + } +}