hive git commit: HIVE-16125 : Split work between reducers. (Slim Bouguerra via Ashutosh Chauhan)
Repository: hive Updated Branches: refs/heads/master e0bf12d98 -> e05e0fa19 HIVE-16125 : Split work between reducers. (Slim Bouguerra via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e05e0fa1 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e05e0fa1 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e05e0fa1 Branch: refs/heads/master Commit: e05e0fa19d7fd7c48617c4a770fa579b7f01f40e Parents: e0bf12d Author: Slim Bouguerra Authored: Thu Feb 8 20:46:00 2018 -0800 Committer: Ashutosh Chauhan Committed: Tue Feb 20 10:34:11 2018 -0800 -- .../org/apache/hadoop/hive/conf/Constants.java | 3 + .../hadoop/hive/druid/io/DruidOutputFormat.java | 12 +- .../hadoop/hive/druid/io/DruidRecordWriter.java | 72 ++- .../hadoop/hive/druid/serde/DruidSerDe.java | 26 +- .../test/resources/testconfiguration.properties | 3 +- ...tedDynPartitionTimeGranularityOptimizer.java | 237 --- .../druidmini_dynamic_partition.q | 170 + .../druid/druidmini_dynamic_partition.q.out | 625 +++ 8 files changed, 1038 insertions(+), 110 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/e05e0fa1/common/src/java/org/apache/hadoop/hive/conf/Constants.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/Constants.java b/common/src/java/org/apache/hadoop/hive/conf/Constants.java index 51408b1..10aaee1 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/Constants.java +++ b/common/src/java/org/apache/hadoop/hive/conf/Constants.java @@ -33,7 +33,10 @@ public class Constants { public static final String DRUID_DATA_SOURCE = "druid.datasource"; public static final String DRUID_SEGMENT_GRANULARITY = "druid.segment.granularity"; public static final String DRUID_QUERY_GRANULARITY = "druid.query.granularity"; + public static final String DRUID_TARGET_SHARDS_PER_GRANULARITY = + "druid.segment.targetShardsPerGranularity"; public static final String 
DRUID_TIMESTAMP_GRANULARITY_COL_NAME = "__time_granularity"; + public static final String DRUID_SHARD_KEY_COL_NAME = "__druid_extra_partition_key"; public static final String DRUID_QUERY_JSON = "druid.query.json"; public static final String DRUID_QUERY_TYPE = "druid.query.type"; public static final String DRUID_QUERY_FETCH = "druid.query.fetch"; http://git-wip-us.apache.org/repos/asf/hive/blob/e05e0fa1/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java -- diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java index 8c25d62..b758efd 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java +++ b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidOutputFormat.java @@ -92,6 +92,10 @@ public class DruidOutputFormat implements HiveOutputFormat 0 ? -1 : HiveConf +.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_PARTITION_SIZE); // If datasource is in the table properties, it is an INSERT/INSERT OVERWRITE as the datasource // name was already persisted. 
Otherwise, it is a CT/CTAS and we need to get the name from the // job properties that are set by configureOutputJobProperties in the DruidStorageHandler @@ -191,8 +195,10 @@ public class DruidOutputFormat implements HiveOutputFormat aggregatorFactories = aggregatorFactoryBuilder.build(); final InputRowParser inputRowParser = new MapInputRowParser(new TimeAndDimsParseSpec( new TimestampSpec(DruidStorageHandlerUtils.DEFAULT_TIMESTAMP_COLUMN, "auto", null), -new DimensionsSpec(dimensions, - Lists.newArrayList(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME), null +new DimensionsSpec(dimensions, Lists +.newArrayList(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME, +Constants.DRUID_SHARD_KEY_COL_NAME +), null ) )); @@ -209,8 +215,6 @@ public class DruidOutputFormat implements HiveOutputFormathttp://git-wip-us.apache.org/repos/asf/hive/blob/e05e0fa1/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java -- diff --git a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java b/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java index cf4dad6..7d2bb91 100644 --- a/druid-handler/src/java/org/apache/hadoop/hive/druid/io/DruidRecordWriter.java +++ b/druid-handler/src/j
hive git commit: HIVE-18541 : Secure HS2 web UI with PAM (Oleksiy Sayankin via Szehon)
Repository: hive Updated Branches: refs/heads/master e05e0fa19 -> 03a1e6247 HIVE-18541 : Secure HS2 web UI with PAM (Oleksiy Sayankin via Szehon) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/03a1e624 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/03a1e624 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/03a1e624 Branch: refs/heads/master Commit: 03a1e624760f15c57bed04cba9ff6f3a5f1846c4 Parents: e05e0fa Author: Szehon Ho Authored: Tue Feb 20 20:12:59 2018 +0100 Committer: Szehon Ho Committed: Tue Feb 20 20:12:59 2018 +0100 -- common/pom.xml | 20 ++ .../org/apache/hadoop/hive/conf/HiveConf.java | 2 + .../java/org/apache/hive/http/HttpServer.java | 45 - .../hive/http/security/PamAuthenticator.java| 140 ++ .../hive/http/security/PamConstraint.java | 34 .../http/security/PamConstraintMapping.java | 27 +++ .../hive/http/security/PamLoginService.java | 78 .../hive/http/security/PamUserIdentity.java | 38 .../apache/hive/service/server/HiveServer2.java | 25 +++ .../service/server/TestHS2HttpServerPam.java| 182 +++ .../TestHS2HttpServerPamConfiguration.java | 128 + 11 files changed, 718 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/03a1e624/common/pom.xml -- diff --git a/common/pom.xml b/common/pom.xml index aaeecc0..4da46f2 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -79,6 +79,11 @@ org.eclipse.jetty + jetty-http + ${jetty.version} + + + org.eclipse.jetty jetty-rewrite @@ -160,6 +165,21 @@ + net.sf.jpam + jpam + ${jpam.version} + + + org.slf4j + slf4j-log4j12 + + + commmons-logging + commons-logging + + + + org.apache.hadoop hadoop-mapreduce-client-core ${hadoop.version} http://git-wip-us.apache.org/repos/asf/hive/blob/03a1e624/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 3d777f9..b51dc7e 100644 
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2450,6 +2450,8 @@ public class HiveConf extends Configuration { "the value of hive.server2.webui.host or the correct host name."), HIVE_SERVER2_WEBUI_MAX_HISTORIC_QUERIES("hive.server2.webui.max.historic.queries", 25, "The maximum number of past queries to show in HiverSever2 WebUI."), +HIVE_SERVER2_WEBUI_USE_PAM("hive.server2.webui.use.pam", false, +"If true, the HiveServer2 WebUI will be secured with PAM."), // Tez session settings HIVE_SERVER2_TEZ_INTERACTIVE_QUEUE("hive.server2.tez.interactive.queue", "", http://git-wip-us.apache.org/repos/asf/hive/blob/03a1e624/common/src/java/org/apache/hive/http/HttpServer.java -- diff --git a/common/src/java/org/apache/hive/http/HttpServer.java b/common/src/java/org/apache/hive/http/HttpServer.java index 2a8f7ae..71b2668 100644 --- a/common/src/java/org/apache/hive/http/HttpServer.java +++ b/common/src/java/org/apache/hive/http/HttpServer.java @@ -21,6 +21,7 @@ package org.apache.hive.http; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URL; +import java.util.Collections; import java.util.HashMap; import java.util.LinkedList; import java.util.List; @@ -45,6 +46,10 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authentication.server.AuthenticationFilter; import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.hive.common.classification.InterfaceAudience; +import org.apache.hive.http.security.PamAuthenticator; +import org.apache.hive.http.security.PamConstraint; +import org.apache.hive.http.security.PamConstraintMapping; +import org.apache.hive.http.security.PamLoginService; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.core.Appender; import org.apache.logging.log4j.core.Logger; @@ -54,7 +59,11 @@ import org.apache.logging.log4j.core.appender.FileManager; 
import org.apache.logging.log4j.core.appender.OutputStreamManager; import org.eclipse.jetty.rewrite.handler.RewriteHandler; import org.eclipse.jetty.rewrite.handler.RewriteRegexRule; +import org.eclipse.jetty.security.Constra
hive git commit: HIVE-18625: SessionState Not Checking For Directory Creation Result (Andrew Sherman, reviewed by Sahil Takiar)
Repository: hive Updated Branches: refs/heads/master 03a1e6247 -> 111ed0964 HIVE-18625: SessionState Not Checking For Directory Creation Result (Andrew Sherman, reviewed by Sahil Takiar) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/111ed096 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/111ed096 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/111ed096 Branch: refs/heads/master Commit: 111ed096496c37674601173cfeaa40cbf948f878 Parents: 03a1e62 Author: Andrew Sherman Authored: Tue Feb 20 11:28:20 2018 -0800 Committer: Sahil Takiar Committed: Tue Feb 20 11:28:34 2018 -0800 -- .../hadoop/hive/ql/session/SessionState.java| 7 ++- .../hive/ql/session/TestSessionState.java | 53 2 files changed, 58 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/111ed096/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java index dfc2dfa..0071a9a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java @@ -740,7 +740,8 @@ public class SessionState { * @return * @throws IOException */ - private static void createPath(HiveConf conf, Path path, String permission, boolean isLocal, + @VisibleForTesting + static void createPath(HiveConf conf, Path path, String permission, boolean isLocal, boolean isCleanUp) throws IOException { FsPermission fsPermission = new FsPermission(permission); FileSystem fs; @@ -750,7 +751,9 @@ public class SessionState { fs = path.getFileSystem(conf); } if (!fs.exists(path)) { - fs.mkdirs(path, fsPermission); + if (!fs.mkdirs(path, fsPermission)) { +throw new IOException("Failed to create directory " + path + " on fs " + fs.getUri()); + } String dirType = isLocal ? 
"local" : "HDFS"; LOG.info("Created " + dirType + " directory: " + path.toString()); } http://git-wip-us.apache.org/repos/asf/hive/blob/111ed096/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java b/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java index 8750196..0fa1c81 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/session/TestSessionState.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.session; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.io.File; import java.io.IOException; @@ -27,6 +29,10 @@ import java.util.Arrays; import java.util.Collection; import org.apache.commons.io.FileUtils; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.ParentNotDirectoryException; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.metastore.Warehouse; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -257,4 +263,51 @@ public class TestSessionState { } } } + + /** + * Unit test for SessionState.createPath(). 
+ */ + @Test + public void testCreatePath() throws Exception { +HiveConf conf = new HiveConf(); +LocalFileSystem localFileSystem = FileSystem.getLocal(conf); + +Path repeatedCreate = new Path("repeatedCreate"); +SessionState.createPath(conf, repeatedCreate, "700", true, true); +assertTrue(localFileSystem.exists(repeatedCreate)); +// second time will complete silently +SessionState.createPath(conf, repeatedCreate, "700", true, true); + +Path fileNotDirectory = new Path("fileNotDirectory"); +localFileSystem.create(fileNotDirectory); +localFileSystem.deleteOnExit(fileNotDirectory); + +// Show we cannot create a child of a file +try { + SessionState.createPath(conf, new Path(fileNotDirectory, "child"), "700", true, true); + fail("did not get expected exception creating a child of a file"); +} catch (ParentNotDirectoryException e) { + assertTrue(e.getMessage().contains("Parent path is not a directory")); +} + +// Show we cannot create a child of a null directory +try { + //noinspection ConstantConditions + SessionState.createPath(conf, new Path((String) null, "child"), "700", true, true); + fail("did not get expected exception creating a Path fr
hive git commit: HIVE-18742: Vectorization acid/inputformat check should allow NullRowsInputFormat/OneNullRowInputFormat (Jason Dere, reviewed by Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/master 111ed0964 -> e51f7c9d2 HIVE-18742: Vectorization acid/inputformat check should allow NullRowsInputFormat/OneNullRowInputFormat (Jason Dere, reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e51f7c9d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e51f7c9d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e51f7c9d Branch: refs/heads/master Commit: e51f7c9d277c8a1a7a289063b9bcf43ad6de8e99 Parents: 111ed09 Author: Jason Dere Authored: Tue Feb 20 12:49:16 2018 -0800 Committer: Jason Dere Committed: Tue Feb 20 12:49:16 2018 -0800 -- .../hive/ql/optimizer/physical/Vectorizer.java | 12 +- .../test/queries/clientpositive/acid_nullscan.q | 17 ++ .../results/clientpositive/acid_nullscan.q.out | 162 +++ 3 files changed, 190 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/e51f7c9d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 27b53b8..52ef2d3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -101,6 +101,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport.Support; import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.io.NullRowsInputFormat; +import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; @@ -353,6 +355,14 @@ public class Vectorizer implements PhysicalPlanResolver { vectorDeserializeTextSupportSet.addAll(Arrays.asList(Support.values())); } + private static final Set supportedAcidInputFormats = new TreeSet(); + static { +supportedAcidInputFormats.add(OrcInputFormat.class.getName()); +// For metadataonly or empty rows optimizations, null/onerow input format can be selected. +supportedAcidInputFormats.add(NullRowsInputFormat.class.getName()); +supportedAcidInputFormats.add(OneNullRowInputFormat.class.getName()); + } + private BaseWork currentBaseWork; private Operator currentOperator; private Collection> vectorizedInputFormatExcludes; @@ -1201,7 +1211,7 @@ public class Vectorizer implements PhysicalPlanResolver { // Today, ACID tables are only ORC and that format is vectorizable. Verify these // assumptions. Preconditions.checkState(isInputFileFormatVectorized); - Preconditions.checkState(inputFileFormatClassName.equals(OrcInputFormat.class.getName())); + Preconditions.checkState(supportedAcidInputFormats.contains(inputFileFormatClassName)); if (!useVectorizedInputFileFormat) { enabledConditionsNotMetList.add("Vectorizing ACID tables requires " http://git-wip-us.apache.org/repos/asf/hive/blob/e51f7c9d/ql/src/test/queries/clientpositive/acid_nullscan.q -- diff --git a/ql/src/test/queries/clientpositive/acid_nullscan.q b/ql/src/test/queries/clientpositive/acid_nullscan.q new file mode 100644 index 000..d048231 --- /dev/null +++ b/ql/src/test/queries/clientpositive/acid_nullscan.q @@ -0,0 +1,17 @@ + +set hive.mapred.mode=nonstrict; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.vectorized.execution.enabled=true; + +CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true'); +insert into table acid_vectorized select 
cint, cstring1 from alltypesorc where cint is not null order by cint limit 10; +insert into table acid_vectorized values (1, 'bar'); + +explain extended +select sum(a) from acid_vectorized where false; + +select sum(a) from acid_vectorized where false; + http://git-wip-us.apache.org/repos/asf/hive/blob/e51f7c9d/ql/src/test/results/clientpositive/acid_nullscan.q.out -- diff --git a/ql/src/test/results/clientpositive/acid_nullscan.q.out b/ql/src/test/results/clientpositive/acid_nullscan.q.out new file mode
[2/2] hive git commit: HIVE-18658 : WM: allow not specifying scheduling policy when creating a pool (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
HIVE-18658 : WM: allow not specifying scheduling policy when creating a pool (Sergey Shelukhin, reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3df6bc28 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3df6bc28 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3df6bc28 Branch: refs/heads/master Commit: 3df6bc28b7c4a3223a2771caa0058a63941cec39 Parents: dbf38ed Author: sergey Authored: Tue Feb 20 17:25:38 2018 -0800 Committer: sergey Committed: Tue Feb 20 17:25:38 2018 -0800 -- .../hive/ql/parse/DDLSemanticAnalyzer.java | 7 +- .../test/queries/clientpositive/resourceplan.q | 6 +- .../clientpositive/llap/resourceplan.q.out | 256 +++ .../results/clientpositive/llap/sysdb.q.out | 8 +- .../hive/metastore/MetaStoreDirectSql.java | 2 + 5 files changed, 227 insertions(+), 52 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/3df6bc28/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 8ad6fce..718faff 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -1135,8 +1135,8 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer { private void analyzeCreatePool(ASTNode ast) throws SemanticException { // TODO: allow defaults for e.g. scheduling policy. 
-if (ast.getChildCount() != 5) { - throw new SemanticException("Invalid syntax for create pool."); +if (ast.getChildCount() < 3) { + throw new SemanticException("Expected more arguments: " + ast.toStringTree()); } String rpName = unescapeIdentifier(ast.getChild(0).getText()); String poolPath = poolPath(ast.getChild(1)); @@ -1168,6 +1168,9 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer { if (!pool.isSetAllocFraction()) { throw new SemanticException("alloc_fraction should be specified for a pool"); } +if (!pool.isSetQueryParallelism()) { + throw new SemanticException("query_parallelism should be specified for a pool"); +} CreateOrAlterWMPoolDesc desc = new CreateOrAlterWMPoolDesc(pool, poolPath, false); addServiceOutput(); rootTasks.add(TaskFactory.get( http://git-wip-us.apache.org/repos/asf/hive/blob/3df6bc28/ql/src/test/queries/clientpositive/resourceplan.q -- diff --git a/ql/src/test/queries/clientpositive/resourceplan.q b/ql/src/test/queries/clientpositive/resourceplan.q index 7314585..009816d 100644 --- a/ql/src/test/queries/clientpositive/resourceplan.q +++ b/ql/src/test/queries/clientpositive/resourceplan.q @@ -1,5 +1,6 @@ -- Continue on errors, we do check some error conditions below. set hive.cli.errors.ignore=true; +set hive.test.authz.sstd.hs2.mode=true; -- Prevent NPE in calcite. 
set hive.cbo.enable=false; @@ -210,8 +211,9 @@ SELECT * FROM SYS.WM_TRIGGERS; CREATE POOL plan_1.default WITH ALLOC_FRACTION=1.0, QUERY_PARALLELISM=5, SCHEDULING_POLICY='default'; -CREATE POOL plan_2.default WITH - ALLOC_FRACTION=1.0, QUERY_PARALLELISM=5, SCHEDULING_POLICY='default'; +CREATE POOL plan_2.default WITH QUERY_PARALLELISM=5, SCHEDULING_POLICY='default'; +CREATE POOL plan_2.default WITH ALLOC_FRACTION=1.0; +CREATE POOL plan_2.default WITH ALLOC_FRACTION=1.0, QUERY_PARALLELISM=5; SELECT * FROM SYS.WM_POOLS; CREATE POOL plan_2.default.c1 WITH http://git-wip-us.apache.org/repos/asf/hive/blob/3df6bc28/ql/src/test/results/clientpositive/llap/resourceplan.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/resourceplan.q.out b/ql/src/test/results/clientpositive/llap/resourceplan.q.out index b23720d..d790b44 100644 --- a/ql/src/test/results/clientpositive/llap/resourceplan.q.out +++ b/ql/src/test/results/clientpositive/llap/resourceplan.q.out @@ -66,9 +66,9 @@ default srcpart hive_test_user USER DELETE true-1 hive_test_user defaultsrcpart hive_test_user USERINSERT true -1 hive_test_user defaultsrcpart hive_test_user USERSELECT true -1 hive_test_user defaultsrcpart hive_test_user USERUPDATE true -1 hive_test_user -PREHOOK: query: DROP DATABASE IF EXISTS SYS +PREHOOK: query: DROP DATABASE IF EXISTS SYS CASCADE PREHOOK: type: DROPDATABASE -POSTHOOK: query: DROP DATABASE IF EXISTS
[1/2] hive git commit: HIVE-18737 : add an option to disable LLAP IO ACID for non-original files (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan)
Repository: hive Updated Branches: refs/heads/master e51f7c9d2 -> 3df6bc28b HIVE-18737 : add an option to disable LLAP IO ACID for non-original files (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dbf38ed1 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dbf38ed1 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dbf38ed1 Branch: refs/heads/master Commit: dbf38ed1434656f487f942b2f9df4fea3e29e44a Parents: e51f7c9 Author: sergey Authored: Tue Feb 20 17:21:00 2018 -0800 Committer: sergey Committed: Tue Feb 20 17:21:00 2018 -0800 -- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 1 + ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java | 8 +--- 2 files changed, 6 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/dbf38ed1/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index b51dc7e..38f6430 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3120,6 +3120,7 @@ public class HiveConf extends Configuration { false, "Use Tez cartesian product edge to speed up cross product"), // The default is different on the client and server, so it's null here. LLAP_IO_ENABLED("hive.llap.io.enabled", null, "Whether the LLAP IO layer is enabled."), +LLAP_IO_ACID_ENABLED("hive.llap.io.acid", true, "Whether the LLAP IO layer is enabled for ACID."), LLAP_IO_TRACE_SIZE("hive.llap.io.trace.size", "2Mb", new SizeValidator(0L, true, (long)Integer.MAX_VALUE, false), "The buffer size for a per-fragment LLAP debug trace. 
0 to disable."), http://git-wip-us.apache.org/repos/asf/hive/blob/dbf38ed1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java index 91d855b..96c5916 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java @@ -29,6 +29,7 @@ import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.io.AcidInputFormat; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.ColumnarSplit; @@ -234,13 +235,13 @@ public class OrcSplit extends FileSplit implements ColumnarSplit, LlapAwareSplit public boolean canUseLlapIo(Configuration conf) { final boolean hasDelta = deltas != null && !deltas.isEmpty(); final boolean isAcidRead = AcidUtils.isFullAcidScan(conf); -final boolean isVectorized = HiveConf.getBoolVar(conf, -HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED); +final boolean isVectorized = HiveConf.getBoolVar(conf, ConfVars.HIVE_VECTORIZATION_ENABLED); Boolean isSplitUpdate = null; if (isAcidRead) { final AcidUtils.AcidOperationalProperties acidOperationalProperties = AcidUtils.getAcidOperationalProperties(conf); isSplitUpdate = acidOperationalProperties.isSplitUpdate(); + // TODO: this is brittle. Who said everyone has to upgrade using upgrade process? 
assert isSplitUpdate : "should be true in Hive 3.0"; } @@ -250,7 +251,8 @@ public class OrcSplit extends FileSplit implements ColumnarSplit, LlapAwareSplit return true; } } else { - if (isAcidRead && hasBase && isVectorized) { + boolean isAcidEnabled = HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_ACID_ENABLED); + if (isAcidEnabled && isAcidRead && hasBase && isVectorized) { if (hasDelta) { if (isSplitUpdate) { // Base with delete deltas
[2/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_12.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/vectorization_12.q.out b/ql/src/test/results/clientpositive/spark/vectorization_12.q.out index f4a5b55..591de4b 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_12.q.out @@ -144,31 +144,14 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 -Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true -reduceColumnNullOrder: -reduceColumnSortOrder: -allNative: false -usesVectorUDFAdaptor: false -vectorized: true -rowBatchContext: -dataColumnCount: 9 -dataColumns: KEY._col0:double, KEY._col1:bigint, KEY._col2:string, KEY._col3:boolean, VALUE._col0:bigint, VALUE._col1:struct, VALUE._col2:struct, VALUE._col3:bigint, VALUE._col4:struct -partitionColumnCount: 0 -scratchColumnTypeNames: [] +notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator +vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), stddev_pop(VALUE._col4) -Group By Vectorization: -aggregators: VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFVarFinal(col 5:struct) -> double aggregation: stddev_samp, VectorUDAFAvgFinal(col 6:struct) -> double, VectorUDAFSumLong(col 7:bigint) -> bigint, VectorUDAFVarFinal(col 8:struct) -> double aggregation: stddev_pop -className: VectorGroupByOperator -groupByMode: MERGEPARTIAL -keyExpressions: col 0:double, col 1:bigint, col 2:string, col 3:boolean -native: false -vectorProcessingMode: MERGE_PARTIAL 
-projectedOutputColumnNums: [0, 1, 2, 3, 4] keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -176,21 +159,10 @@ STAGE PLANS: Select Operator expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0 * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0 * _col0) / -6432.0) (type: double), (- ((-6432.0 * _col0) / -6432.0)) (type: double), _col6 (type: double), (- (-6432.0 * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0 * _col0)) (type: double), (- (- ((-6432.0 * _col0) / -6432.0))) (type: double), (((-6432.0 * _col0) / -6432.0) + (- (-6432.0 * _col0))) (type: double), _col8 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1, 3, 2, 0, 9, 10, 4, 11, 5, 13, 12, 6, 15, 17, 7, 18, 19, 14, 8] - selectExpressions: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 9:double, LongColUnaryMinus(col 1:bigint) -> 10:bigint, LongColMultiplyLongColumn(col 1:bigint, col 4:bigint) -> 11:bigint, DoubleColDivideDoubleScalar(col 12:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 13:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColDivideDoubleScalar(col 12:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 14:double) -> 12:double, DoubleColUnaryMinus(col 14:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 
14:double) -> 15:double, DecimalScalarAddDecimalColumn(val -5638.15, col 16:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 16:decimal(19,0)) -> 17:decimal(22,2), Doub
[3/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out index 59a58e7..34b273c 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out @@ -117,47 +117,23 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [double] Reducer 2 -Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true -reduceColumnNullOrder: -reduceColumnSortOrder: -allNative: false -usesVectorUDFAdaptor: false -vectorized: true -rowBatchContext: -dataColumnCount: 6 -dataColumns: VALUE._col0:struct, VALUE._col1:double, VALUE._col2:tinyint, VALUE._col3:int, VALUE._col4:struct, VALUE._col5:bigint -partitionColumnCount: 0 -scratchColumnTypeNames: [] +notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator +vectorized: false Reduce Operator Tree: Group By Operator aggregations: var_pop(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), max(VALUE._col3), var_samp(VALUE._col4), count(VALUE._col5) -Group By Vectorization: -aggregators: VectorUDAFVarFinal(col 0:struct) -> double aggregation: var_pop, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFMaxLong(col 2:tinyint) -> tinyint, VectorUDAFMaxLong(col 3:int) -> int, VectorUDAFVarFinal(col 4:struct) -> double aggregation: var_samp, VectorUDAFCountMerge(col 5:bigint) -> bigint -className: VectorGroupByOperator -groupByMode: MERGEPARTIAL -native: false -vectorProcessingMode: GLOBAL -projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] mode: mergepartial 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: double), (_col0 / -26.28) (type: double), _col1 (type: double), (-1.389 + _col1) (type: double), (_col1 * (-1.389 + _col1)) (type: double), _col2 (type: tinyint), (- (_col1 * (-1.389 + _col1))) (type: double), _col3 (type: int), (CAST( _col3 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), _col4 (type: double), (10.175 % (- (_col1 * (-1.389 + _col1)))) (type: double), _col5 (type: bigint), (-563 % _col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 6, 1, 7, 9, 2, 8, 3, 12, 4, 13, 5, 14] - selectExpressions: DoubleColDivideDoubleScalar(col 0:double, val -26.28) -> 6:double, DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 7:double, DoubleColMultiplyDoubleColumn(col 1:double, col 8:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 8:double) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 8:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 8:double) -> 10:double) -> 8:double, DecimalColMultiplyDecimalScalar(col 11:decimal(10,0), val 79.553)(children: CastLongToDecimal(col 3:int) -> 11:decimal(10,0)) -> 12:decimal(16,3), DoubleScalarModuloDoubleColumn(val 10.175, col 10:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 10:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 10:double) -> 13:double) -> 10:double) -> 13:double, LongScalarModuloLongColumn(val -563, col 3:int) -> 14:int Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false -File Sink 
Vectorization: -className: VectorFileSinkOperator -native: fa
[4/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index 0027ab5..bd5e284 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -142,40 +142,24 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 -Execution mode: vectorized, llap +Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true -allNative: false -usesVectorUDFAdaptor: false -vectorized: true +notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator +vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8) -Group By Vectorization: -aggregators: VectorUDAFAvgFinal(col 0:struct) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFVarFinal(col 2:struct) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 3:struct) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 4:struct) -> double aggregation: var_samp, VectorUDAFAvgFinal(col 5:struct) -> double, VectorUDAFVarFinal(col 6:struct) -> double aggregation: stddev_samp, VectorUDAFMinLong(col 7:tinyint) -> tinyint, VectorUDAFCountMerge(col 8:bigint) -> bigint -className: VectorGroupByOperator -groupByMode: MERGEPARTIAL -native: false -vectorProcessingMode: GLOBAL -projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 
6, 7, 8] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: double), (_col0 + -3728.0) (type: double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) (type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 9, 11, 10, 14, 1, 12, 2, 15, 3, 13, 17, 16, 4, 5, 18, 20, 21, 6, 19, 22, 7, 8, 24, 25] - selectExpressions: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 10:double) -> 11:double, DoubleColUnaryMinus(col 12:double)(children: DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 10:double) -> 12:double) -> 10:double, DoubleColMultiplyDoubleColumn(col 12:double, col 13:double)(children: 
DoubleColUnaryMinus(col 13:double)(children: DoubleColUnaryMinus(col 12:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 12:double) -> 13:double) -> 12:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 0:double) -> 1
[5/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_13.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out index e50f3e2..23914f8 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out @@ -148,31 +148,15 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [double, decimal(11,4)] Reducer 2 -Execution mode: vectorized, llap +Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true -reduceColumnNullOrder: a -reduceColumnSortOrder: + -allNative: false -usesVectorUDFAdaptor: false -vectorized: true -rowBatchContext: -dataColumnCount: 11 -dataColumns: KEY._col0:boolean, KEY._col1:tinyint, KEY._col2:timestamp, KEY._col3:float, KEY._col4:string, VALUE._col0:tinyint, VALUE._col1:double, VALUE._col2:struct, VALUE._col3:struct, VALUE._col4:float, VALUE._col5:tinyint -partitionColumnCount: 0 -scratchColumnTypeNames: [] +notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator +vectorized: false Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) -Group By Vectorization: -aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFVarFinal(col 7:struct) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 8:struct) -> double aggregation: stddev_pop, VectorUDAFMaxDouble(col 9:float) -> float, VectorUDAFMinLong(col 10:tinyint) -> tinyint -className: VectorGroupByOperator -groupByMode: 
MERGEPARTIAL -keyExpressions: col 0:boolean, col 1:tinyint, col 2:timestamp, col 3:float, col 4:string -native: false -vectorProcessingMode: MERGE_PARTIAL -projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -180,21 +164,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10] - selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 11:tinyint, LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 13:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 15:double)(children: CastLongToDouble(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 
15:double) -> 16:double, DoubleColUnaryMinus(col 6:double) -> 15:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 17:float, DoubleColUna
[6/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)
HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8975924e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8975924e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8975924e Branch: refs/heads/master Commit: 8975924ec070380069d71d325a3358fe9932befb Parents: 3df6bc2 Author: Matt McCline Authored: Tue Feb 20 22:33:59 2018 -0800 Committer: Matt McCline Committed: Tue Feb 20 22:33:59 2018 -0800 -- .../UDAFTemplates/VectorUDAFVarMerge.txt| 5 + .../llap/parquet_types_vectorization.q.out | 2 +- .../llap/vector_decimal_aggregate.q.out | 66 +-- .../llap/vector_decimal_udf.q.out | 100 ++ .../llap/vector_reuse_scratchcols.q.out | 58 +- .../llap/vector_string_decimal.q.out| 137 ++ .../llap/vector_udf_string_to_boolean.q.out | 189 +++ .../clientpositive/llap/vectorization_0.q.out | 50 ++--- .../clientpositive/llap/vectorization_1.q.out | 29 +-- .../clientpositive/llap/vectorization_12.q.out | 33 +--- .../clientpositive/llap/vectorization_13.q.out | 57 +- .../clientpositive/llap/vectorization_14.q.out | 33 +--- .../clientpositive/llap/vectorization_16.q.out | 30 +-- .../clientpositive/llap/vectorization_2.q.out | 29 +-- .../clientpositive/llap/vectorization_3.q.out | 29 +-- .../clientpositive/llap/vectorization_4.q.out | 29 +-- .../clientpositive/llap/vectorization_9.q.out | 30 +-- .../vectorization_input_format_excludes.q.out | 28 ++- .../llap/vectorization_part_project.q.out | 12 +- .../llap/vectorization_short_regress.q.out | 186 +++--- .../llap/vectorized_mapjoin3.q.out | 100 -- .../llap/vectorized_parquet.q.out | 7 +- .../llap/vectorized_parquet_types.q.out | 19 +- .../llap/vectorized_timestamp.q.out | 24 +-- .../llap/vectorized_timestamp_funcs.q.out | 22 +-- .../spark/parquet_vectorization_0.q.out | 46 + .../spark/parquet_vectorization_1.q.out | 28 +-- 
.../spark/parquet_vectorization_12.q.out| 32 +--- .../spark/parquet_vectorization_13.q.out| 55 +- .../spark/parquet_vectorization_14.q.out| 32 +--- .../spark/parquet_vectorization_16.q.out| 29 +-- .../spark/parquet_vectorization_2.q.out | 28 +-- .../spark/parquet_vectorization_3.q.out | 28 +-- .../spark/parquet_vectorization_4.q.out | 28 +-- .../spark/parquet_vectorization_9.q.out | 29 +-- .../spark/vector_decimal_aggregate.q.out| 64 +-- .../clientpositive/spark/vectorization_0.q.out | 46 + .../clientpositive/spark/vectorization_1.q.out | 28 +-- .../clientpositive/spark/vectorization_12.q.out | 32 +--- .../clientpositive/spark/vectorization_13.q.out | 55 +- .../clientpositive/spark/vectorization_14.q.out | 32 +--- .../clientpositive/spark/vectorization_16.q.out | 29 +-- .../clientpositive/spark/vectorization_2.q.out | 28 +-- .../clientpositive/spark/vectorization_3.q.out | 28 +-- .../clientpositive/spark/vectorization_4.q.out | 28 +-- .../clientpositive/spark/vectorization_9.q.out | 29 +-- .../vectorization_input_format_excludes.q.out | 24 +-- .../spark/vectorization_short_regress.q.out | 178 ++--- .../spark/vectorized_timestamp_funcs.q.out | 21 +-- 49 files changed, 575 insertions(+), 1686 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt -- diff --git a/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt b/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt index 9b1c1cd..ccc5a22 100644 --- a/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt +++ b/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt @@ -487,6 +487,9 @@ public class extends VectorAggregateExpression { * Mode FINAL. #ENDIF FINAL */ + +/* +There seems to be a Wrong Results bug in VectorUDAFVarFinal -- disabling vectorization for now... 
return GenericUDAFVariance.isVarianceFamilyName(name) && inputColVectorType == ColumnVector.Type.STRUCT && @@ -498,6 +501,8 @@ public class extends VectorAggregateExpression { outputColVectorType == ColumnVector.Type.DOUBLE && mode == Mode.FINAL; #ENDIF FINAL +*/ +return false; } @Override http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out --
[1/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/master 3df6bc28b -> 8975924ec http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out index 6b63764..9683efa 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out @@ -140,40 +140,23 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 -Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true -allNative: false -usesVectorUDFAdaptor: false -vectorized: true +notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator +vectorized: false Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8) -Group By Vectorization: -aggregators: VectorUDAFAvgFinal(col 0:struct) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFVarFinal(col 2:struct) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 3:struct) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 4:struct) -> double aggregation: var_samp, VectorUDAFAvgFinal(col 5:struct) -> double, VectorUDAFVarFinal(col 6:struct) -> double aggregation: stddev_samp, VectorUDAFMinLong(col 7:tinyint) -> tinyint, VectorUDAFCountMerge(col 8:bigint) -> bigint -className: VectorGroupByOperator -groupByMode: MERGEPARTIAL -native: false 
-vectorProcessingMode: GLOBAL -projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: double), (_col0 + -3728.0) (type: double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) (type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 9, 11, 10, 14, 1, 12, 2, 15, 3, 13, 17, 16, 4, 5, 18, 20, 21, 6, 19, 22, 7, 8, 24, 25] - selectExpressions: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 10:double) -> 11:double, DoubleColUnaryMinus(col 12:double)(children: DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 10:double) -> 12:double) -> 10:double, 
DoubleColMultiplyDoubleColumn(col 12:double, col 13:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColUnaryMinus(col 12:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 12:double) -> 13:double) -> 12:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 14:dou