[2/2] drill git commit: DRILL-6118: Handle item star columns during project / filter push down and directory pruning
DRILL-6118: Handle item star columns during project / filter push down and directory pruning 1. Added DrillFilterItemStarReWriterRule to re-write item star fields to regular field references. 2. Refactored DrillPushProjectIntoScanRule to handle item star fields, factored out helper classes and methods from PrelUtil.class. 3. Fixed issue with dynamic star usage (after Calcite upgrade old usage of star was still present, replaced WILDCARD -> DYNAMIC_STAR for clarity). 4. Added unit tests to check project / filter push down and directory pruning with item star. Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/9073aed6 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/9073aed6 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/9073aed6 Branch: refs/heads/master Commit: 9073aed67d89e8b2188870d6c812706085c9c41b Parents: 50efb80 Author: Arina Ielchiieva Authored: Thu Dec 21 19:31:00 2017 +0200 Committer: Aman Sinha Committed: Sat Feb 24 19:56:35 2018 -0800 -- .../org/apache/drill/exec/dotdrill/View.java| 5 +- .../impl/project/ProjectRecordBatch.java| 12 +- .../impl/values/ValuesBatchCreator.java | 5 +- .../apache/drill/exec/planner/PlannerPhase.java | 7 +- .../drill/exec/planner/StarColumnHelper.java| 13 +- .../exec/planner/common/DrillScanRelBase.java | 10 +- .../DrillFilterItemStarReWriterRule.java| 232 + .../drill/exec/planner/logical/DrillOptiq.java | 3 +- .../planner/logical/DrillPushProjIntoScan.java | 97 --- .../logical/DrillPushProjectIntoScanRule.java | 256 +++ .../planner/logical/FieldsReWriterUtil.java | 138 ++ .../planner/logical/PreProcessLogicalRel.java | 6 +- .../drill/exec/planner/physical/PrelUtil.java | 247 +- .../visitor/SplitUpComplexExpressions.java | 6 +- .../planner/sql/handlers/SqlHandlerUtil.java| 4 +- .../exec/planner/types/RelDataTypeHolder.java | 13 +- .../text/compliant/RepeatedVarCharOutput.java | 7 +- .../drill/exec/store/mock/MockGroupScanPOP.java | 2 +- 
.../parquet2/DrillParquetGroupConverter.java| 12 +- .../exec/store/text/DrillTextRecordReader.java | 4 +- .../org/apache/drill/exec/util/Utilities.java | 39 ++- .../exec/vector/complex/fn/FieldSelection.java | 13 +- .../apache/drill/exec/TestWindowFunctions.java | 4 +- .../physical/unit/MiniPlanUnitTestBase.java | 3 +- .../physical/unit/PhysicalOpUnitTestBase.java | 3 +- .../TestPushDownAndPruningWithItemStar.java | 183 + .../drill/common/expression/SchemaPath.java | 12 +- 27 files changed, 923 insertions(+), 413 deletions(-) -- http://git-wip-us.apache.org/repos/asf/drill/blob/9073aed6/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/View.java -- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/View.java b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/View.java index 3524d73..615e3bc 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/View.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/dotdrill/View.java @@ -23,6 +23,7 @@ import org.apache.calcite.avatica.util.TimeUnit; import org.apache.calcite.sql.SqlIntervalQualifier; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.SqlTypeFamily; +import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.exec.planner.StarColumnHelper; import org.apache.drill.exec.planner.types.RelDataTypeDrillImpl; import org.apache.drill.exec.planner.types.RelDataTypeHolder; @@ -72,9 +73,9 @@ public class View { @JsonProperty("fractionalSecondPrecision") Integer fractionalSecondPrecision, @JsonProperty("isNullable") Boolean isNullable) { // Fix for views which were created on Calcite 1.4. - // After Calcite upgrade star "*" was changed on dynamic star "**" + // After Calcite upgrade star "*" was changed on dynamic star "**" (SchemaPath.DYNAMIC_STAR) // and type of star was changed to SqlTypeName.DYNAMIC_STAR - this.name = "*".equals(name) ? "**" : name; + this.name = "*".equals(name) ? 
SchemaPath.DYNAMIC_STAR : name; this.type = "*".equals(name) && type == SqlTypeName.ANY ? SqlTypeName.DYNAMIC_STAR : type; this.precision = precision; this.scale = scale; http://git-wip-us.apache.org/repos/asf/drill/blob/9073aed6/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/project/ProjectRecordBatch.java -- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/project/Pr
[1/2] drill git commit: DRILL-6118: Handle item star columns during project / filter push down and directory pruning
Repository: drill Updated Branches: refs/heads/master 50efb806b -> 9073aed67 http://git-wip-us.apache.org/repos/asf/drill/blob/9073aed6/logical/src/main/java/org/apache/drill/common/expression/SchemaPath.java -- diff --git a/logical/src/main/java/org/apache/drill/common/expression/SchemaPath.java b/logical/src/main/java/org/apache/drill/common/expression/SchemaPath.java index 95f3dbb..583046a 100644 --- a/logical/src/main/java/org/apache/drill/common/expression/SchemaPath.java +++ b/logical/src/main/java/org/apache/drill/common/expression/SchemaPath.java @@ -42,8 +42,8 @@ import com.google.common.base.Preconditions; public class SchemaPath extends LogicalExpressionBase { - public static final String WILDCARD = "*"; - public static final SchemaPath STAR_COLUMN = getSimplePath(WILDCARD); + public static final String DYNAMIC_STAR = "**"; + public static final SchemaPath STAR_COLUMN = getSimplePath(DYNAMIC_STAR); private final NameSegment rootSegment; @@ -205,14 +205,14 @@ public class SchemaPath extends LogicalExpressionBase { } /** - * Return if this column is the special wildcard ("*") column which means to + * Return if this column is the special wildcard ("**") column which means to * project all table columns. * - * @return true if the column is "*" + * @return true if the column is "**" */ - public boolean isWildcard() { -return isLeaf() && nameEquals(WILDCARD); + public boolean isDynamicStar() { +return isLeaf() && nameEquals(DYNAMIC_STAR); } /**
[3/3] drill git commit: DRILL-5741: Automatically manage memory allocations during startup
DRILL-5741: Automatically manage memory allocations during startup **Note:** This commit is rebased on (hence, requires) DRILL-6068 commit This commit provides a way for distributions and users to automatically define the Drillbit JVM's Heap, Direct and CodeCache allocations. To do this, it leverages the DRILL-6068 commit, which provides for configuration setup. The only new variable introduced is an environment variable - `DRILLBIT_MAX_PROC_MEM` that defines a Drillbit's max memory parameters. The logic defined by `auto-setup.sh` checks that, if any (or all) of the JVM memory parameters have been declared, they cumulatively don't exceed the value specified by `DRILLBIT_MAX_PROC_MEM`. ``` NOTE: To disable this, simply unset (or don't define) the environment variable DRILLBIT_MAX_PROC_MEM ``` The variable can be defined in KB, MB, or GB; similar in syntax to how the JVM MaxHeap is specified. For example: ``` DRILLBIT_MAX_PROC_MEM=13G DRILLBIT_MAX_PROC_MEM=8192m DRILLBIT_MAX_PROC_MEM=4194304K ``` In addition, you can specify it as a percent of the total system memory prior to the Drillbit starting up: `DRILLBIT_MAX_PROC_MEM=40%` For a system with 48GB free memory, when set to (say) 25% (with settings defined in drill-env.sh), and heap (8GB) and direct (10GB) are defined; the Drillbit fails startup with the following message: ``` 2018-01-03 14:27:57 [WARN] 25% of System Memory (47 GB) translates to 12 GB 2018-01-03 14:27:57 [ERROR]Unable to start Drillbit due to memory constraint violations Total Memory Requested : 19 GB Check the following settings to possibly modify (or increase the Max Memory Permitted): DRILLBIT_MAX_PROC_MEM=25% DRILL_HEAP=8G DRILL_MAX_DIRECT_MEMORY=10G DRILLBIT_CODE_CACHE_SIZE=1024m *NOTE: It is recommended not to specify DRILLBIT_CODE_CACHE_SIZE as this will be auto-computed based on the HeapSize and would not exceed 1GB ``` For all other combinations, the undefined parameters are adjusted to ensure that the total memory allocated is 
within the value specified by `DRILLBIT_MAX_PROC_MEM`. For a system with 48GB free memory, when set to (say) 50% (with settings defined in drill-env.sh), and heap (8GB) and direct (10GB) are defined; the Drillbit starts up with the following warning: ``` 2018-01-03 14:31:06 [WARN] 50% of System Memory (47 GB) translates to 24 GB 2018-01-03 14:31:06 [WARN] You have an allocation of 4 GB that is currently unused from a total of 24 GB. You can increase your existing memory configuration to use this extra memory DRILLBIT_MAX_PROC_MEM=50% DRILL_HEAP=8G DRILL_MAX_DIRECT_MEMORY=10G DRILLBIT_CODE_CACHE_SIZE=1024m *NOTE: It is recommended not to specify DRILLBIT_CODE_CACHE_SIZE as this will be auto-computed based on the HeapSize and would not exceed 1GB ``` In addition, if the available free memory is less than the allocation, an additional warning is provided under the assumption that the OS will reclaim more free memory when required: ``` 2018-01-03 14:31:06 [WARN] Total Memory Allocation for Drillbit (19GB) exceeds available free memory (11GB) 2018-01-03 14:31:06 [WARN] Drillbit will start up, but can potentially crash due to oversubscribing of system memory. 
``` For more details, refer to the attachments in https://issues.apache.org/jira/browse/DRILL-5741 Changes to auto configure messaging Publishing final values prior to startup Minor update for printing to console's err stream close apache/drill#1082 Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/50efb806 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/50efb806 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/50efb806 Branch: refs/heads/master Commit: 50efb806bb03494e1da4d6b48f90fbf58d699c18 Parents: 266250c Author: Kunal Khatua Authored: Thu Jan 11 17:35:13 2018 -0800 Committer: Aman Sinha Committed: Fri Feb 23 17:55:36 2018 -0800 -- distribution/src/resources/auto-setup.sh| 202 ++- distribution/src/resources/distrib-setup.sh | 2 +- distribution/src/resources/drill-env.sh | 10 ++ 3 files changed, 212 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/drill/blob/50efb806/distribution/src/resources/auto-setup.sh -- diff --git a/distribution/src/resources/auto-setup.sh b/distribution/src/resources/auto-setup.sh index 75bdda0..141648c 100644 --- a/distribution/src/resources/auto-setup.sh +++ b/distribution/src/resources/auto-setup.sh @@ -25,5 +25,205 @@ # if [ $status == "FAILED" ]; return 1; fi ###== -# FEATURES (Added here to describe supported add
[2/3] drill git commit: DRILL-6068: Support user/distrib-specific config checks during startup
DRILL-6068: Support user/distrib-specific config checks during startup 1. Allows for distrib/user specific checks to be done 2. Place-holder files for distribution and user specific checks 3. Moved JVM Version Check to head of script Separation of setups into 3 levels 1. Apache Drill (Default) 2. Distribution Specific 3. User-Defined 1 & 2 are mutually exclusive. 3 is additional checks that the user can specify. Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/266250cf Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/266250cf Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/266250cf Branch: refs/heads/master Commit: 266250cf6ca2269f8772796d6c4eb903aa795111 Parents: 27aa236 Author: Kunal Khatua Authored: Thu Jan 11 15:45:04 2018 -0800 Committer: Aman Sinha Committed: Fri Feb 23 17:55:06 2018 -0800 -- distribution/src/assemble/bin.xml | 15 ++ distribution/src/resources/auto-setup.sh| 29 distribution/src/resources/distrib-setup.sh | 29 distribution/src/resources/drill-config.sh | 178 +++ distribution/src/resources/drill-setup.sh | 26 5 files changed, 217 insertions(+), 60 deletions(-) -- http://git-wip-us.apache.org/repos/asf/drill/blob/266250cf/distribution/src/assemble/bin.xml -- diff --git a/distribution/src/assemble/bin.xml b/distribution/src/assemble/bin.xml index b4290bb..bed34ed 100644 --- a/distribution/src/assemble/bin.xml +++ b/distribution/src/assemble/bin.xml @@ -347,6 +347,21 @@ conf + src/resources/auto-setup.sh + 0755 + bin + + + src/resources/drill-setup.sh + 0755 + conf + + + src/resources/distrib-setup.sh + 0755 + conf + + src/resources/drill-override-example.conf conf http://git-wip-us.apache.org/repos/asf/drill/blob/266250cf/distribution/src/resources/auto-setup.sh -- diff --git a/distribution/src/resources/auto-setup.sh b/distribution/src/resources/auto-setup.sh new file mode 100644 index 000..75bdda0 --- /dev/null +++ b/distribution/src/resources/auto-setup.sh @@ 
-0,0 +1,29 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file is invoked by drill-config.sh during a Drillbit startup and provides +# default checks and autoconfiguration. +# Distributions should not put anything in this file. Checks can be +# specified in ${DRILL_HOME}/conf/distrib-setup.sh +# Users should not put anything in this file. Additional checks can be defined +# and put in ${DRILL_CONF_DIR}/drill-setup.sh instead. +# To FAIL any check, return with a non-zero return code +# e.g. +# if [ $status == "FAILED" ]; return 1; fi + +###== +# FEATURES (Added here to describe supported additions) +###== http://git-wip-us.apache.org/repos/asf/drill/blob/266250cf/distribution/src/resources/distrib-setup.sh -- diff --git a/distribution/src/resources/distrib-setup.sh b/distribution/src/resources/distrib-setup.sh new file mode 100644 index 000..dbd2b90 --- /dev/null +++ b/distribution/src/resources/distrib-setup.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License
[1/3] drill git commit: DRILL-5978: Updating of Apache and MapR Hive libraries to 2.3.2 and 2.1.2-mapr-1710 versions respectively
Repository: drill Updated Branches: refs/heads/master 1c14d3c3c -> 50efb806b DRILL-5978: Updating of Apache and MapR Hive libraries to 2.3.2 and 2.1.2-mapr-1710 versions respectively * Improvements to allow reading of Hive bucketed transactional ORC tables; * Updating hive properties for tests and resolving dependencies and API conflicts: - Fix for "hive.metastore.schema.verification", MetaException(message: Version information not found in metastore) https://cwiki.apache.org/confluence/display/Hive/Hive+Schema+Tool METASTORE_SCHEMA_VERIFICATION="false" property is added - Added METASTORE_AUTO_CREATE_ALL="true", properties to tests, because some additional tables are necessary in Hive metastore - Disabling calcite CBO for (Hive's CalcitePlanner) for tests, because it is in conflict with Drill's Calcite version for Drill unit tests. HIVE_CBO_ENABLED="false" property - jackson and parquet libraries are relocated in hive-exec-shade module - org.apache.parquet:parquet-column Drill version is added to "hive-exec" to allow using Parquet empty group on MessageType level (PARQUET-278) - Removing of commons-codec exclusion from hive core. This dependency is necessary for hive-exec and hive-metastore. 
- Setting Hive internal properties for transactional scan: HiveConf.HIVE_TRANSACTIONAL_TABLE_SCAN and for schema evolution: HiveConf.HIVE_SCHEMA_EVOLUTION, IOConstants.SCHEMA_EVOLUTION_COLUMNS, IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES - "io.dropwizard.metrics:metrics-core" with last 4.0.2 version is added to dependencyManagement block in Drill root POM - Exclusion of "hive-exec" in "hive-hbase-handler" is already in Drill root dependencyManagement POM - Hive Calcite libraries are excluded (Calcite CBO was disabled) - "jackson-core" dependency is added to DependencyManagement block in Drill root POM file - For MapR Hive 2.1 client older "com.fasterxml.jackson.core:jackson-databind" is included - "log4j:log4j" dependency is excluded from "hive-exec", "hive-metastore", "hive-hbase-handler". close apache/drill# Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/27aa2369 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/27aa2369 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/27aa2369 Branch: refs/heads/master Commit: 27aa236975e6d998e5b5dc173de777c78bb44bdd Parents: 1c14d3c Author: Vitalii Diravka Authored: Mon Nov 13 16:04:03 2017 + Committer: Aman Sinha Committed: Fri Feb 23 17:48:12 2018 -0800 -- contrib/storage-hive/core/pom.xml | 6 - .../codegen/templates/HiveRecordReaders.java| 4 +- .../exec/store/hive/HiveMetadataProvider.java | 1 + .../drill/exec/store/hive/HiveUtilities.java| 65 +++- .../store/hive/readers/HiveAbstractReader.java | 57 --- .../apache/drill/exec/hive/TestHiveStorage.java | 10 ++ .../hive/BaseTestHiveImpersonation.java | 5 +- .../hive/TestSqlStdBasedAuthorization.java | 8 +- .../hive/TestStorageBasedHiveAuthorization.java | 6 + .../exec/store/hive/HiveTestDataGenerator.java | 3 + contrib/storage-hive/hive-exec-shade/pom.xml| 158 +++ exec/java-exec/pom.xml | 15 -- exec/jdbc-all/pom.xml | 20 --- exec/jdbc/pom.xml | 1 - pom.xml | 74 - 15 files changed, 292 
insertions(+), 141 deletions(-) -- http://git-wip-us.apache.org/repos/asf/drill/blob/27aa2369/contrib/storage-hive/core/pom.xml -- diff --git a/contrib/storage-hive/core/pom.xml b/contrib/storage-hive/core/pom.xml index cdf8fbe..184300f 100644 --- a/contrib/storage-hive/core/pom.xml +++ b/contrib/storage-hive/core/pom.xml @@ -63,12 +63,6 @@ org.apache.hive hive-hbase-handler - - - org.apache.hive - hive-exec - - org.apache.hbase http://git-wip-us.apache.org/repos/asf/drill/blob/27aa2369/contrib/storage-hive/core/src/main/codegen/templates/HiveRecordReaders.java -- diff --git a/contrib/storage-hive/core/src/main/codegen/templates/HiveRecordReaders.java b/contrib/storage-hive/core/src/main/codegen/templates/HiveRecordReaders.java index 4a75ed3..a6e588b 100644 --- a/contrib/storage-hive/core/src/main/codegen/templates/HiveRecordReaders.java +++ b/contrib/storage-hive/core/src/main/codegen/templates/HiveRecordReaders.java @@ -130,7 +130,7 @@ public class Hive${entry.hiveReader}Reader extends HiveAbstractReader { while (!recordsInspector.isBatchFull() && hasNextValue(recordsInspector.getValueHold