Repository: drill
Updated Branches:
  refs/heads/master 1c14d3c3c -> 50efb806b
DRILL-5978: Update Apache and MapR Hive libraries to 2.3.2 and 2.1.2-mapr-1710 versions respectively

* Improvements to allow reading Hive bucketed transactional ORC tables;
* Updated Hive properties for tests and resolved dependency and API conflicts:
  - Fix for "hive.metastore.schema.verification", MetaException(message: Version information not found in metastore)
    https://cwiki.apache.org/confluence/display/Hive/Hive+Schema+Tool
    The METASTORE_SCHEMA_VERIFICATION="false" property is added
  - Added the METASTORE_AUTO_CREATE_ALL="true" property to tests, because some additional tables are necessary in the Hive metastore
  - Disabled Calcite CBO (Hive's CalcitePlanner) for tests via the HIVE_CBO_ENABLED="false" property, because it conflicts with Drill's Calcite version in Drill unit tests
  - jackson and parquet libraries are relocated in the hive-exec-shade module
  - Drill's version of org.apache.parquet:parquet-column is added to "hive-exec" to allow using a Parquet empty group on the MessageType level (PARQUET-278)
  - Removed the commons-codec exclusion from hive core; this dependency is necessary for hive-exec and hive-metastore
  - Set Hive internal properties for transactional scan (HiveConf.HIVE_TRANSACTIONAL_TABLE_SCAN) and for schema evolution (HiveConf.HIVE_SCHEMA_EVOLUTION, IOConstants.SCHEMA_EVOLUTION_COLUMNS, IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES)
  - "io.dropwizard.metrics:metrics-core" with the latest 4.0.2 version is added to the dependencyManagement block in the Drill root POM
  - The exclusion of "hive-exec" in "hive-hbase-handler" is already in the Drill root dependencyManagement POM
  - Hive Calcite libraries are excluded (Calcite CBO was disabled)
  - The "jackson-core" dependency is added to the dependencyManagement block in the Drill root POM file
  - For the MapR Hive 2.1 client, an older "com.fasterxml.jackson.core:jackson-databind" is included
  - The "log4j:log4j" dependency is excluded from "hive-exec", "hive-metastore" and "hive-hbase-handler".
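[Editor's note] As an illustration (not part of the commit), a minimal Java sketch of how the three test-only Hive settings named above are applied to a HiveConf; the ConfVars keys mirror the test changes in this patch, while the class and method names here are hypothetical:

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

    public class HiveTestConfSketch {  // hypothetical helper, for illustration only
      public static HiveConf buildTestConf() {
        HiveConf conf = new HiveConf();
        // Skip metastore schema version checks, avoiding
        // MetaException "Version information not found in metastore".
        conf.set(ConfVars.METASTORE_SCHEMA_VERIFICATION.varname, "false");
        // Let the embedded metastore create missing backing tables on demand.
        conf.set(ConfVars.METASTORE_AUTO_CREATE_ALL.varname, "true");
        // Disable Hive's Calcite-based CBO, which conflicts with Drill's Calcite version.
        conf.set(ConfVars.HIVE_CBO_ENABLED.varname, "false");
        return conf;
      }
    }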
close apache/drill#1111

Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/27aa2369
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/27aa2369
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/27aa2369

Branch: refs/heads/master
Commit: 27aa236975e6d998e5b5dc173de777c78bb44bdd
Parents: 1c14d3c
Author: Vitalii Diravka <vitalii.dira...@gmail.com>
Authored: Mon Nov 13 16:04:03 2017 +0000
Committer: Aman Sinha <asi...@maprtech.com>
Committed: Fri Feb 23 17:48:12 2018 -0800

----------------------------------------------------------------------
 contrib/storage-hive/core/pom.xml               |   6 -
 .../codegen/templates/HiveRecordReaders.java    |   4 +-
 .../exec/store/hive/HiveMetadataProvider.java   |   1 +
 .../drill/exec/store/hive/HiveUtilities.java    |  65 +++++++-
 .../store/hive/readers/HiveAbstractReader.java  |  57 ++++---
 .../apache/drill/exec/hive/TestHiveStorage.java |  10 ++
 .../hive/BaseTestHiveImpersonation.java         |   5 +-
 .../hive/TestSqlStdBasedAuthorization.java      |   8 +-
 .../hive/TestStorageBasedHiveAuthorization.java |   6 +
 .../exec/store/hive/HiveTestDataGenerator.java  |   3 +
 contrib/storage-hive/hive-exec-shade/pom.xml    | 158 +++++++++++--------
 exec/java-exec/pom.xml                          |  15 --
 exec/jdbc-all/pom.xml                           |  20 ---
 exec/jdbc/pom.xml                               |   1 -
 pom.xml                                         |  74 ++++++++-
 15 files changed, 292 insertions(+), 141 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/27aa2369/contrib/storage-hive/core/pom.xml
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/pom.xml b/contrib/storage-hive/core/pom.xml
index cdf8fbe..184300f 100644
--- a/contrib/storage-hive/core/pom.xml
+++ b/contrib/storage-hive/core/pom.xml
@@ -63,12 +63,6 @@
     <dependency>
       <groupId>org.apache.hive</groupId>
       <artifactId>hive-hbase-handler</artifactId>
-      <exclusions>
-        <exclusion>
-          <groupId>org.apache.hive</groupId>
-          <artifactId>hive-exec</artifactId>
-        </exclusion>
-      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.hbase</groupId>

http://git-wip-us.apache.org/repos/asf/drill/blob/27aa2369/contrib/storage-hive/core/src/main/codegen/templates/HiveRecordReaders.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/main/codegen/templates/HiveRecordReaders.java b/contrib/storage-hive/core/src/main/codegen/templates/HiveRecordReaders.java
index 4a75ed3..a6e588b 100644
--- a/contrib/storage-hive/core/src/main/codegen/templates/HiveRecordReaders.java
+++ b/contrib/storage-hive/core/src/main/codegen/templates/HiveRecordReaders.java
@@ -130,7 +130,7 @@ public class Hive${entry.hiveReader}Reader extends HiveAbstractReader {
       while (!recordsInspector.isBatchFull() && hasNextValue(recordsInspector.getValueHolder())) {
         Object value = recordsInspector.getNextValue();
         if (value != null) {
-          Object deSerializedValue = partitionSerDe.deserialize((Writable) value);
+          Object deSerializedValue = partitionDeserializer.deserialize((Writable) value);
           if (partTblObjectInspectorConverter != null) {
             deSerializedValue = partTblObjectInspectorConverter.convert(deSerializedValue);
           }
@@ -159,7 +159,7 @@ public class Hive${entry.hiveReader}Reader extends HiveAbstractReader {
     try {
       int recordCount = 0;
       while (recordCount < TARGET_RECORD_COUNT && hasNextValue(value)) {
-        Object deSerializedValue = partitionSerDe.deserialize((Writable) value);
+        Object deSerializedValue = partitionDeserializer.deserialize((Writable) value);
         if (partTblObjectInspectorConverter != null) {
           deSerializedValue = partTblObjectInspectorConverter.convert(deSerializedValue);
         }

http://git-wip-us.apache.org/repos/asf/drill/blob/27aa2369/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveMetadataProvider.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveMetadataProvider.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveMetadataProvider.java
index d0259ca..b11ef3b 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveMetadataProvider.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveMetadataProvider.java
@@ -264,6 +264,7 @@ public class HiveMetadataProvider {
       final List<LogicalInputSplit> splits = Lists.newArrayList();
       final JobConf job = new JobConf(hiveConf);
       HiveUtilities.addConfToJob(job, properties);
+      HiveUtilities.verifyAndAddTransactionalProperties(job, sd);
       job.setInputFormat(HiveUtilities.getInputFormatClass(job, sd, hiveReadEntry.getTable()));
       final Path path = new Path(sd.getLocation());
       final FileSystem fs = path.getFileSystem(job);

http://git-wip-us.apache.org/repos/asf/drill/blob/27aa2369/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveUtilities.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveUtilities.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveUtilities.java
index b101f49..05b7e89 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveUtilities.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveUtilities.java
@@ -17,8 +17,11 @@
  */
 package org.apache.drill.exec.store.hive;

+import com.google.common.base.Function;
+import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Strings;
+import com.google.common.collect.Lists;
 import io.netty.buffer.DrillBuf;
 import org.apache.drill.common.exceptions.DrillRuntimeException;
 import org.apache.drill.common.exceptions.ExecutionSetupException;
@@ -51,10 +54,14 @@ import org.apache.drill.exec.vector.ValueVector;
 import org.apache.drill.exec.work.ExecErrorConstants;

 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.IOConstants;
 import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.serde.serdeConstants;
@@ -70,6 +77,7 @@ import org.apache.hadoop.mapred.JobConf;
 import org.joda.time.DateTime;
 import org.joda.time.DateTimeZone;

+import javax.annotation.Nullable;
 import java.math.BigDecimal;
 import java.sql.Date;
 import java.sql.Timestamp;
@@ -104,8 +112,7 @@ public class HiveUtilities {
         return Boolean.parseBoolean(value);
       case DECIMAL: {
         DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
-        return HiveDecimalUtils.enforcePrecisionScale(HiveDecimal.create(value),
-            decimalTypeInfo.precision(), decimalTypeInfo.scale());
+        return HiveDecimalUtils.enforcePrecisionScale(HiveDecimal.create(value), decimalTypeInfo);
       }
       case DOUBLE:
         return Double.parseDouble(value);
@@ -507,5 +514,59 @@ public class HiveUtilities {
     int skipFooter = retrieveIntProperty(tableProperties, serdeConstants.FOOTER_COUNT, -1);
     return skipHeader > 0 || skipFooter > 0;
   }
+
+  /**
+   * This method checks whether the table is transactional and sets the necessary properties in {@link JobConf}.
+   * If schema evolution properties aren't set in the job conf for the input format, the method sets the column
+   * names and types from table/partition properties or the storage descriptor.
+   *
+   * @param job the job to update
+   * @param sd  storage descriptor
+   */
+  public static void verifyAndAddTransactionalProperties(JobConf job, StorageDescriptor sd) {
+
+    if (AcidUtils.isTablePropertyTransactional(job)) {
+      AcidUtils.setTransactionalTableScan(job, true);
+
+      // No work is needed, if schema evolution is used
+      if (Utilities.isSchemaEvolutionEnabled(job, true) && job.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS) != null &&
+          job.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES) != null) {
+        return;
+      }
+
+      String colNames;
+      String colTypes;
+
+      // Try to get column names and types from table or partition properties. If they are absent there, get column
+      // data from the storage descriptor of the table
+      colNames = job.get(serdeConstants.LIST_COLUMNS);
+      colTypes = job.get(serdeConstants.LIST_COLUMN_TYPES);
+
+      if (colNames == null || colTypes == null) {
+        colNames = Joiner.on(",").join(Lists.transform(sd.getCols(), new Function<FieldSchema, String>()
+        {
+          @Nullable
+          @Override
+          public String apply(@Nullable FieldSchema input)
+          {
+            return input.getName();
+          }
+        }));
+
+        colTypes = Joiner.on(",").join(Lists.transform(sd.getCols(), new Function<FieldSchema, String>()
+        {
+          @Nullable
+          @Override
+          public String apply(@Nullable FieldSchema input)
+          {
+            return input.getType();
+          }
+        }));
+      }
+
+      job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, colNames);
+      job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, colTypes);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/27aa2369/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/HiveAbstractReader.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/HiveAbstractReader.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/HiveAbstractReader.java
index 9df721b..b814866 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/HiveAbstractReader.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/readers/HiveAbstractReader.java
@@ -50,7 +50,7 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
-import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
@@ -85,10 +85,10 @@ public abstract class HiveAbstractReader extends AbstractRecordReader {
   protected List<TypeInfo> selectedPartitionTypes = Lists.newArrayList();
   protected List<Object> selectedPartitionValues = Lists.newArrayList();

-  // SerDe of the reading partition (or table if the table is non-partitioned)
-  protected SerDe partitionSerDe;
+  // Deserializer of the reading partition (or table if the table is non-partitioned)
+  protected Deserializer partitionDeserializer;

-  // ObjectInspector to read data from partitionSerDe (for a non-partitioned table this is same as the table
+  // ObjectInspector to read data from partitionDeserializer (for a non-partitioned table this is same as the table
   // ObjectInspector).
   protected StructObjectInspector partitionOI;

@@ -143,19 +143,20 @@ public abstract class HiveAbstractReader extends AbstractRecordReader {
           HiveUtilities.getPartitionMetadata(partition, table);
       HiveUtilities.addConfToJob(job, partitionProperties);

-      final SerDe tableSerDe = createSerDe(job, table.getSd().getSerdeInfo().getSerializationLib(), tableProperties);
-      final StructObjectInspector tableOI = getStructOI(tableSerDe);
+      final Deserializer tableDeserializer = createDeserializer(job, table.getSd().getSerdeInfo().getSerializationLib(), tableProperties);
+      final StructObjectInspector tableOI = getStructOI(tableDeserializer);

       if (partition != null) {
-        partitionSerDe = createSerDe(job, partition.getSd().getSerdeInfo().getSerializationLib(), partitionProperties);
-        partitionOI = getStructOI(partitionSerDe);
+        partitionDeserializer = createDeserializer(job, partition.getSd().getSerdeInfo().getSerializationLib(), partitionProperties);
+        partitionOI = getStructOI(partitionDeserializer);

         finalOI = (StructObjectInspector)ObjectInspectorConverters.getConvertedOI(partitionOI, tableOI);
         partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(partitionOI, finalOI);
         job.setInputFormat(HiveUtilities.getInputFormatClass(job, partition.getSd(), table));
+        HiveUtilities.verifyAndAddTransactionalProperties(job, table.getSd());
       } else {
         // For non-partitioned tables, there is no need to create converter as there are no schema changes expected.
-        partitionSerDe = tableSerDe;
+        partitionDeserializer = tableDeserializer;
         partitionOI = tableOI;
         partTblObjectInspectorConverter = null;
         finalOI = tableOI;
@@ -166,7 +167,7 @@ public abstract class HiveAbstractReader extends AbstractRecordReader {
       for (StructField field: finalOI.getAllStructFieldRefs()) {
         logger.trace("field in finalOI: {}", field.getClass().getName());
       }
-      logger.trace("partitionSerDe class is {} {}", partitionSerDe.getClass().getName());
+      logger.trace("partitionDeserializer class is {} {}", partitionDeserializer.getClass().getName());
     }
     // Get list of partition column names
     final List<String> partitionNames = Lists.newArrayList();
@@ -176,8 +177,8 @@ public abstract class HiveAbstractReader extends AbstractRecordReader {

     // We should always get the columns names from ObjectInspector. For some of the tables (ex. avro) metastore
     // may not contain the schema, instead it is derived from other sources such as table properties or external file.
-    // SerDe object knows how to get the schema with all the config and table properties passed in initialization.
-    // ObjectInspector created from the SerDe object has the schema.
+    // Deserializer object knows how to get the schema with all the config and table properties passed in initialization.
+    // ObjectInspector created from the Deserializer object has the schema.
     final StructTypeInfo sTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(finalOI);
     final List<String> tableColumnNames = sTypeInfo.getAllStructFieldNames();
@@ -201,7 +202,20 @@ public abstract class HiveAbstractReader extends AbstractRecordReader {
         }
       }
     }
-    ColumnProjectionUtils.appendReadColumns(job, columnIds, selectedColumnNames);
+    ColumnProjectionUtils.appendReadColumns(job, columnIds);
+
+    // TODO: Use below overloaded method instead of above simpler version of it, once Hive client dependencies
+    // (from all profiles) are updated to the 2.3 version or above
+//    ColumnProjectionUtils.appendReadColumns(job, columnIds, selectedColumnNames,
+//        Lists.newArrayList(Iterables.transform(getColumns(), new Function<SchemaPath, String>()
+//        {
+//          @Nullable
+//          @Override
+//          public String apply(@Nullable SchemaPath path)
+//          {
+//            return path.getRootSegmentPath();
+//          }
+//        })));

     for (String columnName : selectedColumnNames) {
       StructField fieldRef = finalOI.getStructFieldRef(columnName);
@@ -269,18 +283,19 @@ public abstract class HiveAbstractReader extends AbstractRecordReader {
   }

   /**
-   * Utility method which creates a SerDe object for given SerDe class name and properties.
+   * Utility method which creates a Deserializer object for given Deserializer class name and properties.
+   * TODO: Replace Deserializer interface with AbstractSerDe, once all Hive clients are upgraded to the 2.3 version
    */
-  private static SerDe createSerDe(final JobConf job, final String sLib, final Properties properties) throws Exception {
-    final Class<? extends SerDe> c = Class.forName(sLib).asSubclass(SerDe.class);
-    final SerDe serde = c.getConstructor().newInstance();
-    serde.initialize(job, properties);
+  private static Deserializer createDeserializer(final JobConf job, final String sLib, final Properties properties) throws Exception {
+    final Class<? extends Deserializer> c = Class.forName(sLib).asSubclass(Deserializer.class);
+    final Deserializer deserializer = c.getConstructor().newInstance();
+    deserializer.initialize(job, properties);

-    return serde;
+    return deserializer;
   }

-  private static StructObjectInspector getStructOI(final SerDe serDe) throws Exception {
-    ObjectInspector oi = serDe.getObjectInspector();
+  private static StructObjectInspector getStructOI(final Deserializer deserializer) throws Exception {
+    ObjectInspector oi = deserializer.getObjectInspector();
     if (oi.getCategory() != ObjectInspector.Category.STRUCT) {
       throw new UnsupportedOperationException(String.format("%s category not supported", oi.getCategory()));
     }

http://git-wip-us.apache.org/repos/asf/drill/blob/27aa2369/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
index c2412ad..9f46e66 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
@@ -393,6 +393,16 @@ public class TestHiveStorage extends HiveTestBase {
   }

   @Test // DRILL-3938
+  public void readFromAlteredPartitionedTableWithEmptyGroupType() throws Exception {
+    testBuilder()
+        .sqlQuery("SELECT newcol FROM hive.kv_parquet LIMIT 1")
+        .unOrdered()
+        .baselineColumns("newcol")
+        .baselineValues(new Object[]{null})
+        .go();
+  }
+
+  @Test // DRILL-3938
   public void nativeReaderIsDisabledForAlteredPartitionedTable() throws Exception {
     try {
       test(String.format("alter session set `%s` = true", ExecConstants.HIVE_OPTIMIZE_SCAN_WITH_NATIVE_READERS));

http://git-wip-us.apache.org/repos/asf/drill/blob/27aa2369/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/BaseTestHiveImpersonation.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/BaseTestHiveImpersonation.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/BaseTestHiveImpersonation.java
index 3862dc6..a289af9 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/BaseTestHiveImpersonation.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/BaseTestHiveImpersonation.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -72,6 +72,9 @@ public class BaseTestHiveImpersonation extends BaseTestImpersonation {
     hiveConf.set(ConfVars.SCRATCHDIR.varname, "file://" + scratchDir.getAbsolutePath());
     hiveConf.set(ConfVars.LOCALSCRATCHDIR.varname, localScratchDir.getAbsolutePath());
+    hiveConf.set(ConfVars.METASTORE_SCHEMA_VERIFICATION.varname, "false");
+    hiveConf.set(ConfVars.METASTORE_AUTO_CREATE_ALL.varname, "true");
+    hiveConf.set(ConfVars.HIVE_CBO_ENABLED.varname, "false");

     // Set MiniDFS conf in HiveConf
     hiveConf.set(FS_DEFAULT_NAME_KEY, dfsConf.get(FS_DEFAULT_NAME_KEY));

http://git-wip-us.apache.org/repos/asf/drill/blob/27aa2369/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/TestSqlStdBasedAuthorization.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/TestSqlStdBasedAuthorization.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/TestSqlStdBasedAuthorization.java
index e003865..ef6c547 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/TestSqlStdBasedAuthorization.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/TestSqlStdBasedAuthorization.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -41,9 +41,12 @@ import static org.apache.hadoop.fs.FileSystem.FS_DEFAULT_NAME_KEY;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_AUTHENTICATOR_MANAGER;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_CBO_ENABLED;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTOREURIS;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTORE_AUTO_CREATE_ALL;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTORE_SCHEMA_VERIFICATION;

 @Category({SlowTest.class, HiveStorageTest.class})
 public class TestSqlStdBasedAuthorization extends BaseTestHiveImpersonation {
@@ -101,6 +104,9 @@ public class TestSqlStdBasedAuthorization extends BaseTestHiveImpersonation {
     hiveConfig.put(HIVE_AUTHORIZATION_ENABLED.varname, hiveConf.get(HIVE_AUTHORIZATION_ENABLED.varname));
     hiveConfig.put(HIVE_AUTHENTICATOR_MANAGER.varname, SessionStateUserAuthenticator.class.getName());
     hiveConfig.put(HIVE_AUTHORIZATION_MANAGER.varname, SQLStdHiveAuthorizerFactory.class.getName());
+    hiveConfig.put(METASTORE_SCHEMA_VERIFICATION.varname, hiveConf.get(METASTORE_SCHEMA_VERIFICATION.varname));
+    hiveConfig.put(METASTORE_AUTO_CREATE_ALL.varname, hiveConf.get(METASTORE_AUTO_CREATE_ALL.varname));
+    hiveConfig.put(HIVE_CBO_ENABLED.varname, hiveConf.get(HIVE_CBO_ENABLED.varname));
     return hiveConfig;
   }

http://git-wip-us.apache.org/repos/asf/drill/blob/27aa2369/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/TestStorageBasedHiveAuthorization.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/TestStorageBasedHiveAuthorization.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/TestStorageBasedHiveAuthorization.java
index 685d3bf..972c545 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/TestStorageBasedHiveAuthorization.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/impersonation/hive/TestStorageBasedHiveAuthorization.java
@@ -41,13 +41,16 @@ import java.util.Map;

 import static org.apache.drill.exec.hive.HiveTestUtilities.executeQuery;
 import static org.apache.hadoop.fs.FileSystem.FS_DEFAULT_NAME_KEY;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_CBO_ENABLED;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_METASTORE_AUTHENTICATOR_MANAGER;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_METASTORE_AUTHORIZATION_AUTH_READS;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_METASTORE_AUTHORIZATION_MANAGER;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTOREURIS;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTORE_AUTO_CREATE_ALL;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTORE_PRE_EVENT_LISTENERS;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTORE_SCHEMA_VERIFICATION;
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.DYNAMICPARTITIONINGMODE;

 @Category({SlowTest.class, HiveStorageTest.class})
@@ -136,6 +139,9 @@ public class TestStorageBasedHiveAuthorization extends BaseTestHiveImpersonation
     hiveConfig.put(FS_DEFAULT_NAME_KEY, dfsConf.get(FS_DEFAULT_NAME_KEY));
     hiveConfig.put(HIVE_SERVER2_ENABLE_DOAS.varname, hiveConf.get(HIVE_SERVER2_ENABLE_DOAS.varname));
     hiveConfig.put(METASTORE_EXECUTE_SET_UGI.varname, hiveConf.get(METASTORE_EXECUTE_SET_UGI.varname));
+    hiveConfig.put(METASTORE_SCHEMA_VERIFICATION.varname, hiveConf.get(METASTORE_SCHEMA_VERIFICATION.varname));
+    hiveConfig.put(METASTORE_AUTO_CREATE_ALL.varname, hiveConf.get(METASTORE_AUTO_CREATE_ALL.varname));
+    hiveConfig.put(HIVE_CBO_ENABLED.varname, hiveConf.get(HIVE_CBO_ENABLED.varname));
     return hiveConfig;
   }

http://git-wip-us.apache.org/repos/asf/drill/blob/27aa2369/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
index 924d7cb..93786d0 100644
--- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
+++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
@@ -149,6 +149,9 @@ public class HiveTestDataGenerator {
     conf.set(ConfVars.SCRATCHDIR.varname, scratchDir.getAbsolutePath());
     conf.set(ConfVars.LOCALSCRATCHDIR.varname, localScratchDir.getAbsolutePath());
     conf.set(ConfVars.DYNAMICPARTITIONINGMODE.varname, "nonstrict");
+    conf.set(ConfVars.METASTORE_AUTO_CREATE_ALL.varname, "true");
+    conf.set(ConfVars.METASTORE_SCHEMA_VERIFICATION.varname, "false");
+    conf.set(ConfVars.HIVE_CBO_ENABLED.varname, "false");

     SessionState ss = new SessionState(conf);
     SessionState.start(ss);

http://git-wip-us.apache.org/repos/asf/drill/blob/27aa2369/contrib/storage-hive/hive-exec-shade/pom.xml
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/hive-exec-shade/pom.xml b/contrib/storage-hive/hive-exec-shade/pom.xml
index de9e055..b1c1ab4 100644
--- a/contrib/storage-hive/hive-exec-shade/pom.xml
+++ b/contrib/storage-hive/hive-exec-shade/pom.xml
@@ -34,89 +34,92 @@
       <artifactId>hive-exec</artifactId>
       <scope>compile</scope>
       <exclusions>
+        <!--Hive Calcite libraries are not required. When a user submits a query in Drill via the Hive plugin, the
+          query is validated and planned via Drill Calcite. Hive Calcite can be used only to set up the Hive store for
+          Drill unit testing, where a lot of Hive specific queries are performed. But Drill Calcite and Avatica
+          versions have conflicts with Hive's old Calcite and Avatica versions. That's why the Calcite cost-based
+          optimizer (ConfVars.HIVE_CBO_ENABLED) is disabled for Drill Hive JUnit test cases. It can be enabled again
+          once Hive leverages the newest Calcite version. To do that, check whether the Drill Calcite and Avatica
+          versions are suitable for hive-exec. If not, use the Hive Calcite and Avatica versions.
+          Note: Versions of Calcite libraries are controlled by the "DependencyManagement" block in Drill's
+          root POM file now-->
         <exclusion>
-          <artifactId>log4j</artifactId>
-          <groupId>log4j</groupId>
+          <groupId>org.apache.calcite</groupId>
+          <artifactId>calcite-core</artifactId>
         </exclusion>
         <exclusion>
-          <groupId>commons-codec</groupId>
-          <artifactId>commons-codec</artifactId>
+          <groupId>org.apache.calcite</groupId>
+          <artifactId>calcite-avatica</artifactId>
         </exclusion>
         <exclusion>
-          <artifactId>calcite-avatica</artifactId>
           <groupId>org.apache.calcite</groupId>
+          <artifactId>calcite-linq4j</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.calcite</groupId>
+          <artifactId>calcite-druid</artifactId>
         </exclusion>
       </exclusions>
     </dependency>
+    <!--Once a newer hive-exec version leverages parquet-column 1.9.0, this dependency can be deleted -->
+    <dependency>
+      <groupId>org.apache.parquet</groupId>
+      <artifactId>parquet-column</artifactId>
+    </dependency>
   </dependencies>

   <build>
     <plugins>
       <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-shade-plugin</artifactId>
-        <version>2.1</version>
-        <executions>
-          <execution>
-            <phase>package</phase>
-            <goals>
-              <goal>shade</goal>
-            </goals>
-            <configuration>
-              <artifactSet>
-                <includes>
-                  <include>org.apache.hive:hive-exec</include>
-                  <include>com.twitter:parquet-column</include>
-                  <include>com.twitter:parquet-hadoop</include>
-                  <include>commons-codec:commons-codec</include>
-                  <include>com.twitter:parquet-format</include>
-                  <include>com.twitter:parquet-common</include>
-                  <include>com.twitter:parquet-jackson</include>
-                  <include>com.twitter:parquet-encoding</include>
-                  <include>com.twitter:parquet-generator</include>
-                  <include>org.apache.calcite:calcite-core</include>
-                  <include>org.apache.calcite.avatica:avatica-core</include>
-                </includes>
-              </artifactSet>
-              <createDependencyReducedPom>false</createDependencyReducedPom>
-              <promoteTransitiveDependencies>true</promoteTransitiveDependencies>
-              <relocations>
-                <relocation>
-                  <pattern>com.google.</pattern>
-                  <shadedPattern>hive.com.google.</shadedPattern>
-                </relocation>
-                <relocation>
-                  <pattern>parquet.</pattern>
-                  <shadedPattern>hive.parquet.</shadedPattern>
-                </relocation>
-                <relocation>
-                  <pattern>org.apache.commons.codec.</pattern>
-                  <shadedPattern>hive.org.apache.commons.codec.</shadedPattern>
-                </relocation>
-                <relocation>
-                  <pattern>net.hydromatic.</pattern>
-                  <shadedPattern>hive.net.hydromatic.</shadedPattern>
-                </relocation>
-                <relocation>
-                  <pattern>org.eigenbase.</pattern>
-                  <shadedPattern>hive.org.eigenbase.</shadedPattern>
-                </relocation>
-                <relocation>
-                  <pattern>org.apache.calcite.</pattern>
-                  <shadedPattern>hive.org.apache.calcite.</shadedPattern>
-                </relocation>
-              </relocations>
-              <filters>
-                <filter>
-                  <artifact>org.apache.hive:hive-exec</artifact>
-                  <excludes>
-                    <exclude>org/json/*</exclude>
-                  </excludes>
-                </filter>
-              </filters>
-            </configuration>
-          </execution>
-        </executions>
+        <configuration>
+          <artifactSet>
+            <includes>
+              <include>org.apache.hive:hive-exec</include>
+              <include>org.apache.parquet:parquet-column</include>
+              <include>commons-codec:commons-codec</include>
+              <include>com.fasterxml.jackson.core:jackson-databind</include>
+              <include>com.fasterxml.jackson.core:jackson-annotations</include>
+              <include>com.fasterxml.jackson.core:jackson-core</include>
+            </includes>
+          </artifactSet>
+          <createDependencyReducedPom>false</createDependencyReducedPom>
+          <promoteTransitiveDependencies>true</promoteTransitiveDependencies>
+          <relocations>
+            <relocation>
+              <pattern>com.google.</pattern>
+              <shadedPattern>hive.com.google.</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>org.apache.commons.codec.</pattern>
+              <shadedPattern>hive.org.apache.commons.codec.</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>net.hydromatic.</pattern>
+              <shadedPattern>hive.net.hydromatic.</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>org.eigenbase.</pattern>
+              <shadedPattern>hive.org.eigenbase.</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>com.fasterxml.jackson.</pattern>
+              <shadedPattern>hive.com.fasterxml.jackson.</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>org.apache.parquet.</pattern>
+              <shadedPattern>hive.org.apache.parquet.</shadedPattern>
+            </relocation>
+          </relocations>
+          <filters>
+            <filter>
+              <artifact>org.apache.hive:hive-exec</artifact>
+              <excludes>
+                <exclude>org/apache/parquet/schema/*</exclude>
+              </excludes>
+            </filter>
+          </filters>
+        </configuration>
       </plugin>
       <plugin>
        <groupId>org.apache.maven.plugins</groupId>
@@ -153,4 +156,23 @@
       </plugin>
     </plugins>
   </build>
+  <profiles>
+    <profile>
+      <id>mapr</id>
+      <properties>
+        <!-- TODO: MapR Hive 2.1 client version libraries use older jackson libraries than the Hive 2.3 client.
+          It can be removed after updating to the 2.3 version -->
+        <jackson.databind.mapr.hive.version>2.4.2</jackson.databind.mapr.hive.version>
+      </properties>
+      <dependencyManagement>
+        <dependencies>
+          <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>${jackson.databind.mapr.hive.version}</version>
+          </dependency>
+        </dependencies>
+      </dependencyManagement>
+    </profile>
+  </profiles>
 </project>

http://git-wip-us.apache.org/repos/asf/drill/blob/27aa2369/exec/java-exec/pom.xml
----------------------------------------------------------------------
diff --git a/exec/java-exec/pom.xml b/exec/java-exec/pom.xml
index 8e64224..9c12185 100644
--- a/exec/java-exec/pom.xml
+++ b/exec/java-exec/pom.xml
@@ -236,21 +236,6 @@
     <dependency>
       <groupId>org.apache.parquet</groupId>
       <artifactId>parquet-column</artifactId>
-      <version>${parquet.version}</version>
-      <exclusions>
-        <exclusion>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-client</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-common</artifactId>
-        </exclusion>
-        <exclusion>
-          <groupId>commons-codec</groupId>
-          <artifactId>commons-codec</artifactId>
-        </exclusion>
-      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.parquet</groupId>

http://git-wip-us.apache.org/repos/asf/drill/blob/27aa2369/exec/jdbc-all/pom.xml
----------------------------------------------------------------------
diff --git a/exec/jdbc-all/pom.xml b/exec/jdbc-all/pom.xml
index 80d3716..5b3486c 100644
--- a/exec/jdbc-all/pom.xml
+++ b/exec/jdbc-all/pom.xml
@@ -269,17 +269,7 @@
       </plugin>

       <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-shade-plugin</artifactId>
-        <version>2.4.1</version>
-        <executions>
-          <execution>
-            <phase>package</phase>
-            <goals>
-              <goal>shade</goal>
-            </goals>
-          </execution>
-        </executions>
         <configuration>
           <shadedArtifactAttached>false</shadedArtifactAttached>
           <createDependencyReducedPom>true</createDependencyReducedPom>
@@ -583,17 +573,7 @@
       </plugin>

       <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-shade-plugin</artifactId>
-        <version>2.4.1</version>
-        <executions>
-          <execution>
-            <phase>package</phase>
-            <goals>
-              <goal>shade</goal>
-            </goals>
-          </execution>
-        </executions>
         <configuration>
           <shadedArtifactAttached>false</shadedArtifactAttached>
           <createDependencyReducedPom>true</createDependencyReducedPom>

http://git-wip-us.apache.org/repos/asf/drill/blob/27aa2369/exec/jdbc/pom.xml
----------------------------------------------------------------------
diff --git a/exec/jdbc/pom.xml b/exec/jdbc/pom.xml
index d7087f4..e463c96 100644
--- a/exec/jdbc/pom.xml
+++ b/exec/jdbc/pom.xml
@@ -62,7 +62,6 @@
     <dependency>
       <groupId>com.fasterxml.jackson.core</groupId>
       <artifactId>jackson-core</artifactId>
-      <version>${jackson.version}</version>
     </dependency>
     <dependency>
       <groupId>com.fasterxml.jackson.core</groupId>

http://git-wip-us.apache.org/repos/asf/drill/blob/27aa2369/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 200b44a..2c1f5fc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -52,10 +52,10 @@
     <hamcrest.core.version>1.3</hamcrest.core.version>

     <!--
-      Currently Hive storage plugin only supports Apache Hive 1.2 or vendor specific variants of the
-      Apache Hive 1.2. If the version is changed, make sure the jars and their dependencies are updated.
+      Currently Hive storage plugin only supports Apache Hive 2.3.2 or vendor specific variants of the
+      Apache Hive 2.3.2. If the version is changed, make sure the jars and their dependencies are updated.
     -->
-    <hive.version>1.2.1</hive.version>
+    <hive.version>2.3.2</hive.version>
     <hadoop.version>2.7.1</hadoop.version>
     <hbase.version>1.1.3</hbase.version>
     <fmpp.version>1.0</fmpp.version>
@@ -587,6 +587,19 @@
           </lifecycleMappingMetadata>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>3.1.0</version>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
   </pluginManagement>
 </build>
@@ -885,6 +898,22 @@
         <exclusion>
           <groupId>io.netty</groupId>
           <artifactId>netty-all</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>javax.servlet</groupId>
+          <artifactId>servlet-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>servlet-api-2.5</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-mapreduce-client-core</artifactId>
+        </exclusion>
+        <exclusion>
+          <artifactId>log4j</artifactId>
+          <groupId>log4j</groupId>
+        </exclusion>
       </exclusions>
     </dependency>
     <dependency>
@@ -893,6 +922,10 @@
       <version>${hive.version}</version>
       <exclusions>
         <exclusion>
+          <groupId>log4j</groupId>
+          <artifactId>log4j</artifactId>
+        </exclusion>
+        <exclusion>
           <groupId>org.apache.hive</groupId>
           <artifactId>hive-serde</artifactId>
         </exclusion>
@@ -924,6 +957,10 @@
       <version>${hive.version}</version>
       <exclusions>
         <exclusion>
+          <groupId>log4j</groupId>
+          <artifactId>log4j</artifactId>
+        </exclusion>
+        <exclusion>
           <groupId>org.slf4j</groupId>
           <artifactId>slf4j-log4j12</artifactId>
         </exclusion>
@@ -997,6 +1034,11 @@
         </exclusions>
       </dependency>
       <dependency>
+        <groupId>io.dropwizard.metrics</groupId>
+        <artifactId>metrics-core</artifactId>
+        <version>4.0.2</version>
+      </dependency>
+      <dependency>
         <groupId>org.codehaus.janino</groupId>
         <artifactId>janino</artifactId>
         <version>${janino.version}</version>
@@ -1012,6 +1054,11 @@
         <version>${jackson.databind.version}</version>
       </dependency>
       <dependency>
+        <groupId>com.fasterxml.jackson.core</groupId>
+        <artifactId>jackson-core</artifactId>
+        <version>${jackson.version}</version>
+      </dependency>
+      <dependency>
         <groupId>com.mapr.db</groupId>
         <artifactId>maprdb</artifactId>
         <version>${mapr.release.version}</version>
@@ -1139,6 +1186,25 @@
         <version>1.8</version>
         <scope>runtime</scope>
       </dependency>
+      <dependency>
+        <groupId>org.apache.parquet</groupId>
+        <artifactId>parquet-column</artifactId>
+        <version>${parquet.version}</version>
+        <exclusions>
+          <exclusion>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-client</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-common</artifactId>
+          </exclusion>
+          <exclusion>
+            <groupId>commons-codec</groupId>
+            <artifactId>commons-codec</artifactId>
+          </exclusion>
+        </exclusions>
+      </dependency>
     </dependencies>
   </dependencyManagement>

@@ -1875,7 +1941,7 @@
       <properties>
         <alt-hadoop>mapr</alt-hadoop>
         <rat.excludeSubprojects>true</rat.excludeSubprojects>
-        <hive.version>1.2.0-mapr-1707</hive.version>
+        <hive.version>2.1.1-mapr-1710</hive.version>
         <hbase.version>1.1.1-mapr-1602-m7-5.2.0</hbase.version>
         <hadoop.version>2.7.0-mapr-1707</hadoop.version>
       </properties>