Repository: hive
Updated Branches:
  refs/heads/master bed17e54d -> 90d19acd2
HIVE-17683: Add explain locks <sql> command (Igor Kryvenko via Eugene Koifman)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/90d19acd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/90d19acd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/90d19acd

Branch: refs/heads/master
Commit: 90d19acd2b4f8301847ef13b4c8a91df3eafc65d
Parents: bed17e5
Author: Igor Kryvenko <kryvenko7i...@gmail.com>
Authored: Mon Jul 23 11:02:17 2018 -0700
Committer: Eugene Koifman <ekoif...@apache.org>
Committed: Mon Jul 23 11:02:17 2018 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/exec/ExplainTask.java |  46 ++++
 .../org/apache/hadoop/hive/ql/io/AcidUtils.java | 206 ++++++++++++++++++
 .../hadoop/hive/ql/lockmgr/DbTxnManager.java    | 218 +------------------
 .../apache/hadoop/hive/ql/metadata/Table.java   |   4 +
 .../hive/ql/parse/ExplainConfiguration.java     |   8 +
 .../hive/ql/parse/ExplainSemanticAnalyzer.java  |   2 +
 .../apache/hadoop/hive/ql/parse/HiveParser.g    |   1 +
 .../apache/hadoop/hive/ql/plan/ExplainWork.java |  17 ++
 .../test/queries/clientpositive/explain_locks.q |  22 ++
 .../results/clientpositive/explain_locks.q.out  |  91 ++++++++
 .../hive/metastore/LockComponentBuilder.java    |   5 +
 .../hive/metastore/LockRequestBuilder.java      |  17 ++
 12 files changed, 429 insertions(+), 208 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/90d19acd/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
index 752c3f3..f185d9d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
@@ -45,9 +45,11 @@ import org.apache.hadoop.hive.common.jsonexplain.JsonParserFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.LockComponent;
 import org.apache.hadoop.hive.ql.Driver;
 import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
@@ -55,6 +57,7 @@ import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailL
 import org.apache.hadoop.hive.ql.plan.Explain;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
 import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
+import org.apache.hadoop.hive.ql.plan.ExplainLockDesc;
 import org.apache.hadoop.hive.ql.plan.ExplainWork;
 import org.apache.hadoop.hive.ql.plan.HiveOperation;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -330,6 +333,44 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
     return null;
   }
 
+  private JSONObject getLocks(PrintStream out, ExplainWork work) {
+
+    JSONObject jsonObject = new JSONObject(new LinkedHashMap<>());
+
+    boolean jsonOutput = work.isFormatted();
+    if (jsonOutput) {
+      out = null;
+    }
+    if (work.getParseContext() != null) {
+      List<LockComponent> lockComponents = AcidUtils.makeLockComponents(work.getOutputs(), work.getInputs(), conf);
+      if (null != out) {
+        out.print("LOCK INFORMATION:\n");
+      }
+      List<ExplainLockDesc> locks = new ArrayList<>(lockComponents.size());
+
+      for (LockComponent component : lockComponents) {
+        ExplainLockDesc lockDesc = new ExplainLockDesc(component);
+
+        if (null != out) {
+          out.print(lockDesc.getFullName());
+          out.print(" -> ");
+          out.print(lockDesc.getLockType());
+          out.print('\n');
+        } else {
+          locks.add(lockDesc);
+        }
+
+      }
+
+      if (jsonOutput) {
+        jsonObject.put("LOCK INFORMATION:", locks);
+      }
+    } else {
+      System.err.println("No parse context!");
+    }
+    return jsonObject;
+  }
+
   @Override
   public int execute(DriverContext driverContext) {
 
@@ -352,6 +393,11 @@
     } else if (work.getDependency()) {
       JSONObject jsonDependencies = getJSONDependencies(work);
       out.print(jsonDependencies);
+    } else if (work.isLocks()) {
+      JSONObject jsonLocks = getLocks(out, work);
+      if (work.isFormatted()) {
+        out.print(jsonLocks);
+      }
     } else {
       if (work.isUserLevelExplain()) {
         // Because of the implementation of the JsonParserFactory, we are sure
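
The new getLocks() method renders the same component list two ways: plain "name -> type" lines for EXPLAIN LOCKS, or a single JSON object under the "LOCK INFORMATION:" key for EXPLAIN FORMATTED LOCKS. A self-contained sketch of that branching (plain Java; the Lock record here is a hypothetical stand-in for LockComponent/ExplainLockDesc, not a Hive type):

import java.util.List;

public class LockInfoFormatDemo {
  record Lock(String fullName, String lockType) { }

  static String render(List<Lock> locks, boolean jsonOutput) {
    StringBuilder sb = new StringBuilder();
    if (jsonOutput) {
      // formatted mode: one JSON object keyed by "LOCK INFORMATION:"
      sb.append("{\"LOCK INFORMATION:\":\"[");
      for (int i = 0; i < locks.size(); i++) {
        if (i > 0) sb.append(", ");
        sb.append(locks.get(i).fullName()).append(" -> ").append(locks.get(i).lockType());
      }
      sb.append("]\"}");
    } else {
      // plain mode: a header plus one "name -> type" line per lock
      sb.append("LOCK INFORMATION:\n");
      for (Lock l : locks) {
        sb.append(l.fullName()).append(" -> ").append(l.lockType()).append('\n');
      }
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    List<Lock> locks = List.of(
        new Lock("default.source", "SHARED_READ"),
        new Lock("default.target.p=2/q=2", "SHARED_WRITE"));
    System.out.print(render(locks, false));
    System.out.println(render(locks, true));
  }
}

Note that the formatted path defers printing to execute(), which is why getLocks() nulls out the PrintStream when jsonOutput is set.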

http://git-wip-us.apache.org/repos/asf/hive/blob/90d19acd/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 16ba82e..a9983b0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -32,6 +32,7 @@ import java.util.Properties;
 import java.util.Set;
 import java.util.regex.Pattern;
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -45,17 +46,25 @@ import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
 import org.apache.hadoop.hive.common.ValidWriteIdList;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.LockComponentBuilder;
 import org.apache.hadoop.hive.metastore.TransactionalValidationListener;
 import org.apache.hadoop.hive.metastore.api.DataOperationType;
+import org.apache.hadoop.hive.metastore.api.LockComponent;
+import org.apache.hadoop.hive.metastore.api.LockType;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.hooks.Entity;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.AcidUtils.ParsedDelta;
 import org.apache.hadoop.hive.ql.io.orc.OrcFile;
 import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater;
 import org.apache.hadoop.hive.ql.io.orc.Reader;
 import org.apache.hadoop.hive.ql.io.orc.Writer;
+import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
@@ -1984,4 +1993,201 @@ public class AcidUtils {
     tblProps.put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "false");
     tblProps.remove(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
   }
+
+  private static boolean needsLock(Entity entity) {
+    switch (entity.getType()) {
+    case TABLE:
+      return isLockableTable(entity.getTable());
+    case PARTITION:
+      return isLockableTable(entity.getPartition().getTable());
+    default:
+      return true;
+    }
+  }
+
+  private static Table getTable(WriteEntity we) {
+    Table t = we.getTable();
+    if (t == null) {
+      throw new IllegalStateException("No table info for " + we);
+    }
+    return t;
+  }
+
+  private static boolean isLockableTable(Table t) {
+    if (t.isTemporary()) {
+      return false;
+    }
+    switch (t.getTableType()) {
+    case MANAGED_TABLE:
+    case MATERIALIZED_VIEW:
+      return true;
+    default:
+      return false;
+    }
+  }
+
+  /**
+   * Create lock components from write/read entities.
+   * @param outputs write entities
+   * @param inputs read entities
+   * @param conf active Hive configuration
+   * @return list of lock components
+   */
+  public static List<LockComponent> makeLockComponents(Set<WriteEntity> outputs, Set<ReadEntity> inputs,
+      HiveConf conf) {
+    List<LockComponent> lockComponents = new ArrayList<>();
+    // For each source to read, get a shared lock
+    for (ReadEntity input : inputs) {
+      if (!input.needsLock() || input.isUpdateOrDelete() || !AcidUtils.needsLock(input)) {
+        // We don't want to acquire read locks during update or delete as we'll be acquiring write
+        // locks instead. Also, there's no need to lock temp tables since they're session wide.
+        continue;
+      }
+      LockComponentBuilder compBuilder = new LockComponentBuilder();
+      compBuilder.setShared();
+      compBuilder.setOperationType(DataOperationType.SELECT);
+
+      Table t = null;
+      switch (input.getType()) {
+      case DATABASE:
+        compBuilder.setDbName(input.getDatabase().getName());
+        break;
+
+      case TABLE:
+        t = input.getTable();
+        compBuilder.setDbName(t.getDbName());
+        compBuilder.setTableName(t.getTableName());
+        break;
+
+      case PARTITION:
+      case DUMMYPARTITION:
+        compBuilder.setPartitionName(input.getPartition().getName());
+        t = input.getPartition().getTable();
+        compBuilder.setDbName(t.getDbName());
+        compBuilder.setTableName(t.getTableName());
+        break;
+
+      default:
+        // This is a file or something we don't hold locks for.
+        continue;
+      }
+      if (t != null) {
+        compBuilder.setIsTransactional(AcidUtils.isTransactionalTable(t));
+      }
+      LockComponent comp = compBuilder.build();
+      LOG.debug("Adding lock component to lock request " + comp.toString());
+      lockComponents.add(comp);
+    }
+    // For each source to write to, get the appropriate lock type. If it's
+    // an OVERWRITE, we need to get an exclusive lock. If it's an insert (no
+    // overwrite) then we need a shared lock. If it's an update or delete then
+    // we need a SEMI-SHARED lock.
+    for (WriteEntity output : outputs) {
+      LOG.debug("output is null " + (output == null));
+      if (output.getType() == Entity.Type.DFS_DIR || output.getType() == Entity.Type.LOCAL_DIR || !AcidUtils
+          .needsLock(output)) {
+        // We don't lock files or directories. We also skip locking temp tables.
+        continue;
+      }
+      LockComponentBuilder compBuilder = new LockComponentBuilder();
+      Table t = null;
+      switch (output.getType()) {
+      case DATABASE:
+        compBuilder.setDbName(output.getDatabase().getName());
+        break;
+
+      case TABLE:
+      case DUMMYPARTITION: // in case of dynamic partitioning lock the table
+        t = output.getTable();
+        compBuilder.setDbName(t.getDbName());
+        compBuilder.setTableName(t.getTableName());
+        break;
+
+      case PARTITION:
+        compBuilder.setPartitionName(output.getPartition().getName());
+        t = output.getPartition().getTable();
+        compBuilder.setDbName(t.getDbName());
+        compBuilder.setTableName(t.getTableName());
+        break;
+
+      default:
+        // This is a file or something we don't hold locks for.
+        continue;
+      }
+      switch (output.getWriteType()) {
+      /* base this on HiveOperation instead? this and DDL_NO_LOCK is peppered all over the code...
+       Seems much cleaner if each stmt is identified as a particular HiveOperation (which I'd think
+       makes sense everywhere). This however would be problematic for merge...*/
+      case DDL_EXCLUSIVE:
+        compBuilder.setExclusive();
+        compBuilder.setOperationType(DataOperationType.NO_TXN);
+        break;
+      case INSERT_OVERWRITE:
+        t = AcidUtils.getTable(output);
+        if (AcidUtils.isTransactionalTable(t)) {
+          if (conf.getBoolVar(HiveConf.ConfVars.TXN_OVERWRITE_X_LOCK)) {
+            compBuilder.setExclusive();
+          } else {
+            compBuilder.setSemiShared();
+          }
+          compBuilder.setOperationType(DataOperationType.UPDATE);
+        } else {
+          compBuilder.setExclusive();
+          compBuilder.setOperationType(DataOperationType.NO_TXN);
+        }
+        break;
+      case INSERT:
+        assert t != null;
+        if (AcidUtils.isTransactionalTable(t)) {
+          compBuilder.setShared();
+        } else if (MetaStoreUtils.isNonNativeTable(t.getTTable())) {
+          final HiveStorageHandler storageHandler = Preconditions.checkNotNull(t.getStorageHandler(),
+              "Expected all non-native tables to have an instance of a storage handler");
+          LockType lockType = storageHandler.getLockType(output);
+          if (null == LockType.findByValue(lockType.getValue())) {
+            throw new IllegalArgumentException(String
                .format("Lock type [%s] for Database.Table [%s.%s] is unknown", lockType, t.getDbName(),
+                    t.getTableName()));
+          }
+          compBuilder.setLock(lockType);
+        } else {
+          if (conf.getBoolVar(HiveConf.ConfVars.HIVE_TXN_STRICT_LOCKING_MODE)) {
+            compBuilder.setExclusive();
+          } else { // this is backward compatible for non-ACID resources, w/o ACID semantics
+            compBuilder.setShared();
+          }
+        }
+        compBuilder.setOperationType(DataOperationType.INSERT);
+        break;
+      case DDL_SHARED:
+        compBuilder.setShared();
+        compBuilder.setOperationType(DataOperationType.NO_TXN);
+        break;
+
+      case UPDATE:
+        compBuilder.setSemiShared();
+        compBuilder.setOperationType(DataOperationType.UPDATE);
+        break;
+      case DELETE:
+        compBuilder.setSemiShared();
+        compBuilder.setOperationType(DataOperationType.DELETE);
+        break;
+
+      case DDL_NO_LOCK:
+        continue; // No lock required here
+
+      default:
+        throw new RuntimeException("Unknown write type " + output.getWriteType().toString());
+      }
+      if (t != null) {
+        compBuilder.setIsTransactional(AcidUtils.isTransactionalTable(t));
+      }
+
+      compBuilder.setIsDynamicPartitionWrite(output.isDynamicPartitionWrite());
+      LockComponent comp = compBuilder.build();
+      LOG.debug("Adding lock component to lock request " + comp.toString());
+      lockComponents.add(comp);
+    }
+    return lockComponents;
+  }
 }
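
The write-entity loop above is essentially a decision table from write type and table properties to a lock type. A condensed, self-contained sketch of that table under stated assumptions — plain enums stand in for WriteEntity.WriteType and the metastore LockType, the two boolean flags model hive.txn.xlock.iow (TXN_OVERWRITE_X_LOCK) and hive.txn.strict.locking.mode, and the non-native storage-handler and DDL_NO_LOCK branches are omitted:

public class WriteLockChoiceDemo {
  enum WriteType { DDL_EXCLUSIVE, INSERT_OVERWRITE, INSERT, DDL_SHARED, UPDATE, DELETE }
  enum Lock { EXCLUSIVE, SHARED_READ, SHARED_WRITE } // SHARED_WRITE ~ "semi-shared"

  static Lock choose(WriteType wt, boolean transactional, boolean overwriteXLock, boolean strictNonAcid) {
    switch (wt) {
    case DDL_EXCLUSIVE:
      return Lock.EXCLUSIVE;
    case INSERT_OVERWRITE:
      // an ACID overwrite may downgrade to semi-shared; a non-ACID one is always exclusive
      if (transactional) {
        return overwriteXLock ? Lock.EXCLUSIVE : Lock.SHARED_WRITE;
      }
      return Lock.EXCLUSIVE;
    case INSERT:
      // an ACID insert is shared; non-ACID depends on hive.txn.strict.locking.mode
      if (transactional) {
        return Lock.SHARED_READ;
      }
      return strictNonAcid ? Lock.EXCLUSIVE : Lock.SHARED_READ;
    case DDL_SHARED:
      return Lock.SHARED_READ;
    case UPDATE:
    case DELETE:
      return Lock.SHARED_WRITE;
    default:
      throw new IllegalArgumentException("Unknown write type " + wt);
    }
  }

  public static void main(String[] args) {
    System.out.println(choose(WriteType.INSERT_OVERWRITE, true, true, true)); // EXCLUSIVE
    System.out.println(choose(WriteType.UPDATE, true, true, true));           // SHARED_WRITE
  }
}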

http://git-wip-us.apache.org/repos/asf/hive/blob/90d19acd/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
index 78980fa..06067a2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java
@@ -18,20 +18,16 @@ package org.apache.hadoop.hive.ql.lockmgr;
 
 import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.JavaUtils;
 import org.apache.hadoop.hive.common.ValidTxnList;
 import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
-import org.apache.hadoop.hive.metastore.LockComponentBuilder;
 import org.apache.hadoop.hive.metastore.LockRequestBuilder;
-import org.apache.hadoop.hive.metastore.api.DataOperationType;
 import org.apache.hadoop.hive.metastore.api.LockComponent;
 import org.apache.hadoop.hive.metastore.api.LockResponse;
 import org.apache.hadoop.hive.metastore.api.LockState;
-import org.apache.hadoop.hive.metastore.api.LockType;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NoSuchLockException;
 import org.apache.hadoop.hive.metastore.api.NoSuchTxnException;
@@ -39,18 +35,13 @@ import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
 import org.apache.hadoop.hive.metastore.api.TxnToWriteId;
 import org.apache.hadoop.hive.metastore.api.CommitTxnRequest;
 import org.apache.hadoop.hive.metastore.txn.TxnUtils;
-import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.QueryPlan;
-import org.apache.hadoop.hive.ql.hooks.Entity;
-import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
-import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.plan.HiveOperation;
 import org.apache.hadoop.hive.ql.plan.LockDatabaseDesc;
 import org.apache.hadoop.hive.ql.plan.LockTableDesc;
@@ -381,28 +372,7 @@ public final class DbTxnManager extends HiveTxnManagerImpl {
     //todo: handle Insert Overwrite as well: HIVE-18154
     return false;
   }
-  private boolean needsLock(Entity entity) {
-    switch (entity.getType()) {
-      case TABLE:
-        return isLockableTable(entity.getTable());
-      case PARTITION:
-        return isLockableTable(entity.getPartition().getTable());
-      default:
-        return true;
-    }
-  }
-  private boolean isLockableTable(Table t) {
-    if(t.isTemporary()) {
-      return false;
-    }
-    switch (t.getTableType()) {
-      case MANAGED_TABLE:
-      case MATERIALIZED_VIEW:
-        return true;
-      default:
-        return false;
-    }
-  }
+
   /**
    * Normally client should call {@link #acquireLocks(org.apache.hadoop.hive.ql.QueryPlan, org.apache.hadoop.hive.ql.Context, String)}
    * @param isBlocking if false, the method will return immediately; thus the locks may be in LockState.WAITING
@@ -414,7 +384,6 @@ public final class DbTxnManager extends HiveTxnManagerImpl {
     // Make sure we've built the lock manager
     getLockManager();
     verifyState(plan);
-    boolean atLeastOneLock = false;
     queryId = plan.getQueryId();
     switch (plan.getOperation()) {
     case SET_AUTOCOMMIT:
@@ -429,193 +398,26 @@ public final class DbTxnManager extends HiveTxnManagerImpl {
     rqstBuilder.setTransactionId(txnId)
         .setUser(username);
 
-    // For each source to read, get a shared lock
-    for (ReadEntity input : plan.getInputs()) {
-      if (!input.needsLock() || input.isUpdateOrDelete() || !needsLock(input)) {
-        // We don't want to acquire read locks during update or delete as we'll be acquiring write
-        // locks instead. Also, there's no need to lock temp tables since they're session wide
-        continue;
-      }
-      LockComponentBuilder compBuilder = new LockComponentBuilder();
-      compBuilder.setShared();
-      compBuilder.setOperationType(DataOperationType.SELECT);
-
-      Table t = null;
-      switch (input.getType()) {
-        case DATABASE:
-          compBuilder.setDbName(input.getDatabase().getName());
-          break;
-
-        case TABLE:
-          t = input.getTable();
-          compBuilder.setDbName(t.getDbName());
-          compBuilder.setTableName(t.getTableName());
-          break;
-
-        case PARTITION:
-        case DUMMYPARTITION:
-          compBuilder.setPartitionName(input.getPartition().getName());
-          t = input.getPartition().getTable();
-          compBuilder.setDbName(t.getDbName());
-          compBuilder.setTableName(t.getTableName());
-          break;
-
-        default:
-          // This is a file or something we don't hold locks for.
-          continue;
-      }
-      if(t != null) {
-        compBuilder.setIsTransactional(AcidUtils.isTransactionalTable(t));
-      }
-      LockComponent comp = compBuilder.build();
-      LOG.debug("Adding lock component to lock request " + comp.toString());
-      rqstBuilder.addLockComponent(comp);
-      atLeastOneLock = true;
-    }
-
-    // For each source to write to, get the appropriate lock type. If it's
-    // an OVERWRITE, we need to get an exclusive lock. If it's an insert (no
-    // overwrite) than we need a shared. If it's update or delete then we
-    // need a SEMI-SHARED.
-    for (WriteEntity output : plan.getOutputs()) {
-      LOG.debug("output is null " + (output == null));
-      if (output.getType() == Entity.Type.DFS_DIR || output.getType() == Entity.Type.LOCAL_DIR ||
-          !needsLock(output)) {
-        // We don't lock files or directories. We also skip locking temp tables.
-        continue;
-      }
-      LockComponentBuilder compBuilder = new LockComponentBuilder();
-      Table t = null;
-      switch (output.getType()) {
-        case DATABASE:
-          compBuilder.setDbName(output.getDatabase().getName());
-          break;
-
-        case TABLE:
-        case DUMMYPARTITION: // in case of dynamic partitioning lock the table
-          t = output.getTable();
-          compBuilder.setDbName(t.getDbName());
-          compBuilder.setTableName(t.getTableName());
-          break;
-
-        case PARTITION:
-          compBuilder.setPartitionName(output.getPartition().getName());
-          t = output.getPartition().getTable();
-          compBuilder.setDbName(t.getDbName());
-          compBuilder.setTableName(t.getTableName());
-          break;
-
-        default:
-          // This is a file or something we don't hold locks for.
-          continue;
-      }
-      switch (output.getWriteType()) {
-        /* base this on HiveOperation instead? this and DDL_NO_LOCK is peppered all over the code...
-         Seems much cleaner if each stmt is identified as a particular HiveOperation (which I'd think
-         makes sense everywhere). This however would be problematic for merge...*/
-        case DDL_EXCLUSIVE:
-          compBuilder.setExclusive();
-          compBuilder.setOperationType(DataOperationType.NO_TXN);
-          break;
-        case INSERT_OVERWRITE:
-          t = getTable(output);
-          if (AcidUtils.isTransactionalTable(t)) {
-            if (conf.getBoolVar(HiveConf.ConfVars.TXN_OVERWRITE_X_LOCK)) {
-              compBuilder.setExclusive();
-            } else {
-              compBuilder.setSemiShared();
-            }
-            compBuilder.setOperationType(DataOperationType.UPDATE);
-          } else {
-            compBuilder.setExclusive();
-            compBuilder.setOperationType(DataOperationType.NO_TXN);
-          }
-          break;
-        case INSERT:
-          assert t != null;
-          if (AcidUtils.isTransactionalTable(t)) {
-            compBuilder.setShared();
-          } else if (MetaStoreUtils.isNonNativeTable(t.getTTable())) {
-            final HiveStorageHandler storageHandler = Preconditions.checkNotNull(t.getStorageHandler(),
-                "Thought all the non native tables have an instance of storage handler"
-            );
-            LockType lockType = storageHandler.getLockType(output);
-            switch (lockType) {
-              case EXCLUSIVE:
-                compBuilder.setExclusive();
-                break;
-              case SHARED_READ:
-                compBuilder.setShared();
-                break;
-              case SHARED_WRITE:
-                compBuilder.setSemiShared();
-                break;
-              default:
-                throw new IllegalArgumentException(String
-                    .format("Lock type [%s] for Database.Table [%s.%s] is unknown", lockType, t.getDbName(),
-                        t.getTableName()
-                    ));
-            }
-
-          } else {
-            if (conf.getBoolVar(HiveConf.ConfVars.HIVE_TXN_STRICT_LOCKING_MODE)) {
-              compBuilder.setExclusive();
-            } else { // this is backward compatible for non-ACID resources, w/o ACID semantics
-              compBuilder.setShared();
-            }
-          }
-          compBuilder.setOperationType(DataOperationType.INSERT);
-          break;
-        case DDL_SHARED:
-          compBuilder.setShared();
-          compBuilder.setOperationType(DataOperationType.NO_TXN);
-          break;
-
-        case UPDATE:
-          compBuilder.setSemiShared();
-          compBuilder.setOperationType(DataOperationType.UPDATE);
-          break;
-        case DELETE:
-          compBuilder.setSemiShared();
-          compBuilder.setOperationType(DataOperationType.DELETE);
-          break;
-
-        case DDL_NO_LOCK:
-          continue; // No lock required here
-
-        default:
-          throw new RuntimeException("Unknown write type " + output.getWriteType().toString());
-      }
-      if (t != null) {
-        compBuilder.setIsTransactional(AcidUtils.isTransactionalTable(t));
-      }
-
-      compBuilder.setIsDynamicPartitionWrite(output.isDynamicPartitionWrite());
-      LockComponent comp = compBuilder.build();
-      LOG.debug("Adding lock component to lock request " + comp.toString());
-      rqstBuilder.addLockComponent(comp);
-      atLeastOneLock = true;
-    }
-    //plan
     // Make sure we need locks. It's possible there's nothing to lock in
     // this operation.
-    if (!atLeastOneLock) {
+    if (plan.getInputs().isEmpty() && plan.getOutputs().isEmpty()) {
+      LOG.debug("No locks needed for queryId " + queryId);
+      return null;
+    }
+    List<LockComponent> lockComponents = AcidUtils.makeLockComponents(plan.getOutputs(), plan.getInputs(), conf);
+    // It's possible there's nothing to lock even if we have read/write entities.
+    if (lockComponents.isEmpty()) {
       LOG.debug("No locks needed for queryId" + queryId);
       return null;
     }
+    rqstBuilder.addLockComponents(lockComponents);
     List<HiveLock> locks = new ArrayList<HiveLock>(1);
     LockState lockState = lockMgr.lock(rqstBuilder.build(), queryId, isBlocking, locks);
     ctx.setHiveLocks(locks);
     return lockState;
   }
-
-  private static Table getTable(WriteEntity we) {
-    Table t = we.getTable();
-    if(t == null) {
-      throw new IllegalStateException("No table info for " + we);
-    }
-    return t;
-  }
+
   /**
    * @param delay time to delay for first heartbeat
    */
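
After the refactor, acquireLocks() keeps only the orchestration: bail out early when the plan has no entities at all, delegate component construction to AcidUtils.makeLockComponents(), bail out again if nothing lockable came back, and hand the whole batch to the request builder. A minimal stand-alone sketch of that flow (String stand-ins replace Hive's entity and lock types):

import java.util.ArrayList;
import java.util.List;

class AcquireLocksFlowDemo {
  static List<String> makeLockComponents(List<String> outputs, List<String> inputs) {
    // stand-in for AcidUtils.makeLockComponents(outputs, inputs, conf)
    List<String> components = new ArrayList<>();
    for (String in : inputs) {
      components.add(in + " -> SHARED_READ");
    }
    for (String out : outputs) {
      components.add(out + " -> SHARED_WRITE");
    }
    return components;
  }

  static List<String> acquire(List<String> inputs, List<String> outputs) {
    if (inputs.isEmpty() && outputs.isEmpty()) {
      return null; // nothing to lock for this query
    }
    List<String> components = makeLockComponents(outputs, inputs);
    if (components.isEmpty()) {
      return null; // entities existed, but none of them needed a lock
    }
    // stand-in for rqstBuilder.addLockComponents(components) followed by lockMgr.lock(...)
    return components;
  }

  public static void main(String[] args) {
    System.out.println(acquire(List.of("default.source"), List.of("default.target")));
  }
}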

http://git-wip-us.apache.org/repos/asf/hive/blob/90d19acd/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
index 14e60f0..03b0269 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
@@ -1087,4 +1087,8 @@ public class Table implements Serializable {
   public boolean hasDeserializer() {
     return deserializer != null;
   }
+
+  public String getCatalogName() {
+    return this.tTable.getCatName();
+  }
 };

http://git-wip-us.apache.org/repos/asf/hive/blob/90d19acd/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
index 105ef08..5ca6b59 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
@@ -47,6 +47,7 @@ public class ExplainConfiguration {
   private boolean vectorization = false;
   private boolean vectorizationOnly = false;
   private VectorizationDetailLevel vectorizationDetailLevel = VectorizationDetailLevel.SUMMARY;
+  private boolean locks = false;
 
   private Path explainRootPath;
   private Map<String, Long> opIdToRuntimeNumRows;
@@ -153,4 +154,11 @@ public class ExplainConfiguration {
     this.opIdToRuntimeNumRows = opIdToRuntimeNumRows;
   }
 
+  public boolean isLocks() {
+    return locks;
+  }
+
+  public void setLocks(boolean locks) {
+    this.locks = locks;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/90d19acd/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
index feec0fd..3aefb61 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java
@@ -112,6 +112,8 @@ public class ExplainSemanticAnalyzer extends BaseSemanticAnalyzer {
           i++;
         }
       }
+    } else if (explainOptions == HiveParser.KW_LOCKS) {
+      config.setLocks(true);
     } else {
       // UNDONE: UNKNOWN OPTION?
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/90d19acd/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 1f53321..6be48ca 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -799,6 +799,7 @@ explainOption
     | KW_AUTHORIZATION
     | KW_ANALYZE
     | KW_REOPTIMIZATION
+    | KW_LOCKS
     | (KW_VECTORIZATION vectorizationOnly? vectorizatonDetail?)
     ;

http://git-wip-us.apache.org/repos/asf/hive/blob/90d19acd/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
index ce03003..3e62142 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
@@ -26,6 +26,7 @@ import java.util.List;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.ExplainConfiguration;
 import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel;
@@ -42,6 +43,7 @@ public class ExplainWork implements Serializable {
   private ArrayList<Task<?>> rootTasks;
   private Task<?> fetchTask;
   private HashSet<ReadEntity> inputs;
+  private HashSet<WriteEntity> outputs;
   private ParseContext pCtx;
 
   private ExplainConfiguration config;
@@ -72,6 +74,9 @@ public class ExplainWork implements Serializable {
     if (analyzer != null) {
       this.inputs = analyzer.getInputs();
     }
+    if (analyzer != null) {
+      this.outputs = analyzer.getAllOutputs();
+    }
     this.pCtx = pCtx;
     this.cboInfo = cboInfo;
     this.optimizedSQL = optimizedSQL;
@@ -110,6 +115,14 @@ public class ExplainWork implements Serializable {
     this.inputs = inputs;
   }
 
+  public HashSet<WriteEntity> getOutputs() {
+    return outputs;
+  }
+
+  public void setOutputs(HashSet<WriteEntity> outputs) {
+    this.outputs = outputs;
+  }
+
   public boolean getExtended() {
     return config.isExtended();
   }
@@ -190,4 +203,8 @@ public class ExplainWork implements Serializable {
     this.config = config;
   }
 
+  public boolean isLocks() {
+    return config.isLocks();
+  }
+
 }
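
With the KW_LOCKS grammar alternative and the ExplainWork plumbing in place, the option is reachable from any client. A hedged sketch using the standard Hive JDBC driver — the connection URL, table, and predicate are illustrative assumptions, not part of this commit:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class ExplainLocksClientDemo {
  public static void main(String[] args) throws Exception {
    try (Connection conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
         Statement stmt = conn.createStatement();
         // EXPLAIN LOCKS <sql> resolves through the new KW_LOCKS explainOption
         ResultSet rs = stmt.executeQuery("EXPLAIN LOCKS UPDATE target SET b = 1 WHERE p = 2")) {
      while (rs.next()) {
        System.out.println(rs.getString(1)); // e.g. "default.target.p=2/q=2 -> SHARED_WRITE"
      }
    }
  }
}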

http://git-wip-us.apache.org/repos/asf/hive/blob/90d19acd/ql/src/test/queries/clientpositive/explain_locks.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/explain_locks.q b/ql/src/test/queries/clientpositive/explain_locks.q
new file mode 100644
index 0000000..a0e273f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/explain_locks.q
@@ -0,0 +1,22 @@
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+explain locks drop table test_explain_locks;
+explain locks create table test_explain_locks (a int, b int);
+drop table if exists target;
+drop table if exists source;
+
+create table target (a int, b int) partitioned by (p int, q int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+create table source (a1 int, b1 int, p1 int, q1 int) clustered by (a1) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+insert into target partition(p,q) values (1,2,1,2), (3,4,1,2), (5,6,1,3), (7,8,2,2);
+
+-- the intent here is to record the set of ReadEntity and WriteEntity objects for these 2 update statements
+explain locks update target set b = 1 where p in (select t.q1 from source t where t.a1=5);
+
+explain locks update source set b1 = 1 where p1 in (select t.q from target t where t.p=2);
+
+explain formatted locks update source set b1 = 1 where p1 in (select t.q from target t where t.p=2);
+
+-- the extra predicates in when matched clause match 1 partition
+explain locks merge into target t using source s on t.a = s.a1 when matched and p = 1 and q = 2 then update set b = 1 when matched and p = 2 and q = 2 then delete when not matched and a1 > 100 then insert values(s.a1,s.b1,s.p1, s.q1);
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/90d19acd/ql/src/test/results/clientpositive/explain_locks.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/explain_locks.q.out b/ql/src/test/results/clientpositive/explain_locks.q.out
new file mode 100644
index 0000000..72aa410
--- /dev/null
+++ b/ql/src/test/results/clientpositive/explain_locks.q.out
@@ -0,0 +1,91 @@
+PREHOOK: query: explain locks drop table test_explain_locks
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: explain locks drop table test_explain_locks
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: explain locks create table test_explain_locks (a int, b int)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: explain locks create table test_explain_locks (a int, b int)
+POSTHOOK: type: CREATETABLE
+LOCK INFORMATION:
+default -> SHARED_READ
+PREHOOK: query: drop table if exists target
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists target
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists source
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists source
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table target (a int, b int) partitioned by (p int, q int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@target
+POSTHOOK: query: create table target (a int, b int) partitioned by (p int, q int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@target
+PREHOOK: query: create table source (a1 int, b1 int, p1 int, q1 int) clustered by (a1) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@source
+POSTHOOK: query: create table source (a1 int, b1 int, p1 int, q1 int) clustered by (a1) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@source
+PREHOOK: query: insert into target partition(p,q) values (1,2,1,2), (3,4,1,2), (5,6,1,3), (7,8,2,2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@target
+POSTHOOK: query: insert into target partition(p,q) values (1,2,1,2), (3,4,1,2), (5,6,1,3), (7,8,2,2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@target@p=1/q=2
+POSTHOOK: Output: default@target@p=1/q=3
+POSTHOOK: Output: default@target@p=2/q=2
+POSTHOOK: Lineage: target PARTITION(p=1,q=2).a SCRIPT []
+POSTHOOK: Lineage: target PARTITION(p=1,q=2).b SCRIPT []
+POSTHOOK: Lineage: target PARTITION(p=1,q=3).a SCRIPT []
+POSTHOOK: Lineage: target PARTITION(p=1,q=3).b SCRIPT []
+POSTHOOK: Lineage: target PARTITION(p=2,q=2).a SCRIPT []
+POSTHOOK: Lineage: target PARTITION(p=2,q=2).b SCRIPT []
+PREHOOK: query: explain locks update target set b = 1 where p in (select t.q1 from source t where t.a1=5)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain locks update target set b = 1 where p in (select t.q1 from source t where t.a1=5)
+POSTHOOK: type: QUERY
+LOCK INFORMATION:
+default.source -> SHARED_READ
+default.target.p=1/q=2 -> SHARED_READ
+default.target.p=1/q=3 -> SHARED_READ
+default.target.p=2/q=2 -> SHARED_READ
+default.target.p=2/q=2 -> SHARED_WRITE
+default.target.p=1/q=3 -> SHARED_WRITE
+default.target.p=1/q=2 -> SHARED_WRITE
+PREHOOK: query: explain locks update source set b1 = 1 where p1 in (select t.q from target t where t.p=2)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain locks update source set b1 = 1 where p1 in (select t.q from target t where t.p=2)
+POSTHOOK: type: QUERY
+LOCK INFORMATION:
+default.target -> SHARED_READ
+default.target.p=2/q=2 -> SHARED_READ
+default.source -> SHARED_WRITE
+PREHOOK: query: explain formatted locks update source set b1 = 1 where p1 in (select t.q from target t where t.p=2)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain formatted locks update source set b1 = 1 where p1 in (select t.q from target t where t.p=2)
+POSTHOOK: type: QUERY
+{"LOCK INFORMATION:":"[default.target -> SHARED_READ, default.target.p=2/q=2 -> SHARED_READ, default.source -> SHARED_WRITE]"}
+PREHOOK: query: explain locks merge into target t using source s on t.a = s.a1 when matched and p = 1 and q = 2 then update set b = 1 when matched and p = 2 and q = 2 then delete when not matched and a1 > 100 then insert values(s.a1,s.b1,s.p1, s.q1)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain locks merge into target t using source s on t.a = s.a1 when matched and p = 1 and q = 2 then update set b = 1 when matched and p = 2 and q = 2 then delete when not matched and a1 > 100 then insert values(s.a1,s.b1,s.p1, s.q1)
+POSTHOOK: type: QUERY
+LOCK INFORMATION:
+default.source -> SHARED_READ
+default.target.p=1/q=2 -> SHARED_READ
+default.target.p=1/q=3 -> SHARED_READ
+default.target.p=2/q=2 -> SHARED_READ
+default.target.p=2/q=2 -> SHARED_WRITE
+default.target.p=2/q=2 -> SHARED_WRITE
+default.target.p=1/q=3 -> SHARED_WRITE
+default.target.p=1/q=3 -> SHARED_WRITE
+default.target.p=1/q=2 -> SHARED_WRITE
+default.target.p=1/q=2 -> SHARED_WRITE
+default.target -> SHARED_READ
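
The repeated SHARED_WRITE entries in the MERGE output above are expected: each WHEN MATCHED branch contributes its own component per partition, and EXPLAIN LOCKS prints the raw list from makeLockComponents() before the metastore-side builder de-duplicates it. A simplified sketch of that collapse (Hive's actual trie-based dedup in LockRequestBuilder is more involved):

import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

class LockDedupDemo {
  public static void main(String[] args) {
    List<String> raw = List.of(
        "default.target.p=2/q=2 -> SHARED_WRITE",
        "default.target.p=2/q=2 -> SHARED_WRITE", // duplicate from the DELETE branch
        "default.target.p=1/q=2 -> SHARED_WRITE");
    Set<String> deduped = new LinkedHashSet<>(raw); // keep one component per (entity, type)
    deduped.forEach(System.out::println);
  }
}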
  }
+
+  public LockComponent setLock(LockType type) {
+    component.setType(type);
+    return component;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/90d19acd/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/LockRequestBuilder.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/LockRequestBuilder.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/LockRequestBuilder.java
index d03c73a..22902a9 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/LockRequestBuilder.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/LockRequestBuilder.java
@@ -23,6 +23,7 @@ import org.apache.hadoop.hive.metastore.api.LockType;
 
 import java.net.InetAddress;
 import java.net.UnknownHostException;
+import java.util.Collection;
 import java.util.LinkedHashMap;
 import java.util.Map;
 
@@ -94,6 +95,16 @@ public class LockRequestBuilder {
     return this;
   }
 
+  /**
+   * Add a collection of lock components to the lock request.
+   * @param components lock components to add
+   * @return reference to this builder
+   */
+  public LockRequestBuilder addLockComponents(Collection<LockComponent> components) {
+    trie.addAll(components);
+    return this;
+  }
+
   // For reasons that are completely incomprehensible to me the semantic
   // analyzers often ask for multiple locks on the same entity (for example
   // a shared_read and an exclusive lock). The db locking system gets confused
@@ -120,6 +131,12 @@
     setTable(comp, tabs);
   }
 
+  public void addAll(Collection<LockComponent> components) {
+    for (LockComponent component : components) {
+      add(component);
+    }
+  }
+
   public void addLocksToRequest(LockRequest request) {
     for (TableTrie tab : trie.values()) {
       for (PartTrie part : tab.values()) {
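
The two new entry points — LockComponentBuilder.setLock() and LockRequestBuilder.addLockComponents() — compose as below. A hedged sketch: it assumes the standalone-metastore client classes are on the classpath, and the single-String LockRequestBuilder constructor (agent info) plus all literal values are illustrative assumptions rather than part of this diff:

import java.util.Arrays;
import org.apache.hadoop.hive.metastore.LockComponentBuilder;
import org.apache.hadoop.hive.metastore.LockRequestBuilder;
import org.apache.hadoop.hive.metastore.api.DataOperationType;
import org.apache.hadoop.hive.metastore.api.LockComponent;
import org.apache.hadoop.hive.metastore.api.LockRequest;

public class LockRequestDemo {
  public static void main(String[] args) {
    // Build one component per entity; setShared()/setSemiShared()/setExclusive()
    // pick the lock type, while the new setLock(LockType) sets it directly.
    LockComponentBuilder compBuilder = new LockComponentBuilder();
    compBuilder.setShared();
    compBuilder.setOperationType(DataOperationType.SELECT);
    compBuilder.setDbName("default");
    compBuilder.setTableName("source");
    LockComponent comp = compBuilder.build();

    // The new addLockComponents() feeds the whole batch returned by
    // AcidUtils.makeLockComponents() through the de-duplicating trie at once.
    LockRequest request = new LockRequestBuilder("explain-locks-demo") // agent info; assumed ctor
        .setTransactionId(42)
        .setUser("hive")
        .addLockComponents(Arrays.asList(comp))
        .build();
    System.out.println(request);
  }
}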