Repository: hive Updated Branches: refs/heads/branch-1 6675a7332 -> cae46469b
HIVE-11006 improve logging wrt ACID module (Eugene Koifman, reviewed by Alan Gates) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cae46469 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cae46469 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cae46469 Branch: refs/heads/branch-1 Commit: cae46469bec75389849f44835136cd22d527f269 Parents: 6675a73 Author: Eugene Koifman <ekoif...@hortonworks.com> Authored: Wed Jun 17 13:19:20 2015 -0700 Committer: Eugene Koifman <ekoif...@hortonworks.com> Committed: Wed Jun 17 13:19:20 2015 -0700 ---------------------------------------------------------------------- .../hive/metastore/txn/CompactionInfo.java | 9 +++++ .../metastore/txn/CompactionTxnHandler.java | 19 +++++------ .../hadoop/hive/metastore/txn/TxnHandler.java | 36 +++++++++++--------- .../hadoop/hive/ql/lockmgr/DbLockManager.java | 4 +-- .../hadoop/hive/ql/lockmgr/DbTxnManager.java | 2 +- 5 files changed, 41 insertions(+), 29 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/cae46469/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionInfo.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionInfo.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionInfo.java index 939df3f..1dae7b9 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionInfo.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionInfo.java @@ -73,4 +73,13 @@ public class CompactionInfo implements Comparable<CompactionInfo> { public int compareTo(CompactionInfo o) { return getFullPartitionName().compareTo(o.getFullPartitionName()); } + public String toString() { + return "id:" + id + "," + + "dbname:" + dbname + "," + + "tableName:" + tableName + "," + + "partName:" + partName + "," + + "type:" + type + "," + + "runAs:" + runAs + "," + + "tooManyAborts:" + tooManyAborts; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/cae46469/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java index 26e72be..328a65c 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/CompactionTxnHandler.java @@ -95,7 +95,7 @@ public class CompactionTxnHandler extends TxnHandler { dbConn.rollback(); } catch (SQLException e) { LOG.error("Unable to connect to transaction database " + e.getMessage()); - checkRetryable(dbConn, e, "findPotentialCompactions"); + checkRetryable(dbConn, e, "findPotentialCompactions(maxAborted:" + maxAborted + ")"); } finally { closeDbConn(dbConn); closeStmt(stmt); @@ -133,7 +133,7 @@ public class CompactionTxnHandler extends TxnHandler { LOG.error("Unable to update compaction queue, " + e.getMessage()); LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "setRunAs"); + checkRetryable(dbConn, e, "setRunAs(cq_id:" + cq_id + ",user:" + user +")"); } finally { closeDbConn(dbConn); closeStmt(stmt); @@ -194,7 +194,7 @@ public class CompactionTxnHandler extends TxnHandler { LOG.error("Unable to select next element for compaction, " + e.getMessage()); LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "findNextToCompact"); + checkRetryable(dbConn, e, "findNextToCompact(workerId:" + workerId + ")"); throw new MetaException("Unable to connect to transaction database " + StringUtils.stringifyException(e)); } finally { @@ -232,7 +232,7 @@ public class CompactionTxnHandler extends TxnHandler { LOG.error("Unable to update compaction queue " + e.getMessage()); LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "markCompacted"); + checkRetryable(dbConn, e, "markCompacted(" + info + ")"); throw new MetaException("Unable to connect to transaction database " + StringUtils.stringifyException(e)); } finally { @@ -374,7 +374,7 @@ public class CompactionTxnHandler extends TxnHandler { LOG.error("Unable to delete from compaction queue " + e.getMessage()); LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "markCleaned"); + checkRetryable(dbConn, e, "markCleaned(" + info + ")"); throw new MetaException("Unable to connect to transaction database " + StringUtils.stringifyException(e)); } finally { @@ -465,7 +465,7 @@ public class CompactionTxnHandler extends TxnHandler { e.getMessage()); LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "revokeFromLocalWorkers"); + checkRetryable(dbConn, e, "revokeFromLocalWorkers(hostname:" + hostname +")"); throw new MetaException("Unable to connect to transaction database " + StringUtils.stringifyException(e)); } finally { @@ -508,7 +508,7 @@ public class CompactionTxnHandler extends TxnHandler { e.getMessage()); LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "revokeTimedoutWorkers"); + checkRetryable(dbConn, e, "revokeTimedoutWorkers(timeout:" + timeout + ")"); throw new MetaException("Unable to connect to transaction database " + StringUtils.stringifyException(e)); } finally { @@ -565,10 +565,9 @@ public class CompactionTxnHandler extends TxnHandler { dbConn.commit(); return columns; } catch (SQLException e) { - LOG.error("Failed to find columns to analyze stats on for " + ci.tableName + - (ci.partName == null ? "" : "/" + ci.partName), e); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "findColumnsWithStats"); + checkRetryable(dbConn, e, "findColumnsWithStats(" + ci.tableName + + (ci.partName == null ? "" : "/" + ci.partName) + ")"); throw new MetaException("Unable to connect to transaction database " + StringUtils.stringifyException(e)); } finally { http://git-wip-us.apache.org/repos/asf/hive/blob/cae46469/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java index 7c3b55c..fd9c275 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java @@ -318,7 +318,7 @@ public class TxnHandler { } catch (SQLException e) { LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "openTxns"); + checkRetryable(dbConn, e, "openTxns(" + rqst + ")"); throw new MetaException("Unable to select from transaction database " + StringUtils.stringifyException(e)); } finally { @@ -347,7 +347,7 @@ public class TxnHandler { } catch (SQLException e) { LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "abortTxn"); + checkRetryable(dbConn, e, "abortTxn(" + rqst + ")"); throw new MetaException("Unable to update transaction database " + StringUtils.stringifyException(e)); } finally { @@ -400,7 +400,7 @@ public class TxnHandler { } catch (SQLException e) { LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "commitTxn"); + checkRetryable(dbConn, e, "commitTxn(" + rqst + ")"); throw new MetaException("Unable to update transaction database " + StringUtils.stringifyException(e)); } finally { @@ -423,7 +423,7 @@ public class TxnHandler { } catch (SQLException e) { LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "lock"); + checkRetryable(dbConn, e, "lock(" + rqst + ")"); throw new MetaException("Unable to update transaction database " + StringUtils.stringifyException(e)); } finally { @@ -444,7 +444,7 @@ public class TxnHandler { } catch (SQLException e) { LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "lockNoWait"); + checkRetryable(dbConn, e, "lockNoWait(" + rqst + ")"); throw new MetaException("Unable to update transaction database " + StringUtils.stringifyException(e)); } finally { @@ -475,7 +475,7 @@ public class TxnHandler { } catch (SQLException e) { LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "checkLock"); + checkRetryable(dbConn, e, "checkLock(" + rqst + " )"); throw new MetaException("Unable to update transaction database " + StringUtils.stringifyException(e)); } finally { @@ -526,7 +526,7 @@ public class TxnHandler { } catch (SQLException e) { LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "unlock"); + checkRetryable(dbConn, e, "unlock(" + rqst + ")"); throw new MetaException("Unable to update transaction database " + StringUtils.stringifyException(e)); } finally { @@ -593,7 +593,7 @@ public class TxnHandler { LOG.debug("Going to rollback"); dbConn.rollback(); } catch (SQLException e) { - checkRetryable(dbConn, e, "showLocks"); + checkRetryable(dbConn, e, "showLocks(" + rqst + ")"); throw new MetaException("Unable to select from transaction database " + StringUtils.stringifyException(e)); } finally { @@ -624,7 +624,7 @@ public class TxnHandler { } catch (SQLException e) { LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "heartbeat"); + checkRetryable(dbConn, e, "heartbeat(" + ids + ")"); throw new MetaException("Unable to select from transaction database " + StringUtils.stringifyException(e)); } finally { @@ -661,7 +661,7 @@ public class TxnHandler { } catch (SQLException e) { LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "heartbeatTxnRange"); + checkRetryable(dbConn, e, "heartbeatTxnRange(" + rqst + ")"); throw new MetaException("Unable to select from transaction database " + StringUtils.stringifyException(e)); } finally { @@ -742,7 +742,7 @@ public class TxnHandler { } catch (SQLException e) { LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "compact"); + checkRetryable(dbConn, e, "compact(" + rqst + ")"); throw new MetaException("Unable to select from transaction database " + StringUtils.stringifyException(e)); } finally { @@ -792,7 +792,7 @@ public class TxnHandler { } catch (SQLException e) { LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "showCompact"); + checkRetryable(dbConn, e, "showCompact(" + rqst + ")"); throw new MetaException("Unable to select from transaction database " + StringUtils.stringifyException(e)); } finally { @@ -835,7 +835,7 @@ public class TxnHandler { } catch (SQLException e) { LOG.debug("Going to rollback"); rollbackDBConn(dbConn); - checkRetryable(dbConn, e, "addDynamicPartitions"); + checkRetryable(dbConn, e, "addDynamicPartitions(" + rqst + ")"); throw new MetaException("Unable to insert into from transaction database " + StringUtils.stringifyException(e)); } finally { @@ -955,6 +955,7 @@ public class TxnHandler { * @param caller name of the method calling this * @throws org.apache.hadoop.hive.metastore.txn.TxnHandler.RetryException when deadlock * detected and retry count has not been exceeded. + * TODO: make "caller" more elaborate like include lockId for example */ protected void checkRetryable(Connection conn, SQLException e, @@ -977,10 +978,12 @@ public class TxnHandler { (dbProduct == DatabaseProduct.ORACLE && (e.getMessage().contains("deadlock detected") || e.getMessage().contains("can't serialize access for this transaction")))) { if (deadlockCnt++ < ALLOWED_REPEATED_DEADLOCKS) { - LOG.warn("Deadlock detected in " + caller + ", trying again."); + long waitInterval = deadlockRetryInterval * deadlockCnt; + LOG.warn("Deadlock detected in " + caller + ". Will wait " + waitInterval + + "ms try again up to " + (ALLOWED_REPEATED_DEADLOCKS - deadlockCnt + 1) + " times."); // Pause for a just a bit for retrying to avoid immediately jumping back into the deadlock. try { - Thread.sleep(deadlockRetryInterval * deadlockCnt); + Thread.sleep(waitInterval); } catch (InterruptedException ie) { // NOP } @@ -993,13 +996,14 @@ public class TxnHandler { else if(isRetryable(e)) { //in MSSQL this means Communication Link Failure if(retryNum++ < retryLimit) { + LOG.warn("Retryable error detected in " + caller + ". Will wait " + retryInterval + + "ms and retry up to " + (retryLimit - retryNum + 1) + " times. Error: " + getMessage(e)); try { Thread.sleep(retryInterval); } catch(InterruptedException ex) { // } - LOG.warn("Retryable error detected in " + caller + ", trying again: " + getMessage(e)); throw new RetryException(); } else { http://git-wip-us.apache.org/repos/asf/hive/blob/cae46469/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java index e8c49ef..aa00573 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbLockManager.java @@ -79,10 +79,10 @@ public class DbLockManager implements HiveLockManager{ */ LockState lock(LockRequest lock, String queryId, boolean isBlocking, List<HiveLock> acquiredLocks) throws LockException { try { - LOG.debug("Requesting: queryId=" + queryId + " " + lock); + LOG.info("Requesting: queryId=" + queryId + " " + lock); LockResponse res = client.lock(lock); //link lockId to queryId - LOG.debug("Response " + res); + LOG.info("Response to queryId=" + queryId + " " + res); if(!isBlocking) { if(res.getState() == LockState.WAITING) { return LockState.WAITING; http://git-wip-us.apache.org/repos/asf/hive/blob/cae46469/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java index ccbac80..f8fff1a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/lockmgr/DbTxnManager.java @@ -105,7 +105,7 @@ public class DbTxnManager extends HiveTxnManagerImpl { LockRequestBuilder rqstBuilder = new LockRequestBuilder(); //link queryId to txnId - LOG.debug("Setting lock request transaction to " + txnId + " for queryId=" + plan.getQueryId()); + LOG.info("Setting lock request transaction to " + txnId + " for queryId=" + plan.getQueryId()); rqstBuilder.setTransactionId(txnId) .setUser(username);