Repository: hive Updated Branches: refs/heads/branch-1.2 c83fe8ba2 -> ae10c832d
HIVE-10500 Repeated deadlocks in underlying RDBMS cause transaction or lock failure (Alan Gates, reviewed by Gunther Hagleitner) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ae10c832 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ae10c832 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ae10c832 Branch: refs/heads/branch-1.2 Commit: ae10c832d3a231780d09a6398886c12c04869e22 Parents: c83fe8b Author: Alan Gates <ga...@hortonworks.com> Authored: Tue Apr 28 15:25:23 2015 -0700 Committer: Alan Gates <ga...@hortonworks.com> Committed: Tue Apr 28 15:27:25 2015 -0700 ---------------------------------------------------------------------- .../apache/hadoop/hive/metastore/txn/TxnHandler.java | 12 +++++++++++- .../hadoop/hive/metastore/txn/TestTxnHandler.java | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/ae10c832/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java index f9a742d..1e64fc7 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java @@ -73,7 +73,7 @@ public class TxnHandler { static final protected char LOCK_SHARED = 'r'; static final protected char LOCK_SEMI_SHARED = 'w'; - static final private int ALLOWED_REPEATED_DEADLOCKS = 5; + static final private int ALLOWED_REPEATED_DEADLOCKS = 10; static final private Log LOG = LogFactory.getLog(TxnHandler.class.getName()); static private DataSource connPool; @@ -84,6 +84,7 @@ public class TxnHandler { * Number of consecutive deadlocks we have seen */ protected int 
deadlockCnt; + private long deadlockRetryInterval; protected HiveConf conf; protected DatabaseProduct dbProduct; @@ -130,6 +131,7 @@ public class TxnHandler { buildJumpTable(); retryInterval = HiveConf.getTimeVar(conf, HiveConf.ConfVars.HMSHANDLERINTERVAL, TimeUnit.MILLISECONDS); retryLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HMSHANDLERATTEMPTS); + deadlockRetryInterval = retryInterval / 10; } @@ -270,6 +272,7 @@ public class TxnHandler { } public OpenTxnsResponse openTxns(OpenTxnRequest rqst) throws MetaException { + deadlockCnt = 0; // Reset deadlock count since this is a new transaction int numTxns = rqst.getNum_txns(); try { Connection dbConn = null; @@ -410,6 +413,7 @@ public class TxnHandler { public LockResponse lock(LockRequest rqst) throws NoSuchTxnException, TxnAbortedException, MetaException { + deadlockCnt = 0; try { Connection dbConn = null; try { @@ -973,6 +977,12 @@ public class TxnHandler { || e.getMessage().contains("can't serialize access for this transaction")))) { if (deadlockCnt++ < ALLOWED_REPEATED_DEADLOCKS) { LOG.warn("Deadlock detected in " + caller + ", trying again."); + // Pause for just a bit before retrying to avoid immediately jumping back into the deadlock. 
+ try { + Thread.sleep(deadlockRetryInterval * deadlockCnt); + } catch (InterruptedException ie) { + // NOP + } throw new RetryException(); } else { LOG.error("Too many repeated deadlocks in " + caller + ", giving up."); http://git-wip-us.apache.org/repos/asf/hive/blob/ae10c832/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java ---------------------------------------------------------------------- diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java b/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java index e88ce02..d4266e1 100644 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java @@ -1090,6 +1090,7 @@ public class TestTxnHandler { @Test @Ignore public void deadlockDetected() throws Exception { + LOG.debug("Starting deadlock test"); Connection conn = txnHandler.getDbConn(Connection.TRANSACTION_SERIALIZABLE); Statement stmt = conn.createStatement(); long now = txnHandler.getDbTime(conn);