Repository: hive
Updated Branches:
  refs/heads/branch-1.2 c83fe8ba2 -> ae10c832d


HIVE-10500 Repeated deadlocks in underlying RDBMS cause transaction or lock 
failure (Alan Gates, reviewed by Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ae10c832
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ae10c832
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ae10c832

Branch: refs/heads/branch-1.2
Commit: ae10c832d3a231780d09a6398886c12c04869e22
Parents: c83fe8b
Author: Alan Gates <ga...@hortonworks.com>
Authored: Tue Apr 28 15:25:23 2015 -0700
Committer: Alan Gates <ga...@hortonworks.com>
Committed: Tue Apr 28 15:27:25 2015 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/metastore/txn/TxnHandler.java    | 12 +++++++++++-
 .../hadoop/hive/metastore/txn/TestTxnHandler.java       |  1 +
 2 files changed, 12 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ae10c832/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
----------------------------------------------------------------------
diff --git 
a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java 
b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
index f9a742d..1e64fc7 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
@@ -73,7 +73,7 @@ public class TxnHandler {
   static final protected char LOCK_SHARED = 'r';
   static final protected char LOCK_SEMI_SHARED = 'w';
 
-  static final private int ALLOWED_REPEATED_DEADLOCKS = 5;
+  static final private int ALLOWED_REPEATED_DEADLOCKS = 10;
   static final private Log LOG = LogFactory.getLog(TxnHandler.class.getName());
 
   static private DataSource connPool;
@@ -84,6 +84,7 @@ public class TxnHandler {
    * Number of consecutive deadlocks we have seen
    */
   protected int deadlockCnt;
+  private long deadlockRetryInterval;
   protected HiveConf conf;
   protected DatabaseProduct dbProduct;
 
@@ -130,6 +131,7 @@ public class TxnHandler {
     buildJumpTable();
     retryInterval = HiveConf.getTimeVar(conf, 
HiveConf.ConfVars.HMSHANDLERINTERVAL, TimeUnit.MILLISECONDS);
     retryLimit = HiveConf.getIntVar(conf, 
HiveConf.ConfVars.HMSHANDLERATTEMPTS);
+    deadlockRetryInterval = retryInterval / 10;
 
   }
 
@@ -270,6 +272,7 @@ public class TxnHandler {
   }
 
   public OpenTxnsResponse openTxns(OpenTxnRequest rqst) throws MetaException {
+    deadlockCnt = 0;  // Reset deadlock count since this is a new transaction
     int numTxns = rqst.getNum_txns();
     try {
       Connection dbConn = null;
@@ -410,6 +413,7 @@ public class TxnHandler {
 
   public LockResponse lock(LockRequest rqst)
     throws NoSuchTxnException, TxnAbortedException, MetaException {
+    deadlockCnt = 0;
     try {
       Connection dbConn = null;
       try {
@@ -973,6 +977,12 @@ public class TxnHandler {
         || e.getMessage().contains("can't serialize access for this 
transaction")))) {
       if (deadlockCnt++ < ALLOWED_REPEATED_DEADLOCKS) {
         LOG.warn("Deadlock detected in " + caller + ", trying again.");
+        // Pause for just a bit before retrying to avoid immediately jumping 
back into the deadlock.
+        try {
+          Thread.sleep(deadlockRetryInterval * deadlockCnt);
+        } catch (InterruptedException ie) {
+          // NOP
+        }
         throw new RetryException();
       } else {
         LOG.error("Too many repeated deadlocks in " + caller + ", giving up.");

http://git-wip-us.apache.org/repos/asf/hive/blob/ae10c832/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java
----------------------------------------------------------------------
diff --git 
a/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java 
b/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java
index e88ce02..d4266e1 100644
--- 
a/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java
+++ 
b/metastore/src/test/org/apache/hadoop/hive/metastore/txn/TestTxnHandler.java
@@ -1090,6 +1090,7 @@ public class TestTxnHandler {
   @Test
   @Ignore
   public void deadlockDetected() throws Exception {
+    LOG.debug("Starting deadlock test");
     Connection conn = 
txnHandler.getDbConn(Connection.TRANSACTION_SERIALIZABLE);
     Statement stmt = conn.createStatement();
     long now = txnHandler.getDbTime(conn);

Reply via email to