This is an automated email from the ASF dual-hosted git repository.

krathbun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/main by this push:
     new dc796bf827 Resolves TabletServer deadlock (#5651)
dc796bf827 is described below

commit dc796bf8274db9e99cf5b39e34b8c48c30fd0c7a
Author: Kevin Rathbun <[email protected]>
AuthorDate: Fri Jun 20 10:18:40 2025 -0400

    Resolves TabletServer deadlock (#5651)
    
    * Resolves TabletServer deadlock
    
    Resolves a deadlock in Tablet related code. Expected lock order is 
refreshLock -> logLock -> tablet, but `Tablet.completeClose()` would lock 
tablet -> refreshLock -> logLock, which could (and did) result in deadlocks. I 
did not find any other situations where we do not acquire the locks in the 
order refreshLock -> logLock -> tablet, so Tablet code should now be deadlock 
free.
    
    closes #5597
---
 .../org/apache/accumulo/tserver/tablet/Tablet.java | 189 +++++++++++----------
 1 file changed, 103 insertions(+), 86 deletions(-)

diff --git 
a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java 
b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
index cae47debef..e6283cd09b 100644
--- 
a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
+++ 
b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
@@ -893,42 +893,33 @@ public class Tablet extends TabletBase {
 
   private boolean closeCompleting = false;
 
-  synchronized void completeClose(boolean saveState) throws IOException {
-
-    if (!isClosing() || isCloseComplete() || closeCompleting) {
-      throw new IllegalStateException("Bad close state " + closeState + " on 
tablet " + extent);
-    }
-
-    log.trace("completeClose(saveState={}) {}", saveState, extent);
+  void completeClose(boolean saveState) throws IOException {
+    boolean shouldPrepMinC;
+    MinorCompactionTask mct = null;
 
-    // ensure this method is only called once, also guards against multiple
-    // threads entering the method at the same time
-    closeCompleting = true;
-    closeState = CloseState.CLOSED;
+    synchronized (this) {
+      if (!isClosing() || isCloseComplete() || closeCompleting) {
+        throw new IllegalStateException("Bad close state " + closeState + " on 
tablet " + extent);
+      }
 
-    // modify dataSourceDeletions so scans will try to switch data sources and 
fail because the
-    // tablet is closed
-    dataSourceDeletions.incrementAndGet();
+      log.trace("completeClose(saveState={}) {}", saveState, extent);
 
-    for (ScanDataSource activeScan : activeScans) {
-      activeScan.interrupt();
-    }
+      // ensure this method is only called once, also guards against multiple
+      // threads entering the method at the same time
+      closeCompleting = true;
+      closeState = CloseState.CLOSED;
 
-    // create a copy so that it can be whittled down as client sessions are 
disabled
-    List<ScanDataSource> runningScans = new ArrayList<>(this.activeScans);
+      // modify dataSourceDeletions so scans will try to switch data sources 
and fail because the
+      // tablet is closed
+      dataSourceDeletions.incrementAndGet();
 
-    runningScans.removeIf(scanDataSource -> {
-      boolean currentlyUnreserved = 
disallowNewReservations(scanDataSource.getScanParameters());
-      if (currentlyUnreserved) {
-        log.debug("Disabled scan session in tablet close {} {}", extent, 
scanDataSource);
+      for (ScanDataSource activeScan : activeScans) {
+        activeScan.interrupt();
       }
-      return currentlyUnreserved;
-    });
 
-    long lastLogTime = System.nanoTime();
+      // create a copy so that it can be whittled down as client sessions are 
disabled
+      List<ScanDataSource> runningScans = new ArrayList<>(this.activeScans);
 
-    // wait for reads and writes to complete
-    while (writesInProgress > 0 || !runningScans.isEmpty()) {
       runningScans.removeIf(scanDataSource -> {
         boolean currentlyUnreserved = 
disallowNewReservations(scanDataSource.getScanParameters());
         if (currentlyUnreserved) {
@@ -937,70 +928,98 @@ public class Tablet extends TabletBase {
         return currentlyUnreserved;
       });
 
-      if (log.isDebugEnabled() && System.nanoTime() - lastLogTime > 
TimeUnit.SECONDS.toNanos(60)) {
-        for (ScanDataSource activeScan : runningScans) {
-          log.debug("Waiting on scan in completeClose {} {}", extent, 
activeScan);
-        }
-
-        lastLogTime = System.nanoTime();
-      }
+      long lastLogTime = System.nanoTime();
 
-      try {
-        log.debug("Waiting to completeClose for {}. {} writes {} scans", 
extent, writesInProgress,
-            runningScans.size());
-        this.wait(50);
-      } catch (InterruptedException e) {
-        log.error("Interrupted waiting to completeClose for extent {}", 
extent, e);
-      }
-    }
+      // wait for reads and writes to complete
+      while (writesInProgress > 0 || !runningScans.isEmpty()) {
+        runningScans.removeIf(scanDataSource -> {
+          boolean currentlyUnreserved = 
disallowNewReservations(scanDataSource.getScanParameters());
+          if (currentlyUnreserved) {
+            log.debug("Disabled scan session in tablet close {} {}", extent, 
scanDataSource);
+          }
+          return currentlyUnreserved;
+        });
 
-    // It is assumed that nothing new would have been added to activeScans 
since it was copied, so
-    // check that assumption. At this point activeScans should be empty or 
everything in it should
-    // be disabled.
-    Preconditions.checkState(activeScans.stream()
-        .allMatch(scanDataSource -> 
disallowNewReservations(scanDataSource.getScanParameters())));
+        if (log.isDebugEnabled()
+            && System.nanoTime() - lastLogTime > TimeUnit.SECONDS.toNanos(60)) 
{
+          for (ScanDataSource activeScan : runningScans) {
+            log.debug("Waiting on scan in completeClose {} {}", extent, 
activeScan);
+          }
 
-    getTabletMemory().waitForMinC();
+          lastLogTime = System.nanoTime();
+        }
 
-    if (saveState && getTabletMemory().getMemTable().getNumEntries() > 0) {
-      try {
-        prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE).run();
-      } catch (NoNodeException e) {
-        throw new RuntimeException("Exception on " + extent + " during prep 
for MinC", e);
+        try {
+          log.debug("Waiting to completeClose for {}. {} writes {} scans", 
extent, writesInProgress,
+              runningScans.size());
+          this.wait(50);
+        } catch (InterruptedException e) {
+          log.error("Interrupted waiting to completeClose for extent {}", 
extent, e);
+        }
       }
-    }
 
-    if (saveState) {
-      // at this point all tablet data is flushed, so do a consistency check
-      RuntimeException err = null;
-      for (int i = 0; i < 5; i++) {
+      // It is assumed that nothing new would have been added to activeScans 
since it was copied, so
+      // check that assumption. At this point activeScans should be empty or 
everything in it should
+      // be disabled.
+      Preconditions.checkState(activeScans.stream()
+          .allMatch(scanDataSource -> 
disallowNewReservations(scanDataSource.getScanParameters())));
+
+      getTabletMemory().waitForMinC();
+
+      shouldPrepMinC = saveState && 
getTabletMemory().getMemTable().getNumEntries() > 0;
+      if (shouldPrepMinC) {
         try {
-          closeConsistencyCheck();
-          err = null;
-        } catch (RuntimeException t) {
-          err = t;
-          log.error("Consistency check fails, retrying", t);
-          sleepUninterruptibly(500, TimeUnit.MILLISECONDS);
+          mct = prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE);
+        } catch (NoNodeException e) {
+          throw new RuntimeException("Exception on " + extent + " during prep 
for MinC", e);
         }
       }
-      if (err != null) {
-        log.error("Tablet closed consistency check has failed for {} giving up 
and closing",
-            this.extent);
-      }
     }
 
-    try {
-      getTabletMemory().getMemTable().delete(0);
-    } catch (Exception t) {
-      log.error("Failed to delete mem table : " + t.getMessage() + " for 
tablet " + extent, t);
+    if (shouldPrepMinC) {
+      // must not run while tablet is locked, as this may result in deadlocks
+      mct.run();
     }
 
-    getTabletMemory().close();
+    synchronized (this) {
+      // gave up the lock to allow a minor compaction to run, now that the 
lock is reacquired
+      // validate that nothing unexpectedly changed
+      Preconditions.checkState(closeState == CloseState.CLOSED, "closeState:%s 
extent:%s",
+          closeState, extent);
+      Preconditions.checkState(writesInProgress == 0, "writesInProgress:%s 
extent:%s",
+          writesInProgress, extent);
+      if (saveState) {
+        // at this point all tablet data is flushed, so do a consistency check
+        RuntimeException err = null;
+        for (int i = 0; i < 5; i++) {
+          try {
+            closeConsistencyCheck();
+            err = null;
+          } catch (RuntimeException t) {
+            err = t;
+            log.error("Consistency check fails, retrying", t);
+            sleepUninterruptibly(500, TimeUnit.MILLISECONDS);
+          }
+        }
+        if (err != null) {
+          log.error("Tablet closed consistency check has failed for {} giving 
up and closing",
+              this.extent);
+        }
+      }
+
+      try {
+        getTabletMemory().getMemTable().delete(0);
+      } catch (Exception t) {
+        log.error("Failed to delete mem table : " + t.getMessage() + " for 
tablet " + extent, t);
+      }
 
-    // close data files
-    getTabletResources().close();
+      getTabletMemory().close();
 
-    closeState = CloseState.COMPLETE;
+      // close data files
+      getTabletResources().close();
+
+      closeState = CloseState.COMPLETE;
+    }
   }
 
   private void closeConsistencyCheck() {
@@ -1220,10 +1239,6 @@ public class Tablet extends TabletBase {
     }
   }
 
-  ReentrantLock getLogLock() {
-    return logLock;
-  }
-
   Set<LogEntry> beginClearingUnusedLogs() {
     Preconditions.checkState(logLock.isHeldByCurrentThread());
     Set<LogEntry> unusedLogs = new HashSet<>();
@@ -1278,6 +1293,8 @@ public class Tablet extends TabletBase {
   private boolean removingLogs = false;
 
   // this lock is basically used to synchronize writing of log info to metadata
+  // care should be taken when using this lock. Lock order should be:
+  // refreshLock -> logLock -> tablet to prevent deadlock
   private final ReentrantLock logLock = new ReentrantLock();
 
   // don't release the lock if this method returns true for success; instead, 
the caller should
@@ -1288,8 +1305,6 @@ public class Tablet extends TabletBase {
 
     boolean releaseLock = true;
 
-    // Should not hold the tablet lock while trying to acquire the log lock 
because this could lead
-    // to deadlock. However there is a path in the code that does this. See 
#3759
     logLock.lock();
 
     try {
@@ -1518,6 +1533,8 @@ public class Tablet extends TabletBase {
 
   // The purpose of this lock is to prevent race conditions between concurrent 
refresh RPC calls and
   // between minor compactions and refresh calls.
+  // care should be taken when using this lock. Lock order should be:
+  // refreshLock -> logLock -> tablet to prevent deadlock
   private final ReentrantLock refreshLock = new ReentrantLock();
 
   void bringMinorCompactionOnline(ReferencedTabletFile tmpDatafile,
@@ -1559,11 +1576,11 @@ public class Tablet extends TabletBase {
     // This prevents a concurrent refresh operation from pulling in the new 
tablet file before the
     // in memory map reference related to the file is deactivated. Scans 
should use one of the in
     // memory map or the new file, never both.
-    Preconditions.checkState(!getLogLock().isHeldByCurrentThread());
+    Preconditions.checkState(!logLock.isHeldByCurrentThread());
     refreshLock.lock();
     try {
       // Can not hold tablet lock while acquiring the log lock.
-      getLogLock().lock();
+      logLock.lock();
       // do not place any code here between lock and try
       try {
         // The following call pairs with tablet.finishClearingUnusedLogs() 
later in this block. If
@@ -1580,7 +1597,7 @@ public class Tablet extends TabletBase {
 
         finishClearingUnusedLogs();
       } finally {
-        getLogLock().unlock();
+        logLock.unlock();
       }
 
       // Without the refresh lock, if a refresh happened here it could make 
the new file written to

Reply via email to