This is an automated email from the ASF dual-hosted git repository.
krathbun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push:
new dc796bf827 Resolves TabletServer deadlock (#5651)
dc796bf827 is described below
commit dc796bf8274db9e99cf5b39e34b8c48c30fd0c7a
Author: Kevin Rathbun <[email protected]>
AuthorDate: Fri Jun 20 10:18:40 2025 -0400
Resolves TabletServer deadlock (#5651)
* Resolves TabletServer deadlock
Resolves a deadlock in Tablet related code. Expected lock order is
refreshLock -> logLock -> tablet, but `Tablet.completeClose()` would lock
tablet -> refreshLock -> logLock, which could (and did) result in deadlocks. I
did not find any other situations where we do not acquire the locks in the
order refreshLock -> logLock -> tablet, so Tablet code should now be deadlock
free.
closes #5597
---
.../org/apache/accumulo/tserver/tablet/Tablet.java | 189 +++++++++++----------
1 file changed, 103 insertions(+), 86 deletions(-)
diff --git
a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
index cae47debef..e6283cd09b 100644
---
a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
+++
b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
@@ -893,42 +893,33 @@ public class Tablet extends TabletBase {
private boolean closeCompleting = false;
- synchronized void completeClose(boolean saveState) throws IOException {
-
- if (!isClosing() || isCloseComplete() || closeCompleting) {
- throw new IllegalStateException("Bad close state " + closeState + " on
tablet " + extent);
- }
-
- log.trace("completeClose(saveState={}) {}", saveState, extent);
+ void completeClose(boolean saveState) throws IOException {
+ boolean shouldPrepMinC;
+ MinorCompactionTask mct = null;
- // ensure this method is only called once, also guards against multiple
- // threads entering the method at the same time
- closeCompleting = true;
- closeState = CloseState.CLOSED;
+ synchronized (this) {
+ if (!isClosing() || isCloseComplete() || closeCompleting) {
+ throw new IllegalStateException("Bad close state " + closeState + " on
tablet " + extent);
+ }
- // modify dataSourceDeletions so scans will try to switch data sources and
fail because the
- // tablet is closed
- dataSourceDeletions.incrementAndGet();
+ log.trace("completeClose(saveState={}) {}", saveState, extent);
- for (ScanDataSource activeScan : activeScans) {
- activeScan.interrupt();
- }
+ // ensure this method is only called once, also guards against multiple
+ // threads entering the method at the same time
+ closeCompleting = true;
+ closeState = CloseState.CLOSED;
- // create a copy so that it can be whittled down as client sessions are
disabled
- List<ScanDataSource> runningScans = new ArrayList<>(this.activeScans);
+ // modify dataSourceDeletions so scans will try to switch data sources
and fail because the
+ // tablet is closed
+ dataSourceDeletions.incrementAndGet();
- runningScans.removeIf(scanDataSource -> {
- boolean currentlyUnreserved =
disallowNewReservations(scanDataSource.getScanParameters());
- if (currentlyUnreserved) {
- log.debug("Disabled scan session in tablet close {} {}", extent,
scanDataSource);
+ for (ScanDataSource activeScan : activeScans) {
+ activeScan.interrupt();
}
- return currentlyUnreserved;
- });
- long lastLogTime = System.nanoTime();
+ // create a copy so that it can be whittled down as client sessions are
disabled
+ List<ScanDataSource> runningScans = new ArrayList<>(this.activeScans);
- // wait for reads and writes to complete
- while (writesInProgress > 0 || !runningScans.isEmpty()) {
runningScans.removeIf(scanDataSource -> {
boolean currentlyUnreserved =
disallowNewReservations(scanDataSource.getScanParameters());
if (currentlyUnreserved) {
@@ -937,70 +928,98 @@ public class Tablet extends TabletBase {
return currentlyUnreserved;
});
- if (log.isDebugEnabled() && System.nanoTime() - lastLogTime >
TimeUnit.SECONDS.toNanos(60)) {
- for (ScanDataSource activeScan : runningScans) {
- log.debug("Waiting on scan in completeClose {} {}", extent,
activeScan);
- }
-
- lastLogTime = System.nanoTime();
- }
+ long lastLogTime = System.nanoTime();
- try {
- log.debug("Waiting to completeClose for {}. {} writes {} scans",
extent, writesInProgress,
- runningScans.size());
- this.wait(50);
- } catch (InterruptedException e) {
- log.error("Interrupted waiting to completeClose for extent {}",
extent, e);
- }
- }
+ // wait for reads and writes to complete
+ while (writesInProgress > 0 || !runningScans.isEmpty()) {
+ runningScans.removeIf(scanDataSource -> {
+ boolean currentlyUnreserved =
disallowNewReservations(scanDataSource.getScanParameters());
+ if (currentlyUnreserved) {
+ log.debug("Disabled scan session in tablet close {} {}", extent,
scanDataSource);
+ }
+ return currentlyUnreserved;
+ });
- // It is assumed that nothing new would have been added to activeScans
since it was copied, so
- // check that assumption. At this point activeScans should be empty or
everything in it should
- // be disabled.
- Preconditions.checkState(activeScans.stream()
- .allMatch(scanDataSource ->
disallowNewReservations(scanDataSource.getScanParameters())));
+ if (log.isDebugEnabled()
+ && System.nanoTime() - lastLogTime > TimeUnit.SECONDS.toNanos(60))
{
+ for (ScanDataSource activeScan : runningScans) {
+ log.debug("Waiting on scan in completeClose {} {}", extent,
activeScan);
+ }
- getTabletMemory().waitForMinC();
+ lastLogTime = System.nanoTime();
+ }
- if (saveState && getTabletMemory().getMemTable().getNumEntries() > 0) {
- try {
- prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE).run();
- } catch (NoNodeException e) {
- throw new RuntimeException("Exception on " + extent + " during prep
for MinC", e);
+ try {
+ log.debug("Waiting to completeClose for {}. {} writes {} scans",
extent, writesInProgress,
+ runningScans.size());
+ this.wait(50);
+ } catch (InterruptedException e) {
+ log.error("Interrupted waiting to completeClose for extent {}",
extent, e);
+ }
}
- }
- if (saveState) {
- // at this point all tablet data is flushed, so do a consistency check
- RuntimeException err = null;
- for (int i = 0; i < 5; i++) {
+ // It is assumed that nothing new would have been added to activeScans
since it was copied, so
+ // check that assumption. At this point activeScans should be empty or
everything in it should
+ // be disabled.
+ Preconditions.checkState(activeScans.stream()
+ .allMatch(scanDataSource ->
disallowNewReservations(scanDataSource.getScanParameters())));
+
+ getTabletMemory().waitForMinC();
+
+ shouldPrepMinC = saveState &&
getTabletMemory().getMemTable().getNumEntries() > 0;
+ if (shouldPrepMinC) {
try {
- closeConsistencyCheck();
- err = null;
- } catch (RuntimeException t) {
- err = t;
- log.error("Consistency check fails, retrying", t);
- sleepUninterruptibly(500, TimeUnit.MILLISECONDS);
+ mct = prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE);
+ } catch (NoNodeException e) {
+ throw new RuntimeException("Exception on " + extent + " during prep
for MinC", e);
}
}
- if (err != null) {
- log.error("Tablet closed consistency check has failed for {} giving up
and closing",
- this.extent);
- }
}
- try {
- getTabletMemory().getMemTable().delete(0);
- } catch (Exception t) {
- log.error("Failed to delete mem table : " + t.getMessage() + " for
tablet " + extent, t);
+ if (shouldPrepMinC) {
+ // must not run while tablet is locked, as this may result in deadlocks
+ mct.run();
}
- getTabletMemory().close();
+ synchronized (this) {
+ // gave up the lock to allow a minor compaction to run, now that the
lock is reacquired
+ // validate that nothing unexpectedly changed
+ Preconditions.checkState(closeState == CloseState.CLOSED, "closeState:%s
extent:%s",
+ closeState, extent);
+ Preconditions.checkState(writesInProgress == 0, "writesInProgress:%s
extent:%s",
+ writesInProgress, extent);
+ if (saveState) {
+ // at this point all tablet data is flushed, so do a consistency check
+ RuntimeException err = null;
+ for (int i = 0; i < 5; i++) {
+ try {
+ closeConsistencyCheck();
+ err = null;
+ } catch (RuntimeException t) {
+ err = t;
+ log.error("Consistency check fails, retrying", t);
+ sleepUninterruptibly(500, TimeUnit.MILLISECONDS);
+ }
+ }
+ if (err != null) {
+ log.error("Tablet closed consistency check has failed for {} giving
up and closing",
+ this.extent);
+ }
+ }
+
+ try {
+ getTabletMemory().getMemTable().delete(0);
+ } catch (Exception t) {
+ log.error("Failed to delete mem table : " + t.getMessage() + " for
tablet " + extent, t);
+ }
- // close data files
- getTabletResources().close();
+ getTabletMemory().close();
- closeState = CloseState.COMPLETE;
+ // close data files
+ getTabletResources().close();
+
+ closeState = CloseState.COMPLETE;
+ }
}
private void closeConsistencyCheck() {
@@ -1220,10 +1239,6 @@ public class Tablet extends TabletBase {
}
}
- ReentrantLock getLogLock() {
- return logLock;
- }
-
Set<LogEntry> beginClearingUnusedLogs() {
Preconditions.checkState(logLock.isHeldByCurrentThread());
Set<LogEntry> unusedLogs = new HashSet<>();
@@ -1278,6 +1293,8 @@ public class Tablet extends TabletBase {
private boolean removingLogs = false;
// this lock is basically used to synchronize writing of log info to metadata
+ // care should be taken when using this lock. Lock order should be:
+ // refreshLock -> logLock -> tablet to prevent deadlock
private final ReentrantLock logLock = new ReentrantLock();
// don't release the lock if this method returns true for success; instead,
the caller should
@@ -1288,8 +1305,6 @@ public class Tablet extends TabletBase {
boolean releaseLock = true;
- // Should not hold the tablet lock while trying to acquire the log lock
because this could lead
- // to deadlock. However there is a path in the code that does this. See
#3759
logLock.lock();
try {
@@ -1518,6 +1533,8 @@ public class Tablet extends TabletBase {
// The purpose of this lock is to prevent race conditions between concurrent
refresh RPC calls and
// between minor compactions and refresh calls.
+ // care should be taken when using this lock. Lock order should be:
+ // refreshLock -> logLock -> tablet to prevent deadlock
private final ReentrantLock refreshLock = new ReentrantLock();
void bringMinorCompactionOnline(ReferencedTabletFile tmpDatafile,
@@ -1559,11 +1576,11 @@ public class Tablet extends TabletBase {
// This prevents a concurrent refresh operation from pulling in the new
tablet file before the
// in memory map reference related to the file is deactivated. Scans
should use one of the in
// memory map or the new file, never both.
- Preconditions.checkState(!getLogLock().isHeldByCurrentThread());
+ Preconditions.checkState(!logLock.isHeldByCurrentThread());
refreshLock.lock();
try {
// Can not hold tablet lock while acquiring the log lock.
- getLogLock().lock();
+ logLock.lock();
// do not place any code here between lock and try
try {
// The following call pairs with tablet.finishClearingUnusedLogs()
later in this block. If
@@ -1580,7 +1597,7 @@ public class Tablet extends TabletBase {
finishClearingUnusedLogs();
} finally {
- getLogLock().unlock();
+ logLock.unlock();
}
// Without the refresh lock, if a refresh happened here it could make
the new file written to