keith-turner commented on code in PR #5651:
URL: https://github.com/apache/accumulo/pull/5651#discussion_r2155359488
##########
server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java:
##########
@@ -937,70 +928,92 @@ synchronized void completeClose(boolean saveState) throws
IOException {
return currentlyUnreserved;
});
- if (log.isDebugEnabled() && System.nanoTime() - lastLogTime >
TimeUnit.SECONDS.toNanos(60)) {
- for (ScanDataSource activeScan : runningScans) {
- log.debug("Waiting on scan in completeClose {} {}", extent,
activeScan);
- }
-
- lastLogTime = System.nanoTime();
- }
+ long lastLogTime = System.nanoTime();
- try {
- log.debug("Waiting to completeClose for {}. {} writes {} scans",
extent, writesInProgress,
- runningScans.size());
- this.wait(50);
- } catch (InterruptedException e) {
- log.error("Interrupted waiting to completeClose for extent {}",
extent, e);
- }
- }
+ // wait for reads and writes to complete
+ while (writesInProgress > 0 || !runningScans.isEmpty()) {
+ runningScans.removeIf(scanDataSource -> {
+ boolean currentlyUnreserved =
disallowNewReservations(scanDataSource.getScanParameters());
+ if (currentlyUnreserved) {
+ log.debug("Disabled scan session in tablet close {} {}", extent,
scanDataSource);
+ }
+ return currentlyUnreserved;
+ });
- // It is assumed that nothing new would have been added to activeScans
since it was copied, so
- // check that assumption. At this point activeScans should be empty or
everything in it should
- // be disabled.
- Preconditions.checkState(activeScans.stream()
- .allMatch(scanDataSource ->
disallowNewReservations(scanDataSource.getScanParameters())));
+ if (log.isDebugEnabled()
+ && System.nanoTime() - lastLogTime > TimeUnit.SECONDS.toNanos(60))
{
+ for (ScanDataSource activeScan : runningScans) {
+ log.debug("Waiting on scan in completeClose {} {}", extent,
activeScan);
+ }
- getTabletMemory().waitForMinC();
+ lastLogTime = System.nanoTime();
+ }
- if (saveState && getTabletMemory().getMemTable().getNumEntries() > 0) {
- try {
- prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE).run();
- } catch (NoNodeException e) {
- throw new RuntimeException("Exception on " + extent + " during prep
for MinC", e);
+ try {
+ log.debug("Waiting to completeClose for {}. {} writes {} scans",
extent, writesInProgress,
+ runningScans.size());
+ this.wait(50);
+ } catch (InterruptedException e) {
+ log.error("Interrupted waiting to completeClose for extent {}",
extent, e);
+ }
}
- }
- if (saveState) {
- // at this point all tablet data is flushed, so do a consistency check
- RuntimeException err = null;
- for (int i = 0; i < 5; i++) {
+ // It is assumed that nothing new would have been added to activeScans
since it was copied, so
+ // check that assumption. At this point activeScans should be empty or
everything in it should
+ // be disabled.
+ Preconditions.checkState(activeScans.stream()
+ .allMatch(scanDataSource ->
disallowNewReservations(scanDataSource.getScanParameters())));
+
+ getTabletMemory().waitForMinC();
+
+ shouldPrepMinC = saveState &&
getTabletMemory().getMemTable().getNumEntries() > 0;
+ if (shouldPrepMinC) {
try {
- closeConsistencyCheck();
- err = null;
- } catch (RuntimeException t) {
- err = t;
- log.error("Consistency check fails, retrying", t);
- sleepUninterruptibly(500, TimeUnit.MILLISECONDS);
+ mct = prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE);
+ } catch (NoNodeException e) {
+ throw new RuntimeException("Exception on " + extent + " during prep
for MinC", e);
}
}
- if (err != null) {
- log.error("Tablet closed consistency check has failed for {} giving up
and closing",
- this.extent);
- }
}
- try {
- getTabletMemory().getMemTable().delete(0);
- } catch (Exception t) {
- log.error("Failed to delete mem table : " + t.getMessage() + " for
tablet " + extent, t);
+ if (shouldPrepMinC) {
+ // must not run while tablet is locked, as this may result in deadlocks
+ mct.run();
Review Comment:
Normally minor compactions run concurrently w/ reads and writes. At this
point in the code no more reads and writes should be happening so I think this
code did the minor compaction in the synch block because it did not matter if
reads and writes were blocked.
It should be ok to run this outside of the sync block because the close
state change at the top of this method should prevent any new reads or writes
from starting and the code waited for ones in progress to complete. Added a
suggestion to check some things after the lock is reacquired.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]