kevinrr888 commented on code in PR #5651:
URL: https://github.com/apache/accumulo/pull/5651#discussion_r2150611109
##########
server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java:
##########
@@ -937,70 +928,92 @@ synchronized void completeClose(boolean saveState) throws
IOException {
return currentlyUnreserved;
});
- if (log.isDebugEnabled() && System.nanoTime() - lastLogTime >
TimeUnit.SECONDS.toNanos(60)) {
- for (ScanDataSource activeScan : runningScans) {
- log.debug("Waiting on scan in completeClose {} {}", extent,
activeScan);
- }
-
- lastLogTime = System.nanoTime();
- }
+ long lastLogTime = System.nanoTime();
- try {
- log.debug("Waiting to completeClose for {}. {} writes {} scans",
extent, writesInProgress,
- runningScans.size());
- this.wait(50);
- } catch (InterruptedException e) {
- log.error("Interrupted waiting to completeClose for extent {}",
extent, e);
- }
- }
+ // wait for reads and writes to complete
+ while (writesInProgress > 0 || !runningScans.isEmpty()) {
+ runningScans.removeIf(scanDataSource -> {
+ boolean currentlyUnreserved =
disallowNewReservations(scanDataSource.getScanParameters());
+ if (currentlyUnreserved) {
+ log.debug("Disabled scan session in tablet close {} {}", extent,
scanDataSource);
+ }
+ return currentlyUnreserved;
+ });
- // It is assumed that nothing new would have been added to activeScans
since it was copied, so
- // check that assumption. At this point activeScans should be empty or
everything in it should
- // be disabled.
- Preconditions.checkState(activeScans.stream()
- .allMatch(scanDataSource ->
disallowNewReservations(scanDataSource.getScanParameters())));
+ if (log.isDebugEnabled()
+ && System.nanoTime() - lastLogTime > TimeUnit.SECONDS.toNanos(60))
{
+ for (ScanDataSource activeScan : runningScans) {
+ log.debug("Waiting on scan in completeClose {} {}", extent,
activeScan);
+ }
- getTabletMemory().waitForMinC();
+ lastLogTime = System.nanoTime();
+ }
- if (saveState && getTabletMemory().getMemTable().getNumEntries() > 0) {
- try {
- prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE).run();
- } catch (NoNodeException e) {
- throw new RuntimeException("Exception on " + extent + " during prep
for MinC", e);
+ try {
+ log.debug("Waiting to completeClose for {}. {} writes {} scans",
extent, writesInProgress,
+ runningScans.size());
+ this.wait(50);
+ } catch (InterruptedException e) {
+ log.error("Interrupted waiting to completeClose for extent {}",
extent, e);
+ }
}
- }
- if (saveState) {
- // at this point all tablet data is flushed, so do a consistency check
- RuntimeException err = null;
- for (int i = 0; i < 5; i++) {
+ // It is assumed that nothing new would have been added to activeScans
since it was copied, so
+ // check that assumption. At this point activeScans should be empty or
everything in it should
+ // be disabled.
+ Preconditions.checkState(activeScans.stream()
+ .allMatch(scanDataSource ->
disallowNewReservations(scanDataSource.getScanParameters())));
+
+ getTabletMemory().waitForMinC();
+
+ shouldPrepMinC = saveState &&
getTabletMemory().getMemTable().getNumEntries() > 0;
+ if (shouldPrepMinC) {
try {
- closeConsistencyCheck();
- err = null;
- } catch (RuntimeException t) {
- err = t;
- log.error("Consistency check fails, retrying", t);
- sleepUninterruptibly(500, TimeUnit.MILLISECONDS);
+ mct = prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE);
+ } catch (NoNodeException e) {
+ throw new RuntimeException("Exception on " + extent + " during prep
for MinC", e);
}
}
- if (err != null) {
- log.error("Tablet closed consistency check has failed for {} giving up
and closing",
- this.extent);
- }
}
- try {
- getTabletMemory().getMemTable().delete(0);
- } catch (Exception t) {
- log.error("Failed to delete mem table : " + t.getMessage() + " for
tablet " + extent, t);
+ if (shouldPrepMinC) {
+ // must not run while tablet is locked, as this may result in deadlocks
+ mct.run();
Review Comment:
`MinorCompactionTask.run()` is done elsewhere as well, but other places do
not have tablet locked when calling `run()`
##########
server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java:
##########
@@ -937,70 +928,92 @@ synchronized void completeClose(boolean saveState) throws
IOException {
return currentlyUnreserved;
});
- if (log.isDebugEnabled() && System.nanoTime() - lastLogTime >
TimeUnit.SECONDS.toNanos(60)) {
- for (ScanDataSource activeScan : runningScans) {
- log.debug("Waiting on scan in completeClose {} {}", extent,
activeScan);
- }
-
- lastLogTime = System.nanoTime();
- }
+ long lastLogTime = System.nanoTime();
- try {
- log.debug("Waiting to completeClose for {}. {} writes {} scans",
extent, writesInProgress,
- runningScans.size());
- this.wait(50);
- } catch (InterruptedException e) {
- log.error("Interrupted waiting to completeClose for extent {}",
extent, e);
- }
- }
+ // wait for reads and writes to complete
+ while (writesInProgress > 0 || !runningScans.isEmpty()) {
+ runningScans.removeIf(scanDataSource -> {
+ boolean currentlyUnreserved =
disallowNewReservations(scanDataSource.getScanParameters());
+ if (currentlyUnreserved) {
+ log.debug("Disabled scan session in tablet close {} {}", extent,
scanDataSource);
+ }
+ return currentlyUnreserved;
+ });
- // It is assumed that nothing new would have been added to activeScans
since it was copied, so
- // check that assumption. At this point activeScans should be empty or
everything in it should
- // be disabled.
- Preconditions.checkState(activeScans.stream()
- .allMatch(scanDataSource ->
disallowNewReservations(scanDataSource.getScanParameters())));
+ if (log.isDebugEnabled()
+ && System.nanoTime() - lastLogTime > TimeUnit.SECONDS.toNanos(60))
{
+ for (ScanDataSource activeScan : runningScans) {
+ log.debug("Waiting on scan in completeClose {} {}", extent,
activeScan);
+ }
- getTabletMemory().waitForMinC();
+ lastLogTime = System.nanoTime();
+ }
- if (saveState && getTabletMemory().getMemTable().getNumEntries() > 0) {
- try {
- prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE).run();
- } catch (NoNodeException e) {
- throw new RuntimeException("Exception on " + extent + " during prep
for MinC", e);
+ try {
+ log.debug("Waiting to completeClose for {}. {} writes {} scans",
extent, writesInProgress,
+ runningScans.size());
+ this.wait(50);
+ } catch (InterruptedException e) {
+ log.error("Interrupted waiting to completeClose for extent {}",
extent, e);
+ }
}
- }
- if (saveState) {
- // at this point all tablet data is flushed, so do a consistency check
- RuntimeException err = null;
- for (int i = 0; i < 5; i++) {
+ // It is assumed that nothing new would have been added to activeScans
since it was copied, so
+ // check that assumption. At this point activeScans should be empty or
everything in it should
+ // be disabled.
+ Preconditions.checkState(activeScans.stream()
+ .allMatch(scanDataSource ->
disallowNewReservations(scanDataSource.getScanParameters())));
+
+ getTabletMemory().waitForMinC();
+
+ shouldPrepMinC = saveState &&
getTabletMemory().getMemTable().getNumEntries() > 0;
+ if (shouldPrepMinC) {
try {
- closeConsistencyCheck();
- err = null;
- } catch (RuntimeException t) {
- err = t;
- log.error("Consistency check fails, retrying", t);
- sleepUninterruptibly(500, TimeUnit.MILLISECONDS);
+ mct = prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE);
+ } catch (NoNodeException e) {
+ throw new RuntimeException("Exception on " + extent + " during prep
for MinC", e);
}
Review Comment:
The only part moved out of the sync block in this PR is `mct.run()`
##########
server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java:
##########
@@ -1288,8 +1299,6 @@ public boolean beginUpdatingLogsUsed(InMemoryMap
memTable, DfsLogger more, boole
boolean releaseLock = true;
- // Should not hold the tablet lock while trying to acquire the log lock
because this could lead
- // to deadlock. However there is a path in the code that does this. See
#3759
Review Comment:
I don't think this is the case anymore. I think this can be deleted and
#3759 can be closed
##########
server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java:
##########
@@ -937,70 +928,92 @@ synchronized void completeClose(boolean saveState) throws
IOException {
return currentlyUnreserved;
});
- if (log.isDebugEnabled() && System.nanoTime() - lastLogTime >
TimeUnit.SECONDS.toNanos(60)) {
- for (ScanDataSource activeScan : runningScans) {
- log.debug("Waiting on scan in completeClose {} {}", extent,
activeScan);
- }
-
- lastLogTime = System.nanoTime();
- }
+ long lastLogTime = System.nanoTime();
- try {
- log.debug("Waiting to completeClose for {}. {} writes {} scans",
extent, writesInProgress,
- runningScans.size());
- this.wait(50);
- } catch (InterruptedException e) {
- log.error("Interrupted waiting to completeClose for extent {}",
extent, e);
- }
- }
+ // wait for reads and writes to complete
+ while (writesInProgress > 0 || !runningScans.isEmpty()) {
+ runningScans.removeIf(scanDataSource -> {
+ boolean currentlyUnreserved =
disallowNewReservations(scanDataSource.getScanParameters());
+ if (currentlyUnreserved) {
+ log.debug("Disabled scan session in tablet close {} {}", extent,
scanDataSource);
+ }
+ return currentlyUnreserved;
+ });
- // It is assumed that nothing new would have been added to activeScans
since it was copied, so
- // check that assumption. At this point activeScans should be empty or
everything in it should
- // be disabled.
- Preconditions.checkState(activeScans.stream()
- .allMatch(scanDataSource ->
disallowNewReservations(scanDataSource.getScanParameters())));
+ if (log.isDebugEnabled()
+ && System.nanoTime() - lastLogTime > TimeUnit.SECONDS.toNanos(60))
{
+ for (ScanDataSource activeScan : runningScans) {
+ log.debug("Waiting on scan in completeClose {} {}", extent,
activeScan);
+ }
- getTabletMemory().waitForMinC();
+ lastLogTime = System.nanoTime();
+ }
- if (saveState && getTabletMemory().getMemTable().getNumEntries() > 0) {
- try {
- prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE).run();
- } catch (NoNodeException e) {
- throw new RuntimeException("Exception on " + extent + " during prep
for MinC", e);
+ try {
+ log.debug("Waiting to completeClose for {}. {} writes {} scans",
extent, writesInProgress,
+ runningScans.size());
+ this.wait(50);
+ } catch (InterruptedException e) {
+ log.error("Interrupted waiting to completeClose for extent {}",
extent, e);
+ }
}
- }
- if (saveState) {
- // at this point all tablet data is flushed, so do a consistency check
- RuntimeException err = null;
- for (int i = 0; i < 5; i++) {
+ // It is assumed that nothing new would have been added to activeScans
since it was copied, so
+ // check that assumption. At this point activeScans should be empty or
everything in it should
+ // be disabled.
+ Preconditions.checkState(activeScans.stream()
+ .allMatch(scanDataSource ->
disallowNewReservations(scanDataSource.getScanParameters())));
+
+ getTabletMemory().waitForMinC();
+
+ shouldPrepMinC = saveState &&
getTabletMemory().getMemTable().getNumEntries() > 0;
+ if (shouldPrepMinC) {
try {
- closeConsistencyCheck();
- err = null;
- } catch (RuntimeException t) {
- err = t;
- log.error("Consistency check fails, retrying", t);
- sleepUninterruptibly(500, TimeUnit.MILLISECONDS);
+ mct = prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE);
+ } catch (NoNodeException e) {
+ throw new RuntimeException("Exception on " + extent + " during prep
for MinC", e);
}
Review Comment:
done while tablet is locked since all other uses of `getMemTable()` are done
when tablet is locked. Also `prepareForMinC()` will lock tablet, so best to
keep that call in the `sync (this)` block.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]