ndimiduk commented on a change in pull request #1305: HBASE-23984 [Flakey Tests] TestMasterAbortAndRSGotKilled fails in tea… URL: https://github.com/apache/hbase/pull/1305#discussion_r394687434
########## File path: hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java ########## @@ -3185,71 +3195,61 @@ private void closeRegionIgnoreErrors(RegionInfo region, final boolean abort) { * If a close was in progress, this new request will be ignored, and an exception thrown. * </p> * - * @param encodedName Region to close * @param abort True if we are aborting + * @param destination Where the Region is being moved too... maybe null if unknown. * @return True if closed a region. * @throws NotServingRegionException if the region is not online */ - protected boolean closeRegion(String encodedName, final boolean abort, final ServerName sn) - throws NotServingRegionException { - //Check for permissions to close. - HRegion actualRegion = this.getRegion(encodedName); - // Can be null if we're calling close on a region that's not online - if ((actualRegion != null) && (actualRegion.getCoprocessorHost() != null)) { - try { - actualRegion.getCoprocessorHost().preClose(false); - } catch (IOException exp) { - LOG.warn("Unable to close region: the coprocessor launched an error ", exp); - return false; - } - } - - // previous can come back 'null' if not in map. - final Boolean previous = this.regionsInTransitionInRS.putIfAbsent(Bytes.toBytes(encodedName), - Boolean.FALSE); - + protected boolean closeRegion(String encodedRegionName, final boolean abort, + @Nullable final ServerName destination) throws NotServingRegionException { + // TODO: Check for perms to close. + // Ideally we'd shove a bunch of the below logic into CloseRegionHandler only we have to do + // some handling inline w/ the request such as throwing NotServervingRegionException if we + // are not hosting so client gets the message directly; can't do this from inside an + // executor service. + Region region = getRegion(encodedRegionName); + if (region == null) { + // Changed behavior. We'd proceed though Region not online. + // The master deletes the znode when it receives this exception. + throw new NotServingRegionException(encodedRegionName); + } + byte [] encodedRegionNameAsBytes = Bytes.toBytes(encodedRegionName); + // This getting from this.regionsInTransitionInRS and checking the return is done in a few + // places, mostly over in Handlers; this.regionsInTransitionInRS is how we ensure we don't + // clash opens/closes or duplicate closes when one is ongoing. FYI. + final Boolean previous = this.regionsInTransitionInRS. + putIfAbsent(encodedRegionNameAsBytes, Boolean.FALSE); if (Boolean.TRUE.equals(previous)) { - LOG.info("Received CLOSE for the region:" + encodedName + " , which we are already " + - "trying to OPEN. Cancelling OPENING."); - if (!regionsInTransitionInRS.replace(Bytes.toBytes(encodedName), previous, Boolean.FALSE)) { + LOG.info("Received CLOSE for {} which is OPENING. Cancelling OPENING.", encodedRegionName); + if (!this.regionsInTransitionInRS.replace(encodedRegionNameAsBytes, previous, + Boolean.FALSE)) { // The replace failed. That should be an exceptional case, but theoretically it can happen. - // We're going to try to do a standard close then. - LOG.warn("The opening for region " + encodedName + " was done before we could cancel it." + - " Doing a standard close now"); - return closeRegion(encodedName, abort, sn); + // We're going to try to rerun the close. + LOG.warn("The opening of {} was done before we could cancel it; running a new close now", + encodedRegionName); + return closeRegion(encodedRegionName, abort, destination); } // Let's get the region from the online region list again - actualRegion = this.getRegion(encodedName); - if (actualRegion == null) { // If already online, we still need to close it. - LOG.info("The opening previously in progress has been cancelled by a CLOSE request."); + region = this.getRegion(encodedRegionName); + if (region == null) { + LOG.info("The opening of {} has been cancelled by a CLOSE request", encodedRegionName); Review comment: also not your code, but "the opening of ...", I think this is an incorrect assumption. We don't know that the region opening was canceled.. could be a failed open, for example. All we know is that the region is not present in the list of online regions at this time. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services