This is an automated email from the ASF dual-hosted git repository.

vjasani pushed a commit to branch branch-2.6
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.6 by this push:
     new d3c6d7c80df HBASE-29180 Apply fail-fast retry limit for UnknownHostException (#6813)
d3c6d7c80df is described below

commit d3c6d7c80df93ea4f96b55276dc35a07177a16f3
Author: Viraj Jasani <[email protected]>
AuthorDate: Tue Mar 18 17:13:17 2025 -0700

    HBASE-29180 Apply fail-fast retry limit for UnknownHostException (#6813)
    
    Signed-off-by: Andrew Purtell <[email protected]>
---
 .../hbase/master/procedure/RSProcedureDispatcher.java    | 16 ++++++++++------
 .../org/apache/hadoop/hbase/util/RSProcDispatcher.java   | 14 +++++++++++---
 .../org/apache/hadoop/hbase/util/TestProcDispatcher.java |  2 +-
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java
index 260b012339e..f255c918ceb 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.master.procedure;
 
 import java.io.IOException;
 import java.lang.Thread.UncaughtExceptionHandler;
+import java.net.UnknownHostException;
 import java.util.List;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
@@ -411,19 +412,22 @@ public class RSProcedureDispatcher extends RemoteProcedureDispatcher<MasterProce
     }
 
     /**
-     * Returns true if the error or its cause is of type ConnectionClosedException.
+     * Returns true if the error or its cause indicates a network connection issue.
      * @param e IOException thrown by the underlying rpc framework.
-     * @return True if the error or its cause is of type ConnectionClosedException.
+     * @return True if the error or its cause indicates a network connection issue.
      */
-    private boolean isConnectionClosedError(IOException e) {
-      if (e instanceof ConnectionClosedException) {
+    private boolean isNetworkError(IOException e) {
+      if (e instanceof ConnectionClosedException || e instanceof UnknownHostException) {
         return true;
       }
       Throwable cause = e;
       while (true) {
         if (cause instanceof IOException) {
           IOException unwrappedCause = unwrapException((IOException) cause);
-          if (unwrappedCause instanceof ConnectionClosedException) {
+          if (
+            unwrappedCause instanceof ConnectionClosedException
+              || unwrappedCause instanceof UnknownHostException
+          ) {
             return true;
           }
         }
@@ -440,7 +444,7 @@ public class RSProcedureDispatcher extends RemoteProcedureDispatcher<MasterProce
      * @return True if the error type can allow fail-fast.
      */
     private boolean isErrorTypeFailFast(IOException e) {
-      return e instanceof CallQueueTooBigException || isSaslError(e) || isConnectionClosedError(e);
+      return e instanceof CallQueueTooBigException || isSaslError(e) || isNetworkError(e);
     }
 
     private long getMaxWaitTime() {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RSProcDispatcher.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RSProcDispatcher.java
index ae0775af3e2..4180238ca6c 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RSProcDispatcher.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RSProcDispatcher.java
@@ -18,6 +18,9 @@
 package org.apache.hadoop.hbase.util;
 
 import java.io.IOException;
+import java.net.UnknownHostException;
+import java.util.Arrays;
+import java.util.List;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.hadoop.hbase.ServerName;
@@ -39,7 +42,12 @@ public class RSProcDispatcher extends RSProcedureDispatcher {
 
   private static final Logger LOG = LoggerFactory.getLogger(RSProcDispatcher.class);
 
-  private static final AtomicInteger i = new AtomicInteger();
+  private static final AtomicInteger I = new AtomicInteger();
+
+  private static final List<IOException> ERRORS =
+    Arrays.asList(new ConnectionClosedException("test connection closed error..."),
+      new UnknownHostException("test unknown host error..."));
+  private static final AtomicInteger ERROR_IDX = new AtomicInteger();
 
   public RSProcDispatcher(MasterServices master) {
     super(master);
@@ -66,7 +74,7 @@ public class RSProcDispatcher extends RSProcedureDispatcher {
     @Override
     public AdminProtos.ExecuteProceduresResponse sendRequest(final ServerName serverName,
       final AdminProtos.ExecuteProceduresRequest request) throws IOException {
-      int j = i.addAndGet(1);
+      int j = I.addAndGet(1);
       LOG.info("sendRequest() req: {} , j: {}", request, j);
       if (j == 12 || j == 22) {
         // Execute the remote close and open region requests in the last (5th) retry before
@@ -84,7 +92,7 @@ public class RSProcDispatcher extends RSProcedureDispatcher {
       // schedules recoveries for the server.
       // We will have ABNORMALLY_CLOSED regions, and they are expected to recover on their own.
       if (j >= 10 && j <= 15 || j >= 18 && j <= 23) {
-        throw new ConnectionClosedException("test connection closed error...");
+        throw ERRORS.get(ERROR_IDX.getAndIncrement() % ERRORS.size());
       }
       try {
         return getRsAdmin().executeProcedures(null, request);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestProcDispatcher.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestProcDispatcher.java
index 5b91879e1f8..740a65f2b61 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestProcDispatcher.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestProcDispatcher.java
@@ -163,7 +163,7 @@ public class TestProcDispatcher {
               == ProcedureProtos.ProcedureState.SUCCESS)
          .count() == master.getMasterProcedureExecutor().getProcedures().size()
         && master.getMasterProcedureExecutor().getProcedures().stream()
-          .filter(proc -> proc instanceof ServerCrashProcedure).count() > 0;
+          .anyMatch(proc -> proc instanceof ServerCrashProcedure);
     });
 
     // Ensure we have no inconsistent regions

Reply via email to