This is an automated email from the ASF dual-hosted git repository.
vjasani pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new 069686540cc HBASE-29180 Apply fail-fast retry limit for UnknownHostException (#6813)
069686540cc is described below
commit 069686540cc393aa69e02bc9bccb34f0a4acf935
Author: Viraj Jasani <[email protected]>
AuthorDate: Tue Mar 18 17:13:17 2025 -0700
HBASE-29180 Apply fail-fast retry limit for UnknownHostException (#6813)
Signed-off-by: Andrew Purtell <[email protected]>
---
.../hbase/master/procedure/RSProcedureDispatcher.java | 16 ++++++++++------
.../org/apache/hadoop/hbase/util/RSProcDispatcher.java | 14 +++++++++++---
.../org/apache/hadoop/hbase/util/TestProcDispatcher.java | 2 +-
3 files changed, 22 insertions(+), 10 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java
index 260b012339e..f255c918ceb 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.master.procedure;
import java.io.IOException;
import java.lang.Thread.UncaughtExceptionHandler;
+import java.net.UnknownHostException;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
@@ -411,19 +412,22 @@ public class RSProcedureDispatcher extends RemoteProcedureDispatcher<MasterProce
}
/**
- * Returns true if the error or its cause is of type ConnectionClosedException.
+ * Returns true if the error or its cause indicates a network connection issue.
* @param e IOException thrown by the underlying rpc framework.
- * @return True if the error or its cause is of type ConnectionClosedException.
+ * @return True if the error or its cause indicates a network connection issue.
*/
- private boolean isConnectionClosedError(IOException e) {
- if (e instanceof ConnectionClosedException) {
+ private boolean isNetworkError(IOException e) {
+ if (e instanceof ConnectionClosedException || e instanceof
UnknownHostException) {
return true;
}
Throwable cause = e;
while (true) {
if (cause instanceof IOException) {
IOException unwrappedCause = unwrapException((IOException) cause);
- if (unwrappedCause instanceof ConnectionClosedException) {
+ if (
+ unwrappedCause instanceof ConnectionClosedException
+ || unwrappedCause instanceof UnknownHostException
+ ) {
return true;
}
}
@@ -440,7 +444,7 @@ public class RSProcedureDispatcher extends RemoteProcedureDispatcher<MasterProce
* @return True if the error type can allow fail-fast.
*/
private boolean isErrorTypeFailFast(IOException e) {
- return e instanceof CallQueueTooBigException || isSaslError(e) ||
isConnectionClosedError(e);
+ return e instanceof CallQueueTooBigException || isSaslError(e) ||
isNetworkError(e);
}
private long getMaxWaitTime() {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RSProcDispatcher.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RSProcDispatcher.java
index ae0775af3e2..4180238ca6c 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RSProcDispatcher.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RSProcDispatcher.java
@@ -18,6 +18,9 @@
package org.apache.hadoop.hbase.util;
import java.io.IOException;
+import java.net.UnknownHostException;
+import java.util.Arrays;
+import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.hbase.ServerName;
@@ -39,7 +42,12 @@ public class RSProcDispatcher extends RSProcedureDispatcher {
private static final Logger LOG =
LoggerFactory.getLogger(RSProcDispatcher.class);
- private static final AtomicInteger i = new AtomicInteger();
+ private static final AtomicInteger I = new AtomicInteger();
+
+ private static final List<IOException> ERRORS =
+ Arrays.asList(new ConnectionClosedException("test connection closed
error..."),
+ new UnknownHostException("test unknown host error..."));
+ private static final AtomicInteger ERROR_IDX = new AtomicInteger();
public RSProcDispatcher(MasterServices master) {
super(master);
@@ -66,7 +74,7 @@ public class RSProcDispatcher extends RSProcedureDispatcher {
@Override
public AdminProtos.ExecuteProceduresResponse sendRequest(final ServerName
serverName,
final AdminProtos.ExecuteProceduresRequest request) throws IOException {
- int j = i.addAndGet(1);
+ int j = I.addAndGet(1);
LOG.info("sendRequest() req: {} , j: {}", request, j);
if (j == 12 || j == 22) {
// Execute the remote close and open region requests in the last (5th) retry before
@@ -84,7 +92,7 @@ public class RSProcDispatcher extends RSProcedureDispatcher {
// schedules recoveries for the server.
// We will have ABNORMALLY_CLOSED regions, and they are expected to recover on their own.
if (j >= 10 && j <= 15 || j >= 18 && j <= 23) {
- throw new ConnectionClosedException("test connection closed error...");
+ throw ERRORS.get(ERROR_IDX.getAndIncrement() % ERRORS.size());
}
try {
return getRsAdmin().executeProcedures(null, request);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestProcDispatcher.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestProcDispatcher.java
index 5b91879e1f8..740a65f2b61 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestProcDispatcher.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestProcDispatcher.java
@@ -163,7 +163,7 @@ public class TestProcDispatcher {
== ProcedureProtos.ProcedureState.SUCCESS)
.count() ==
master.getMasterProcedureExecutor().getProcedures().size()
&& master.getMasterProcedureExecutor().getProcedures().stream()
- .filter(proc -> proc instanceof ServerCrashProcedure).count() > 0;
+ .anyMatch(proc -> proc instanceof ServerCrashProcedure);
});
// Ensure we have no inconsistent regions