Apache9 commented on code in PR #8100:
URL: https://github.com/apache/hbase/pull/8100#discussion_r3104239438
##
hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClusterIdFetcher.java:
##
@@ -88,10 +91,9 @@ class ClusterIdFetcher {
private void getClusterId(int index) {
ServerName server = bootstrapServers.get(index);
LOG.debug("Going to request {} for getting cluster id", server);
-// user and rpcTimeout are both not important here, as we will not
actually send any rpc calls
-// out, only a preamble connection header, but if we pass null as user,
there will be NPE in
-// some code paths...
-RpcChannel channel = rpcClient.createRpcChannel(server, user, 0);
+// The preamble exchange still requires TCP connect and potentially TLS
negotiation, so we use
+// the configured rpc timeout to bound the connection attempt.
+RpcChannel channel = rpcClient.createRpcChannel(server, user,
rpcTimeoutMs);
Review Comment:
We only send a preamble header here, I wonder whether the rpc timeout
actually work. And I can make sure that we do not have TLS negotiation when
sending preamble header.
##
hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestConnectionRegistryRpcClientFailure.java:
##
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client;
+
+import static org.junit.jupiter.api.Assertions.assertInstanceOf;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.IOException;
+import java.net.SocketAddress;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseCommonTestingUtil;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.ipc.RpcClient;
+import org.apache.hadoop.hbase.ipc.RpcClientFactory;
+import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.testclassification.ClientTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.FutureUtils;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+
+import org.apache.hbase.thirdparty.com.google.protobuf.BlockingRpcChannel;
+import
org.apache.hbase.thirdparty.com.google.protobuf.Descriptors.MethodDescriptor;
+import org.apache.hbase.thirdparty.com.google.protobuf.Message;
+import org.apache.hbase.thirdparty.com.google.protobuf.RpcCallback;
+import org.apache.hbase.thirdparty.com.google.protobuf.RpcChannel;
+import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
+
+import
org.apache.hadoop.hbase.shaded.protobuf.generated.RegistryProtos.ConnectionRegistryService;
+import
org.apache.hadoop.hbase.shaded.protobuf.generated.RegistryProtos.GetConnectionRegistryResponse;
+
+/**
+ * Test that ConnectionRegistryRpcStubHolder properly completes its future
when RPC client creation
+ * fails. Before the fix, an exception thrown by
RpcClientFactory.createClient() inside the
+ * FutureUtils.addListener callback would be swallowed, leaving the
CompletableFuture permanently
+ * incomplete and hanging all callers.
+ */
+@Tag(ClientTests.TAG)
+@Tag(SmallTests.TAG)
+public class TestConnectionRegistryRpcClientFailure {
+
+ private static final String HEDGED_REQS_FANOUT_CONFIG_NAME =
"hbase.test.hedged.reqs.fanout";
+ private static final String INITIAL_DELAY_SECS_CONFIG_NAME =
+"hbase.test.refresh.initial.delay.secs";
+ private static final String REFRESH_INTERVAL_SECS_CONFIG_NAME =
+"hbase.test.refresh.interval.secs";
+ private static final String MIN_REFRESH_INTERVAL_SECS_CONFIG_NAME =
+"hbase.test.min.refresh.interval.secs";
+
+ private static final HBaseCommonTestingUtil UTIL = new
HBaseCommonTestingUtil();
+
+ private static Set BOOTSTRAP_NODES;
+
+ /**
+ * RPC client that succeeds for the preamble cluster ID fetch (clusterId is
null) but throws on
+ * the rea