adoroszlai commented on code in PR #3297: URL: https://github.com/apache/ozone/pull/3297#discussion_r848849361
########## hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/GrpcOmTransport.java: ########## @@ -0,0 +1,330 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.om.protocolPB; + +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.security.cert.X509Certificate; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.HashMap; +import java.util.Map; + +import com.google.common.net.HostAndPort; +import io.grpc.Status; +import io.grpc.StatusRuntimeException; +import org.apache.hadoop.ipc.RemoteException; + +import org.apache.hadoop.hdds.conf.Config; +import org.apache.hadoop.hdds.conf.ConfigGroup; +import org.apache.hadoop.hdds.conf.ConfigTag; +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.security.x509.SecurityConfig; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.ozone.OzoneConfigKeys; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.om.exceptions.OMException; +import 
org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse; +import org.apache.hadoop.security.UserGroupInformation; + +import org.apache.hadoop.ozone.om.ha.GrpcOMFailoverProxyProvider; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerServiceGrpc; +import io.grpc.ManagedChannel; +import io.grpc.netty.GrpcSslContexts; +import io.grpc.netty.NettyChannelBuilder; +import io.netty.handler.ssl.SslContextBuilder; + +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.ozone.om.OMConfigKeys + .OZONE_OM_GRPC_MAXIMUM_RESPONSE_LENGTH; +import static org.apache.hadoop.ozone.om.OMConfigKeys + .OZONE_OM_GRPC_MAXIMUM_RESPONSE_LENGTH_DEFAULT; + +/** + * Grpc transport for grpc between s3g and om. + */ +public class GrpcOmTransport implements OmTransport { + private static final Logger LOG = + LoggerFactory.getLogger(GrpcOmTransport.class); + + private static final String CLIENT_NAME = "GrpcOmTransport"; + private final AtomicBoolean isRunning = new AtomicBoolean(false); + + // gRPC specific + private static List<X509Certificate> caCerts = null; + + private OzoneManagerServiceGrpc.OzoneManagerServiceBlockingStub client; + private Map<String, + OzoneManagerServiceGrpc.OzoneManagerServiceBlockingStub> clients; + private Map<String, ManagedChannel> channels; + private int lastVisited = -1; + private ConfigurationSource conf; + + private AtomicReference<String> host; + private int maxSize; + private SecurityConfig secConfig; + + public static void setCaCerts(List<X509Certificate> x509Certificates) { + caCerts = x509Certificates; + } + + private List<String> oms; + private RetryPolicy retryPolicy; + private int failoverCount = 0; + private GrpcOMFailoverProxyProvider<OzoneManagerProtocolPB> + 
omFailoverProxyProvider; + + public GrpcOmTransport(ConfigurationSource conf, + UserGroupInformation ugi, String omServiceId) + throws IOException { + + this.channels = new HashMap<>(); + this.clients = new HashMap<>(); + this.conf = conf; + this.host = new AtomicReference(); + + secConfig = new SecurityConfig(conf); + maxSize = conf.getInt(OZONE_OM_GRPC_MAXIMUM_RESPONSE_LENGTH, + OZONE_OM_GRPC_MAXIMUM_RESPONSE_LENGTH_DEFAULT); Review Comment: I don't see this variable being used. Where is the setting applied? ########## hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/GrpcOmTransport.java: ########## @@ -0,0 +1,330 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ozone.om.protocolPB; + +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.security.cert.X509Certificate; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.HashMap; +import java.util.Map; + +import com.google.common.net.HostAndPort; +import io.grpc.Status; +import io.grpc.StatusRuntimeException; +import org.apache.hadoop.ipc.RemoteException; + +import org.apache.hadoop.hdds.conf.Config; +import org.apache.hadoop.hdds.conf.ConfigGroup; +import org.apache.hadoop.hdds.conf.ConfigTag; +import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.security.x509.SecurityConfig; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.ozone.OzoneConfigKeys; +import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.hadoop.ozone.om.exceptions.OMException; +import org.apache.hadoop.ozone.om.exceptions.OMException.ResultCodes; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMRequest; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse; +import org.apache.hadoop.security.UserGroupInformation; + +import org.apache.hadoop.ozone.om.ha.GrpcOMFailoverProxyProvider; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerServiceGrpc; +import io.grpc.ManagedChannel; +import io.grpc.netty.GrpcSslContexts; +import io.grpc.netty.NettyChannelBuilder; +import io.netty.handler.ssl.SslContextBuilder; + +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.ozone.om.OMConfigKeys + .OZONE_OM_GRPC_MAXIMUM_RESPONSE_LENGTH; +import static org.apache.hadoop.ozone.om.OMConfigKeys + .OZONE_OM_GRPC_MAXIMUM_RESPONSE_LENGTH_DEFAULT; + +/** + * Grpc transport for grpc 
between s3g and om. + */ +public class GrpcOmTransport implements OmTransport { + private static final Logger LOG = + LoggerFactory.getLogger(GrpcOmTransport.class); + + private static final String CLIENT_NAME = "GrpcOmTransport"; + private final AtomicBoolean isRunning = new AtomicBoolean(false); + + // gRPC specific + private static List<X509Certificate> caCerts = null; + + private OzoneManagerServiceGrpc.OzoneManagerServiceBlockingStub client; + private Map<String, + OzoneManagerServiceGrpc.OzoneManagerServiceBlockingStub> clients; + private Map<String, ManagedChannel> channels; + private int lastVisited = -1; + private ConfigurationSource conf; + + private AtomicReference<String> host; + private int maxSize; + private SecurityConfig secConfig; + + public static void setCaCerts(List<X509Certificate> x509Certificates) { + caCerts = x509Certificates; + } + + private List<String> oms; + private RetryPolicy retryPolicy; + private int failoverCount = 0; + private GrpcOMFailoverProxyProvider<OzoneManagerProtocolPB> + omFailoverProxyProvider; + + public GrpcOmTransport(ConfigurationSource conf, + UserGroupInformation ugi, String omServiceId) + throws IOException { + + this.channels = new HashMap<>(); + this.clients = new HashMap<>(); + this.conf = conf; + this.host = new AtomicReference(); + + secConfig = new SecurityConfig(conf); + maxSize = conf.getInt(OZONE_OM_GRPC_MAXIMUM_RESPONSE_LENGTH, + OZONE_OM_GRPC_MAXIMUM_RESPONSE_LENGTH_DEFAULT); + + omFailoverProxyProvider = new GrpcOMFailoverProxyProvider( + conf, + ugi, + omServiceId, + OzoneManagerProtocolPB.class); + + start(); + } + + public void start() throws IOException { + host.set(omFailoverProxyProvider + .getGrpcProxyAddress( + omFailoverProxyProvider.getCurrentProxyOMNodeId())); + + if (!isRunning.compareAndSet(false, true)) { + LOG.info("Ignore. 
already started."); + return; + } + + List<String> nodes = omFailoverProxyProvider.getGrpcOmNodeIDList(); + for (String nodeId : nodes) { + String hostaddr = omFailoverProxyProvider.getGrpcProxyAddress(nodeId); + HostAndPort hp = HostAndPort.fromString(hostaddr); + + NettyChannelBuilder channelBuilder = + NettyChannelBuilder.forAddress(hp.getHost(), hp.getPort()) + .usePlaintext() + .maxInboundMessageSize(OzoneConsts.OZONE_SCM_CHUNK_MAX_SIZE); + + if (secConfig.isGrpcTlsEnabled()) { + try { + SslContextBuilder sslContextBuilder = GrpcSslContexts.forClient(); + if (secConfig.isSecurityEnabled()) { + if (caCerts != null) { + sslContextBuilder.trustManager(caCerts); + } else { + LOG.error("x509Certicates empty"); + } + channelBuilder.useTransportSecurity(). + sslContext(sslContextBuilder.build()); + } else { + LOG.error("ozone.security not enabled when TLS specified," + + " using plaintext"); + } + } catch (Exception ex) { + LOG.error("cannot establish TLS for grpc om transport client"); + } + } else { + channelBuilder.usePlaintext(); + } + + channels.put(hostaddr, channelBuilder.build()); + clients.put(hostaddr, + OzoneManagerServiceGrpc + .newBlockingStub(channels.get(hostaddr))); + } + int maxFailovers = conf.getInt( + OzoneConfigKeys.OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY, + OzoneConfigKeys.OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT); + + retryPolicy = omFailoverProxyProvider.getRetryPolicy(maxFailovers); + LOG.info("{}: started", CLIENT_NAME); + } + + @Override + public OMResponse submitRequest(OMRequest payload) throws IOException { + OMResponse resp = null; + boolean tryOtherHost = true; + ResultCodes resultCode = ResultCodes.INTERNAL_ERROR; + while (tryOtherHost) { + tryOtherHost = false; + try { + resp = clients.get(host.get()).submitRequest(payload); + } catch (StatusRuntimeException e) { + if (e.getStatus().getCode() == Status.Code.UNAVAILABLE) { + resultCode = ResultCodes.TIMEOUT; + } + Exception exp = new Exception(e); + tryOtherHost = 
shouldRetry(unwrapException(exp)); + if (!tryOtherHost) { + throw new OMException(resultCode); + } + } + } + return resp; + } + + private Exception unwrapException(Exception ex) { + Exception grpcException = null; + try { + StatusRuntimeException srexp = + (StatusRuntimeException)ex.getCause(); + Status status = srexp.getStatus(); + LOG.debug("GRPC exception wrapped: {}", status.getDescription()); + if (status.getCode() == Status.Code.INTERNAL) { + // exception potentially generated by OzoneManagerServiceGrpc + Class<?> realClass = Class.forName(status.getDescription() + .substring(0, status.getDescription() + .indexOf(":"))); + Class<? extends Exception> cls = realClass + .asSubclass(Exception.class); + Constructor<? extends Exception> cn = cls.getConstructor(String.class); + cn.setAccessible(true); + grpcException = cn.newInstance(status.getDescription()); + IOException remote = null; + try { + String cause = status.getDescription(); + cause = cause.substring(cause.indexOf(":") + 2); + remote = new RemoteException(cause.substring(0, cause.indexOf(":")), + cause.substring(cause.indexOf(":") + 1)); + grpcException.initCause(remote); + } catch (Exception e) { + LOG.error("cannot get cause for remote exception"); + } + } else { + // exception generated by connection failure, gRPC + grpcException = ex; + } + } catch (Exception e) { + grpcException = new IOException(e); + LOG.error("error unwrapping exception from OMResponse {}"); + } + return grpcException; + } + + private boolean shouldRetry(Exception ex) { + boolean retry = false; + RetryPolicy.RetryAction action = null; + try { + action = retryPolicy.shouldRetry((Exception)ex, 0, failoverCount++, true); + LOG.debug("grpc failover retry action {}", action.action); + if (action.action == RetryPolicy.RetryAction.RetryDecision.FAIL) { + retry = false; + LOG.error("Retry request failed. 
" + action.reason, ex); + } else { + if (action.action == RetryPolicy.RetryAction.RetryDecision.RETRY || + (action.action == RetryPolicy.RetryAction.RetryDecision + .FAILOVER_AND_RETRY)) { + if (action.delayMillis > 0) { + try { + Thread.sleep(action.delayMillis); + } catch (Exception e) { + LOG.error("Error trying sleep thread for {}", action.delayMillis); + } + } + // switch om host to current proxy OMNodeId + omFailoverProxyProvider.performFailover(null); + host.set(omFailoverProxyProvider + .getGrpcProxyAddress( + omFailoverProxyProvider.getCurrentProxyOMNodeId())); + retry = true; + } + } + } catch (Exception e) { + LOG.error("Failed failover exception {}", e); + } + return retry; + } + + // stub implementation for interface + @Override + public Text getDelegationTokenService() { + return new Text(); + } + + public void shutdown() { + for (Map.Entry<String, ManagedChannel> entry : channels.entrySet()) { + ManagedChannel channel = entry.getValue(); + channel.shutdown(); + try { + channel.awaitTermination(5, TimeUnit.SECONDS); + } catch (Exception e) { + LOG.error("failed to shutdown OzoneManagerServiceGrpc channel {} : {}", + entry.getKey(), e); + } + } + } + + @Override + public void close() throws IOException { + shutdown(); + } + + /** + * GrpcOmTransport configuration in Java style configuration class. + */ + @ConfigGroup(prefix = "ozone.om.grpc") + public static final class GrpcOmTransportConfig { + @Config(key = "port", defaultValue = "8981", Review Comment: GrpcOmTransport.GrpcOmTransportConfig and GrpcOzoneManagerServer.GrpcOzoneManagerServerConfig both define the same configuration (`ozone.om.grpc.port`). I think we only need one of them. 
########## hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java: ########## @@ -65,6 +64,8 @@ public void initializeMemberVariables() { xmlPrefixToSkipCompare.add("ipc.client.rpc-timeout.ms"); xmlPropsToSkipCompare.add("ozone.om.leader.election.minimum.timeout" + ".duration"); // Deprecated config + xmlPrefixToSkipCompare.add("ozone.s3g"); + configurationPrefixToSkipCompare.add("ozone.s3g"); Review Comment: Does this mean that all S3 config keys (everything under the `ozone.s3g` prefix) are now ignored for the check? ########## hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto: ########## @@ -1354,9 +1354,9 @@ message UpdateGetS3SecretRequest { This will be used by OM to authenticate S3 gateway requests on a per request basis. */ message S3Authentication { - optional string stringToSign = 1; - optional string signature = 2; - optional string accessId = 3; + required string stringToSign = 1; + required string signature = 2; + required string accessId = 3; Review Comment: Should we keep these fields `optional`? @kerneltime ########## hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestGrpcOzoneManagerServer.java: ########## @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ * + */ + +package org.apache.hadoop.ozone.om; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.Timeout; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.mockito.Mockito; +import org.apache.hadoop.ozone.protocolPB.OzoneManagerProtocolServerSideTranslatorPB; + +/** + * Tests for GrpcOzoneManagerServer. + */ +public class TestGrpcOzoneManagerServer { + private static final Logger LOG = + LoggerFactory.getLogger(TestGrpcOzoneManagerServer.class); + private OzoneManager ozoneManager; + private OzoneManagerProtocolServerSideTranslatorPB omServerProtocol; + private GrpcOzoneManagerServer server; + + @Rule + public Timeout timeout = Timeout.seconds(30); + + @Test + public void testStartStop() throws Exception { + OzoneConfiguration conf = new OzoneConfiguration(); + ozoneManager = Mockito.mock(OzoneManager.class); + omServerProtocol = ozoneManager.getOmServerProtocol(); + + server = new GrpcOzoneManagerServer(conf, + omServerProtocol, + ozoneManager.getDelegationTokenMgr(), + ozoneManager.getCertificateClient()); + + try { + server.start(); + } catch (Exception e) { + e.printStackTrace(); Review Comment: Printing the stack trace would not cause the test to fail in case of exception. Seems like this test would always pass. ########## hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/protocolPB/TestGrpcOmTransport.java: ########## @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + * + */ + +package org.apache.hadoop.ozone.protocolPB; + +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.om.protocolPB.GrpcOmTransport; +import org.apache.hadoop.ozone.om.protocolPB.OmTransport; +import org.apache.hadoop.ozone.om.protocolPB.OmTransportFactory; +import org.apache.hadoop.security.UserGroupInformation; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.Timeout; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_TRANSPORT_CLASS; + +/** + * Tests for GrpcOmTransport. 
+ */ +public class TestGrpcOmTransport { + + private static final Logger LOG = + LoggerFactory.getLogger(TestGrpcOmTransport.class); + @Rule + public Timeout timeout = Timeout.seconds(30); + + + @Test + public void testGrpcOmTransportFactory() throws Exception { + String omServiceId = ""; + String transportCls = GrpcOmTransport.class.getName(); + OzoneConfiguration conf = new OzoneConfiguration(); + conf.set(OZONE_OM_TRANSPORT_CLASS, + transportCls); + + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + OmTransport omTransport = OmTransportFactory.create(conf, ugi, omServiceId); + Assert.assertEquals(GrpcOmTransport.class.getSimpleName(), + omTransport.getClass().getSimpleName()); + + } + + @Test + public void testHrpcOmTransportFactory() throws Exception { + String omServiceId = ""; + OzoneConfiguration conf = new OzoneConfiguration(); + + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + OmTransport omTransport = OmTransportFactory.create(conf, ugi, omServiceId); + // OmTransport should be Hadoop Rpc and + // fail equality GrpcOmTransport equality test + Assert.assertNotEquals(GrpcOmTransport.class.getSimpleName(), + omTransport.getClass().getSimpleName()); + } + + @Test + public void testStartStop() throws Exception { + String omServiceId = ""; + OzoneConfiguration conf = new OzoneConfiguration(); + + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + GrpcOmTransport client = new GrpcOmTransport(conf, ugi, omServiceId); + + try { + client.start(); + } catch (Exception e) { + e.printStackTrace(); Review Comment: Same comment here about the test: seems like this test would always pass. 
########## hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java: ########## @@ -49,8 +48,8 @@ public void initializeMemberVariables() { configurationClasses = new Class[] {OzoneConfigKeys.class, ScmConfigKeys.class, OMConfigKeys.class, HddsConfigKeys.class, + ReconServerConfigKeys.class, ReconConfigKeys.class, ReconServerConfigKeys.class, Review Comment: Nit: `ReconServerConfigKeys.class` is now listed twice in `configurationClasses`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
