bharatviswa504 commented on a change in pull request #1494: URL: https://github.com/apache/ozone/pull/1494#discussion_r640369321
########## File path: hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMInterServiceProtocolPB.java ########## @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.om.protocolPB; + +import org.apache.hadoop.hdds.annotation.InterfaceAudience; +import org.apache.hadoop.ozone.om.OMConfigKeys; +import org.apache.hadoop.ipc.ProtocolInfo; +import org.apache.hadoop.ozone.protocol.proto + .OzoneManagerInterServiceProtocolProtos.OzoneManagerInterService; +import org.apache.hadoop.security.KerberosInfo; +import org.apache.hadoop.security.token.TokenInfo; +import org.apache.hadoop.ozone.security.OzoneDelegationTokenSelector; + +/** + * Protocol used for communication between OMs. + */ +@ProtocolInfo(protocolName = + "org.apache.hadoop.ozone.om.protocol.OMInterServiceProtocol", + protocolVersion = 1) +@KerberosInfo( + serverPrincipal = OMConfigKeys.OZONE_OM_KERBEROS_PRINCIPAL_KEY) +@TokenInfo(OzoneDelegationTokenSelector.class) [email protected] Review comment: Looks like we need to add this new service to OMPolicyProvider and add the ACL property to ozone-default.xml To allow service-level authorization for this rpc service. 
########## File path: hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMInterServiceProtocolPB.java ########## @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.om.protocolPB; + +import org.apache.hadoop.hdds.annotation.InterfaceAudience; +import org.apache.hadoop.ozone.om.OMConfigKeys; +import org.apache.hadoop.ipc.ProtocolInfo; +import org.apache.hadoop.ozone.protocol.proto + .OzoneManagerInterServiceProtocolProtos.OzoneManagerInterService; +import org.apache.hadoop.security.KerberosInfo; +import org.apache.hadoop.security.token.TokenInfo; +import org.apache.hadoop.ozone.security.OzoneDelegationTokenSelector; + +/** + * Protocol used for communication between OMs. + */ +@ProtocolInfo(protocolName = + "org.apache.hadoop.ozone.om.protocol.OMInterServiceProtocol", + protocolVersion = 1) +@KerberosInfo( + serverPrincipal = OMConfigKeys.OZONE_OM_KERBEROS_PRINCIPAL_KEY) +@TokenInfo(OzoneDelegationTokenSelector.class) Review comment: Question: Do we need to add TokenInfo for this Protocol? As for this, the user should come with a Kerberos credential only for bootstrap right? 
Or in general when do we add TokenInfo only when we allow that service with token auth? cc @xiaoyuyao ########## File path: hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OMInterServiceProtocol.java ########## @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.om.protocol; + +import java.io.Closeable; +import java.io.IOException; +import org.apache.hadoop.ozone.om.helpers.OMNodeDetails; + +/** + * Protocol for inter OM communication. + */ +public interface OMInterServiceProtocol extends Closeable { Review comment: Do we need to add here also KerberosInfo. From other Protocol classes which are there in ozone, we have the info here also. Not sure mandatory or not here. ########## File path: hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java ########## @@ -352,7 +361,12 @@ public MiniOzoneCluster build() throws IOException { numOfActiveOMs = numOfOMs; } - // If num of ActiveOMs is not set, set it to numOfOMs. + // If num of OMs it not set, set it to 1. 
Review comment: Minor: ActiveOMs -> ActiveSCMs OMs -> SCMs ########## File path: hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerBootstrap.java ########## @@ -0,0 +1,212 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.om; + +import com.google.common.base.Supplier; +import java.io.File; +import java.io.FileFilter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdfs.server.common.Storage; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; +import org.apache.hadoop.ozone.client.ObjectStore; +import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.client.OzoneClientFactory; +import org.apache.hadoop.ozone.client.OzoneVolume; +import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.After; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.rules.Timeout; + +import static org.apache.hadoop.ozone.OzoneConsts.SCM_DUMMY_SERVICE_ID; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_RATIS_SERVER_REQUEST_TIMEOUT_DEFAULT; +import static org.apache.hadoop.ozone.om.TestOzoneManagerHA.createKey; + +public class TestOzoneManagerBootstrap { + + @Rule + public ExpectedException exception = ExpectedException.none(); + + @Rule + public Timeout timeout = new Timeout(500_000); + + private MiniOzoneHAClusterImpl cluster = null; + private ObjectStore objectStore; + private OzoneConfiguration conf; + private final String clusterId = UUID.randomUUID().toString(); + private final String scmId = UUID.randomUUID().toString(); + + private static final int NUM_INITIAL_OMS = 3; + + private static final String OM_SERVICE_ID = "om-bootstrap"; + private static final String VOLUME_NAME; + private static final String BUCKET_NAME; + + private long lastTransactionIndex; + + static { + VOLUME_NAME = 
"volume" + RandomStringUtils.randomNumeric(5); + BUCKET_NAME = "bucket" + RandomStringUtils.randomNumeric(5); + } + + private void setupCluster() throws Exception { + setupCluster(NUM_INITIAL_OMS); + } + + private void setupCluster(int numInitialOMs) throws Exception { + conf = new OzoneConfiguration(); + cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newHABuilder(conf) + .setClusterId(clusterId) + .setScmId(scmId) + .setSCMServiceId(SCM_DUMMY_SERVICE_ID) + .setOMServiceId(OM_SERVICE_ID) + .setNumOfOzoneManagers(numInitialOMs) + .build(); + cluster.waitForClusterToBeReady(); + objectStore = OzoneClientFactory.getRpcClient(OM_SERVICE_ID, conf) + .getObjectStore(); + + // Perform some transactions + objectStore.createVolume(VOLUME_NAME); + OzoneVolume volume = objectStore.getVolume(VOLUME_NAME); + volume.createBucket(BUCKET_NAME); + OzoneBucket bucket = volume.getBucket(BUCKET_NAME); + createKey(bucket); + + lastTransactionIndex = cluster.getOMLeader().getRatisSnapshotIndex(); Review comment: We commit to DB once double buffer flushes. Do you think we need some wait or read from StateMachine and getLastAppliedIndex? ########## File path: hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java ########## @@ -652,10 +667,158 @@ private void initOMHAConfig(int basePort) throws IOException { conf.setInt(omRatisPortKey, port + 4); } - conf.set(omNodesKey, omNodesKeyValue.substring(1)); + conf.set(omNodesKey, String.join(",", omNodeIds)); + } + } + + /** + * Bootstrap new OM and add to existing OM HA service ring. 
+ * @return new OM nodeId + */ + public void bootstrapOzoneManager(String omNodeId) throws Exception { + + int basePort; + int retryCount = 0; + + OzoneManager om = null; + + long leaderSnapshotIndex = getOMLeader().getRatisSnapshotIndex(); + + while (true) { + try { + basePort = 10000 + RANDOM.nextInt(1000) * 4; + OzoneConfiguration newConf = addNewOMToConfig(getOMServiceId(), + omNodeId, basePort); + + om = bootstrapNewOM(omNodeId); + + // Get the CertClient from an existing OM and set for new OM + if (omhaService.getServiceByIndex(0).getCertificateClient() != null) { + om.setCertClient( Review comment: Question: Not understood the reason to do this. As our MiniOzoneHACluster is only used in non-secure, why certClient is being set here? ########## File path: hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerStateMachine.java ########## @@ -166,6 +167,29 @@ public void notifyTermIndexUpdated(long currentTerm, long index) { computeAndUpdateLastAppliedIndex(index, currentTerm, null, false); } + /** + * Called to notify state machine about configuration changes. + * Configurations changes include addition of newly bootstrapped OM. + */ + @Override + public void notifyConfigurationChanged(long term, long index, + RaftProtos.RaftConfigurationProto newRaftConfiguration) { + List<RaftProtos.RaftPeerProto> newPeers = + newRaftConfiguration.getPeersList(); + LOG.info("Received Configuration change notification from Ratis. New Peer" + + " list:\n{}", newPeers); + for (RaftProtos.RaftPeerProto raftPeerProto : newPeers) { + String omNodeId = RaftPeerId.valueOf(raftPeerProto.getId()).toString(); + if (!ozoneManager.doesPeerExist(omNodeId)) { + LOG.info("Adding new OM {} to the Peer list.", omNodeId); + ozoneManager.addOMNodeToPeers(omNodeId); Review comment: This is reading the values from config, I think if all required information is passed like httpAddress, httpsAddress and any other required info, we can bootstrap OM's with out restart. 
(As now if we read from config, we expect the new OM details should be in config before) ########## File path: hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerBootstrap.java ########## @@ -0,0 +1,212 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ozone.om; + +import com.google.common.base.Supplier; +import java.io.File; +import java.io.FileFilter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdfs.server.common.Storage; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl; +import org.apache.hadoop.ozone.client.ObjectStore; +import org.apache.hadoop.ozone.client.OzoneBucket; +import org.apache.hadoop.ozone.client.OzoneClientFactory; +import org.apache.hadoop.ozone.client.OzoneVolume; +import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.After; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.rules.Timeout; + +import static org.apache.hadoop.ozone.OzoneConsts.SCM_DUMMY_SERVICE_ID; +import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_RATIS_SERVER_REQUEST_TIMEOUT_DEFAULT; +import static org.apache.hadoop.ozone.om.TestOzoneManagerHA.createKey; + +public class TestOzoneManagerBootstrap { + + @Rule + public ExpectedException exception = ExpectedException.none(); + + @Rule + public Timeout timeout = new Timeout(500_000); + + private MiniOzoneHAClusterImpl cluster = null; + private ObjectStore objectStore; + private OzoneConfiguration conf; + private final String clusterId = UUID.randomUUID().toString(); + private final String scmId = UUID.randomUUID().toString(); + + private static final int NUM_INITIAL_OMS = 3; + + private static final String OM_SERVICE_ID = "om-bootstrap"; + private static final String VOLUME_NAME; + private static final String BUCKET_NAME; + + private long lastTransactionIndex; + + static { + VOLUME_NAME = 
"volume" + RandomStringUtils.randomNumeric(5); + BUCKET_NAME = "bucket" + RandomStringUtils.randomNumeric(5); + } + + private void setupCluster() throws Exception { + setupCluster(NUM_INITIAL_OMS); + } + + private void setupCluster(int numInitialOMs) throws Exception { + conf = new OzoneConfiguration(); + cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newHABuilder(conf) + .setClusterId(clusterId) + .setScmId(scmId) + .setSCMServiceId(SCM_DUMMY_SERVICE_ID) + .setOMServiceId(OM_SERVICE_ID) + .setNumOfOzoneManagers(numInitialOMs) + .build(); + cluster.waitForClusterToBeReady(); + objectStore = OzoneClientFactory.getRpcClient(OM_SERVICE_ID, conf) + .getObjectStore(); + + // Perform some transactions + objectStore.createVolume(VOLUME_NAME); + OzoneVolume volume = objectStore.getVolume(VOLUME_NAME); + volume.createBucket(BUCKET_NAME); + OzoneBucket bucket = volume.getBucket(BUCKET_NAME); + createKey(bucket); + + lastTransactionIndex = cluster.getOMLeader().getRatisSnapshotIndex(); + } + + @After + public void shutdown() throws Exception { + if (cluster != null) { + cluster.shutdown(); + } + } + + private void assertNewOMExistsInPeerList(String nodeId) throws Exception { + for (OzoneManager om : cluster.getOzoneManagersList()) { + Assert.assertTrue("New OM node " + nodeId + " not present in Peer list " + + "of OM " + om.getOMNodeId(), om.doesPeerExist(nodeId)); + } + OzoneManager newOM = cluster.getOzoneManager(nodeId); + GenericTestUtils.waitFor(new Supplier<Boolean>() { + @Override + public Boolean get() { + try { + return newOM.getRatisSnapshotIndex() >= lastTransactionIndex; + } catch (IOException e) { + return false; + } + } + }, 100, 10000); Review comment: Increase time duration here to avoid flakiness in CI runs. And also as method name says check OM Exists in peerList, can we check from ratis server peer info also? -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
