[ 
https://issues.apache.org/jira/browse/HDDS-2034?focusedWorklogId=316026&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-316026
 ]

ASF GitHub Bot logged work on HDDS-2034:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 21/Sep/19 01:16
            Start Date: 21/Sep/19 01:16
    Worklog Time Spent: 10m 
      Work Description: xiaoyuyao commented on pull request #1469: HDDS-2034. 
Async RATIS pipeline creation and destroy through heartbea…
URL: https://github.com/apache/hadoop/pull/1469#discussion_r326840990
 
 

 ##########
 File path: 
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/CreatePipelineCommandHandler.java
 ##########
 @@ -0,0 +1,226 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership.  The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations 
under
+ * the License.
+ */
+package org.apache.hadoop.ozone.container.common.statemachine.commandhandler;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.
+    StorageContainerDatanodeProtocolProtos.CreatePipelineCommandProto;
+import org.apache.hadoop.hdds.protocol.proto.
+    StorageContainerDatanodeProtocolProtos.SCMCommandProto;
+import org.apache.hadoop.hdds.protocol.proto.
+    StorageContainerDatanodeProtocolProtos.CreatePipelineACKProto;
+import org.apache.hadoop.hdds.ratis.RatisHelper;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
+import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
+import org.apache.hadoop.hdds.security.x509.SecurityConfig;
+import org.apache.hadoop.io.MultipleIOException;
+import org.apache.hadoop.ozone.container.common.statemachine
+    .SCMConnectionManager;
+import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
+import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
+import org.apache.hadoop.ozone.protocol.commands.CommandStatus;
+import org.apache.hadoop.ozone.protocol.commands.CreatePipelineCommand;
+import org.apache.hadoop.ozone.protocol.commands.CreatePipelineCommandStatus;
+import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
+import org.apache.hadoop.util.Time;
+import org.apache.ratis.client.RaftClient;
+import org.apache.ratis.grpc.GrpcTlsConfig;
+import org.apache.ratis.protocol.NotLeaderException;
+import org.apache.ratis.protocol.RaftClientReply;
+import org.apache.ratis.protocol.RaftGroup;
+import org.apache.ratis.protocol.RaftGroupId;
+import org.apache.ratis.protocol.RaftPeer;
+import org.apache.ratis.retry.RetryPolicy;
+import org.apache.ratis.rpc.SupportedRpcType;
+import org.apache.ratis.util.TimeDuration;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.RejectedExecutionException;
+import java.util.function.Consumer;
+import java.util.stream.Collectors;
+
+/**
+ * Handler for create pipeline command received from SCM.
+ */
+public class CreatePipelineCommandHandler implements CommandHandler {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(CreatePipelineCommandHandler.class);
+
+  private int invocationCount;
+  private long totalTime;
+
+  /**
+   * Constructs a createPipelineCommand handler.
+   */
+  public CreatePipelineCommandHandler() {
+  }
+
+  /**
+   * Handles a given SCM command.
+   *
+   * @param command           - SCM Command
+   * @param ozoneContainer    - Ozone Container.
+   * @param context           - Current Context.
+   * @param connectionManager - The SCMs that we are talking to.
+   */
+  @Override
+  public void handle(SCMCommand command, OzoneContainer ozoneContainer,
+      StateContext context, SCMConnectionManager connectionManager) {
+    invocationCount++;
+    final long startTime = Time.monotonicNow();
+    final DatanodeDetails dn = context.getParent()
+        .getDatanodeDetails();
+    final CreatePipelineCommandProto createCommand =
+        ((CreatePipelineCommand)command).getProto();
+    final PipelineID pipelineID = PipelineID.getFromProtobuf(
+        createCommand.getPipelineID());
+    Collection<DatanodeDetails> peers =
+        createCommand.getDatanodeList().stream()
+            .map(DatanodeDetails::getFromProtoBuf)
+            .collect(Collectors.toList());
+
+    final CreatePipelineACKProto createPipelineACK =
+        CreatePipelineACKProto.newBuilder()
+            .setPipelineID(createCommand.getPipelineID())
+            .setDatanodeUUID(dn.getUuidString()).build();
+    boolean success = false;
+    try {
+      createPipeline(dn, pipelineID, peers, context.getParent().getConf());
+      success = true;
+      LOG.info("Create Pipeline {} {} #{} command succeed on datanode {}.",
+          createCommand.getType(), createCommand.getFactor(), pipelineID,
+          dn.getUuidString());
+    } catch (NotLeaderException e) {
+      LOG.debug("Follower cannot create pipeline #{}.", pipelineID);
+    } catch (IOException e) {
+      LOG.error("Can't create pipeline {} {} #{}", createCommand.getType(),
+          createCommand.getFactor(), pipelineID, e);
+    } finally {
+      final boolean cmdExecuted = success;
+      Consumer<CommandStatus> statusUpdater = (cmdStatus) -> {
+        cmdStatus.setStatus(cmdExecuted);
+        ((CreatePipelineCommandStatus)cmdStatus)
+            .setCreatePipelineAck(createPipelineACK);
+      };
+      updateCommandStatus(context, command, statusUpdater, LOG);
+      long endTime = Time.monotonicNow();
+      totalTime += endTime - startTime;
+    }
+  }
+
+  /**
+   * Returns the command type that this command handler handles.
+   *
+   * @return Type
+   */
+  @Override
+  public SCMCommandProto.Type getCommandType() {
+    return SCMCommandProto.Type.createPipelineCommand;
+  }
+
+  /**
+   * Returns number of times this handler has been invoked.
+   *
+   * @return int
+   */
+  @Override
+  public int getInvocationCount() {
+    return invocationCount;
+  }
+
+  /**
+   * Returns the average time this function takes to run.
+   *
+   * @return long
+   */
+  @Override
+  public long getAverageRunTime() {
+    if (invocationCount > 0) {
+      return totalTime / invocationCount;
+    }
+    return 0;
+  }
+
+  /**
+   * Sends ratis command to create pipeline on this datanode.
+   *
+   * @param dn       - this datanode
+   * @param pipelineId   - pipeline ID
+   * @param peers        - datanodes of the pipeline
+   * @param ozoneConf  - Ozone configuration
+   * @throws IOException
+   */
+  private void createPipeline(DatanodeDetails dn, PipelineID pipelineId,
+      Collection<DatanodeDetails> peers, Configuration ozoneConf)
+      throws IOException {
+    final RaftGroup group = RatisHelper.newRaftGroup(
+        RaftGroupId.valueOf(pipelineId.getId()), peers);
+    LOG.debug("creating pipeline:#{} with {}", pipelineId, group);
+
+    final String rpcType = ozoneConf
+        .get(ScmConfigKeys.DFS_CONTAINER_RATIS_RPC_TYPE_KEY,
+            ScmConfigKeys.DFS_CONTAINER_RATIS_RPC_TYPE_DEFAULT);
+    final RetryPolicy retryPolicy = RatisHelper.createRetryPolicy(ozoneConf);
+    final List< IOException > exceptions =
+        Collections.synchronizedList(new ArrayList<>());
+    final int maxOutstandingRequests = ozoneConf.getInt(
+        ScmConfigKeys.SCM_CONTAINER_CLIENT_MAX_OUTSTANDING_REQUESTS,
+        ScmConfigKeys.SCM_CONTAINER_CLIENT_MAX_OUTSTANDING_REQUESTS_DEFAULT);
+    final GrpcTlsConfig tlsConfig = RatisHelper.createTlsClientConfig(new
+        SecurityConfig(ozoneConf));
+    final TimeDuration requestTimeout =
+        RatisHelper.getClientRequestTimeout(ozoneConf);
+    try {
+      final RaftPeer peer = RatisHelper.toRaftPeer(dn);
+      try (RaftClient client = RatisHelper
+          .newRaftClient(SupportedRpcType.valueOfIgnoreCase(rpcType), peer,
+              retryPolicy, maxOutstandingRequests, tlsConfig,
+              requestTimeout)) {
+
+        RaftClientReply reply = client.groupAdd(group, peer.getId());
+        if (reply == null || !reply.isSuccess()) {
+          String msg = "Pipeline initialization failed for pipeline:"
+              + pipelineId.getId() + " node:" + peer.getId();
+          LOG.error(msg);
+          throw new IOException(msg);
+        }
+      } catch (IOException ioe) {
+        String errMsg =
+            "Failed invoke Ratis rpc for " + dn.getUuid();
+        LOG.error(errMsg, ioe);
 
 Review comment:
   same as above. 
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 316026)
    Time Spent: 2h 20m  (was: 2h 10m)

> Async RATIS pipeline creation and destroy through heartbeat commands
> --------------------------------------------------------------------
>
>                 Key: HDDS-2034
>                 URL: https://issues.apache.org/jira/browse/HDDS-2034
>             Project: Hadoop Distributed Data Store
>          Issue Type: Sub-task
>            Reporter: Sammi Chen
>            Assignee: Sammi Chen
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 2h 20m
>  Remaining Estimate: 0h
>
> Currently, pipeline creation and destroy are synchronous operations. SCM 
> directly connect to each datanode of the pipeline through gRPC channel to 
> create the pipeline to destroy the pipeline.  
> This task is to remove the gRPC channel, send pipeline creation and destroy 
> action through heartbeat command to each datanode.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: hdfs-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: hdfs-issues-h...@hadoop.apache.org

Reply via email to