mukul1987 commented on a change in pull request #1226: HDDS-1610. applyTransaction failure should not be lost on restart. URL: https://github.com/apache/hadoop/pull/1226#discussion_r311191416
########## File path: hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java ########## @@ -674,30 +674,54 @@ public void notifyIndexUpdate(long term, long index) { if (cmdType == Type.WriteChunk || cmdType ==Type.PutSmallFile) { builder.setCreateContainerSet(createContainerSet); } + CompletableFuture<Message> applyTransactionFuture = + new CompletableFuture<>(); // Ensure the command gets executed in a separate thread than // stateMachineUpdater thread which is calling applyTransaction here. - CompletableFuture<Message> future = CompletableFuture - .supplyAsync(() -> runCommand(requestProto, builder.build()), + CompletableFuture<ContainerCommandResponseProto> future = + CompletableFuture.supplyAsync( + () -> runCommandGetResponse(requestProto, builder.build()), getCommandExecutor(requestProto)); - - future.thenAccept(m -> { + future.thenApply(r -> { if (trx.getServerRole() == RaftPeerRole.LEADER) { long startTime = (long) trx.getStateMachineContext(); metrics.incPipelineLatency(cmdType, Time.monotonicNowNanos() - startTime); } - - final Long previous = - applyTransactionCompletionMap - .put(index, trx.getLogEntry().getTerm()); - Preconditions.checkState(previous == null); - if (cmdType == Type.WriteChunk || cmdType == Type.PutSmallFile) { - metrics.incNumBytesCommittedCount( + if (r.getResult() != ContainerProtos.Result.SUCCESS) { + StorageContainerException sce = + new StorageContainerException(r.getMessage(), r.getResult()); + LOG.error(gid + ": ApplyTransaction failed: cmd " + r.getCmdType() + + " logIndex " + index + " Error message: " + r.getMessage() + + " Container Result: " + r.getResult()); + metrics.incNumApplyTransactionsFails(); + ratisServer.handleApplyTransactionFailure(gid, trx.getServerRole()); + // Since the applyTransaction now is completed exceptionally, + // before any further snapshot is taken , the exception will be + // caught in stateMachineUpdater in Ratis and 
ratis server will + // shutdown. + applyTransactionFuture.completeExceptionally(sce); + } else { + metrics.incNumBytesWrittenCount( requestProto.getWriteChunk().getChunkData().getLen()); + LOG.debug(gid + ": ApplyTransaction completed: cmd " + r.getCmdType() Review comment: If this is a success, then `" Error message: " + r.getMessage()` will not be the right thing to print here. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org