[ https://issues.apache.org/jira/browse/TWILL-61?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16399626#comment-16399626 ]
ASF GitHub Bot commented on TWILL-61: ------------------------------------- Github user anew commented on a diff in the pull request: https://github.com/apache/twill/pull/67#discussion_r174643542 --- Diff: twill-zookeeper/src/main/java/org/apache/twill/zookeeper/ZKOperations.java --- @@ -281,6 +286,73 @@ public void onFailure(Throwable t) { return resultFuture; } + /** + * Creates a ZK node of the given path. If the node already exists, deletion of the node (recursively) will happen + * and the creation will be retried. + */ + public static OperationFuture<String> createDeleteIfExists(final ZKClient zkClient, final String path, + @Nullable final byte[] data, final CreateMode createMode, + final boolean createParent, final ACL...acls) { + final SettableOperationFuture<String> resultFuture = SettableOperationFuture.create(path, + Threads.SAME_THREAD_EXECUTOR); + final List<ACL> createACLs = acls.length == 0 ? ZooDefs.Ids.OPEN_ACL_UNSAFE : Arrays.asList(acls); + createNode(zkClient, path, data, createMode, createParent, createACLs, new FutureCallback<String>() { + + final FutureCallback<String> createCallback = this; + + @Override + public void onSuccess(String result) { + // Create succeeded, just set the result to the resultFuture + resultFuture.set(result); + } + + @Override + public void onFailure(final Throwable createFailure) { + // If create failed not because of the NodeExistsException, just set the exception to the result future + if (!(createFailure instanceof KeeperException.NodeExistsException)) { + resultFuture.setException(createFailure); + return; + } + + // Try to delete the path + LOG.info("Node {}{} already exists. Deleting it and retry creation", zkClient.getConnectString(), path); + Futures.addCallback(recursiveDelete(zkClient, path), new FutureCallback<String>() { + @Override + public void onSuccess(String result) { + // If delete succeeded, perform the creation again. 
+ createNode(zkClient, path, data, createMode, createParent, createACLs, createCallback); + } + + @Override + public void onFailure(Throwable t) { + // If deletion failed because of NoNodeException, fail the result operation future + if (!(t instanceof KeeperException.NoNodeException)) { + createFailure.addSuppressed(t); + resultFuture.setException(createFailure); + return; + } + + // If can't delete because the node no longer exists, just go ahead and recreate the node + createNode(zkClient, path, data, createMode, createParent, createACLs, createCallback); + } + }, Threads.SAME_THREAD_EXECUTOR); + } + }); + + return resultFuture; + } + + /** + * Private helper method to create a ZK node based on the parameter. The result of the creation is always + * communicate via the provided {@link FutureCallback}. + */ + private static void createNode(ZKClient zkClient, String path, @Nullable byte[] data, + CreateMode createMode, boolean createParent, + Iterable<ACL> acls, FutureCallback<String> callback) { + Futures.addCallback(zkClient.create(path, data, createMode, createParent, acls), + callback, Threads.SAME_THREAD_EXECUTOR); + } --- End diff -- Yes, this is easier to understand now that it is extracted into separate methods. I like it much better now. > Second launch attempt of AM always failed > ----------------------------------------- > > Key: TWILL-61 > URL: https://issues.apache.org/jira/browse/TWILL-61 > Project: Apache Twill > Issue Type: Bug > Components: yarn > Reporter: Terence Yim > Assignee: Terence Yim > Priority: Major > Fix For: 0.5.0-incubating > > > YARN would make multiple attempts to launch an application. Currently the second > and subsequent attempts always fail because creation of the /runId/state node in > ZK fails (node exists): the runId is generated on the client side and doesn't > change between attempts. -- This message was sent by Atlassian JIRA (v7.6.3#76005)