[ https://issues.apache.org/jira/browse/HADOOP-18425?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17703037#comment-17703037 ]
ASF GitHub Bot commented on HADOOP-18425: ----------------------------------------- mehakmeet commented on code in PR #5494: URL: https://github.com/apache/hadoop/pull/5494#discussion_r1142877888 ########## hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java: ########## @@ -38,6 +38,7 @@ import java.util.concurrent.TimeUnit; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; Review Comment: import seem to be in the wrong place here. ########## hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java: ########## @@ -123,6 +143,257 @@ public void testRenameFailuresDueToIncompleteMetadata() throws Exception { } + AbfsClient getMockAbfsClient() throws IOException { + AzureBlobFileSystem fs = getFileSystem(); + + // adding mock objects to current AbfsClient + AbfsClient spyClient = Mockito.spy(fs.getAbfsStore().getClient()); + + Mockito.doAnswer(answer -> { + AbfsRestOperation op = new AbfsRestOperation(AbfsRestOperationType.RenamePath, + spyClient, HTTP_METHOD_PUT, answer.getArgument(0), answer.getArgument(1)); + AbfsRestOperation spiedOp = Mockito.spy(op); + addSpyBehavior(spiedOp, op, spyClient); + return spiedOp; + }).when(spyClient).createRenameRestOperation(nullable(URL.class), nullable(List.class)); + + return spyClient; + + } + + /** + * Spies on a rest operation to inject transient failure. + * the first createHttpOperation() invocation will return an abfs rest operation + * which will fail. + * @param spiedRestOp spied operation whose createHttpOperation() will fail first time + * @param normalRestOp normal operation the good operation + * @param client client. + * @throws IOException failure + */ + private void addSpyBehavior(final AbfsRestOperation spiedRestOp, + final AbfsRestOperation normalRestOp, + final AbfsClient client) + throws IOException { + AbfsHttpOperation failingOperation = Mockito.spy(normalRestOp.createHttpOperation()); + AbfsHttpOperation normalOp1 = normalRestOp.createHttpOperation(); + executeThenFail(client, normalRestOp, failingOperation, normalOp1); + AbfsHttpOperation normalOp2 = normalRestOp.createHttpOperation(); + normalOp2.getConnection().setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION, + client.getAccessToken()); + + when(spiedRestOp.createHttpOperation()) + .thenReturn(failingOperation) + .thenReturn(normalOp2); + } + + /** + * Mock an idempotency failure by executing the normal operation, then + * raising an IOE. + * @param normalRestOp the rest operation used to sign the requests. + * @param failingOperation failing operation + * @param normalOp good operation + * @throws IOException failure + */ + private void executeThenFail(final AbfsClient client, + final AbfsRestOperation normalRestOp, + final AbfsHttpOperation failingOperation, + final AbfsHttpOperation normalOp) + throws IOException { + Mockito.doAnswer(answer -> { + LOG.info("Executing first attempt with post-operation fault injection"); + final byte[] buffer = answer.getArgument(0); + final int offset = answer.getArgument(1); + final int length = answer.getArgument(2); + normalRestOp.signRequest(normalOp, length); + normalOp.sendRequest(buffer, offset, length); + normalOp.processResponse(buffer, offset, length); + LOG.info("Actual outcome is {} \"{}\" \"{}\"; injecting failure", + normalOp.getStatusCode(), + normalOp.getStorageErrorCode(), + normalOp.getStorageErrorMessage()); + throw new SocketException("connection-reset"); + }).when(failingOperation).sendRequest(Mockito.nullable(byte[].class), + Mockito.nullable(int.class), Mockito.nullable(int.class)); + } + + /** + * This is the good outcome: resilient rename. + */ + @Test + public void testRenameRecoverySrcDestEtagSame() throws IOException { + AzureBlobFileSystem fs = getFileSystem(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext)); + + AbfsClient mockClient = getMockAbfsClient(); + + String base = "/" + getMethodName(); + String path1 = base + "/dummyFile1"; + String path2 = base + "/dummyFile2"; + + touch(new Path(path1)); + + // 404 and retry, send sourceEtag as null + // source eTag matches -> rename should pass even when execute throws exception + final AbfsClientRenameResult result = + mockClient.renamePath(path1, path2, null, testTracingContext, null, false); + Assertions.assertThat(result.isRenameRecovered()) + .describedAs("rename result recovered flag of %s", result) + .isTrue(); + } + + /** + * execute a failing rename but have the file at the far end not match. + * This is done by explicitly passing in a made up etag for the source + * etag and creating a file at the far end. + * The first rename will actually fail with a path exists exception, + * but as that is swallowed, it's not a problem. + */ + @Test + public void testRenameRecoverySourceDestEtagDifferent() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext)); + + AbfsClient spyClient = getMockAbfsClient(); + + String base = "/" + getMethodName(); + String path1 = base + "/dummyFile1"; + String path2 = base + "/dummyFile2"; + + touch(new Path(path2)); + + // source eTag does not match -> throw exception + expectErrorCode(SOURCE_PATH_NOT_FOUND, intercept(AbfsRestOperationException.class, () -> + spyClient.renamePath(path1, path2, null, testTracingContext, "source", false)) + ); + } + + /** + * Assert that an exception failed with a specific error code. + * @param code code + * @param e exception + * @throws AbfsRestOperationException if there is a mismatch + */ + private static void expectErrorCode(final AzureServiceErrorCode code, + final AbfsRestOperationException e) throws AbfsRestOperationException { + if (e.getErrorCode() != code) { + throw e; + } + } + + /** + * Directory rename failure is unrecoverable. + */ + @Test + public void testDirRenameRecoveryUnsupported() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + TracingContext testTracingContext = getTestTracingContext(fs, false); + + Assume.assumeTrue(fs.getAbfsStore().getIsNamespaceEnabled(testTracingContext)); + + AbfsClient spyClient = getMockAbfsClient(); + + String base = "/" + getMethodName(); + String path1 = base + "/dummyDir1"; + String path2 = base + "/dummyDir2"; + + fs.mkdirs(new Path(path1)); + + // source eTag does not match -> throw exception + expectErrorCode(SOURCE_PATH_NOT_FOUND, intercept(AbfsRestOperationException.class, () -> + spyClient.renamePath(path1, path2, null, testTracingContext, null, false))); + } + + /** + * Even with failures, having Review Comment: incomplete javadocs? ########## hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java: ########## @@ -157,6 +166,9 @@ private AbfsClient(final URL baseUrl, final SharedKeyCredentials sharedKeyCreden new ThreadFactoryBuilder().setNameFormat("AbfsClient Lease Ops").setDaemon(true).build(); this.executorService = MoreExecutors.listeningDecorator( HadoopExecutors.newScheduledThreadPool(this.abfsConfiguration.getNumLeaseThreads(), tf)); + // rename resilience + renameResilience = abfsConfiguration.getRenameResilience(); + LOG.debug("Rename resilience is {}",renameResilience); Review Comment: nit: space after ",". > [ABFS]: RenameFilePath Source File Not Found (404) error in retry loop > ---------------------------------------------------------------------- > > Key: HADOOP-18425 > URL: https://issues.apache.org/jira/browse/HADOOP-18425 > Project: Hadoop Common > Issue Type: Bug > Components: fs/azure > Reporter: Sree Bhattacharyya > Assignee: Sree Bhattacharyya > Priority: Minor > Labels: pull-request-available > > RenameFilePath on its first try receives a Request timed out error with code > 500. On retrying the same operation, a Source file not found (404) error is > received. > Possible mitigation: Check whether etags remain the same before and after the > retry and accordingly send an Operation Successful result, instead of source > file not found. -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org