[ https://issues.apache.org/jira/browse/HADOOP-18012?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17704019#comment-17704019 ]
ASF GitHub Bot commented on HADOOP-18012: ----------------------------------------- anmolanmol1234 commented on code in PR #5488: URL: https://github.com/apache/hadoop/pull/5488#discussion_r1145921432 ########## hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java: ########## @@ -556,99 +583,150 @@ public AbfsClientRenameResult renamePath( // isMetadataIncompleteState is used for renameRecovery(as the 2nd param). return new AbfsClientRenameResult(op, isMetadataIncompleteState, isMetadataIncompleteState); } catch (AzureBlobFileSystemException e) { - // If we have no HTTP response, throw the original exception. - if (!op.hasResult()) { - throw e; - } + // If we have no HTTP response, throw the original exception. + if (!op.hasResult()) { + throw e; + } - // ref: HADOOP-18242. Rename failure occurring due to a rare case of - // tracking metadata being in incomplete state. - if (op.getResult().getStorageErrorCode() - .equals(RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode()) - && !isMetadataIncompleteState) { - //Logging - ABFS_METADATA_INCOMPLETE_RENAME_FAILURE - .info("Rename Failure attempting to resolve tracking metadata state and retrying."); - - // Doing a HEAD call resolves the incomplete metadata state and - // then we can retry the rename operation. - AbfsRestOperation sourceStatusOp = getPathStatus(source, false, - tracingContext); - isMetadataIncompleteState = true; - // Extract the sourceEtag, using the status Op, and set it - // for future rename recovery. - AbfsHttpOperation sourceStatusResult = sourceStatusOp.getResult(); - String sourceEtagAfterFailure = extractEtagHeader(sourceStatusResult); - renamePath(source, destination, continuation, tracingContext, - sourceEtagAfterFailure, isMetadataIncompleteState); - } - // if we get out of the condition without a successful rename, then - // it isn't metadata incomplete state issue. - isMetadataIncompleteState = false; - - boolean etagCheckSucceeded = renameIdempotencyCheckOp( - source, - sourceEtag, op, destination, tracingContext); - if (!etagCheckSucceeded) { - // idempotency did not return different result - // throw back the exception - throw e; - } + // ref: HADOOP-18242. Rename failure occurring due to a rare case of + // tracking metadata being in incomplete state. + if (op.getResult().getStorageErrorCode() + .equals(RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode()) + && !isMetadataIncompleteState) { + //Logging + ABFS_METADATA_INCOMPLETE_RENAME_FAILURE + .info("Rename Failure attempting to resolve tracking metadata state and retrying."); + + // Doing a HEAD call resolves the incomplete metadata state and + // then we can retry the rename operation. + AbfsRestOperation sourceStatusOp = getPathStatus(source, false, + tracingContext); + isMetadataIncompleteState = true; + // Extract the sourceEtag, using the status Op, and set it + // for future rename recovery. + AbfsHttpOperation sourceStatusResult = sourceStatusOp.getResult(); + String sourceEtagAfterFailure = extractEtagHeader(sourceStatusResult); + renamePath(source, destination, continuation, tracingContext, + sourceEtagAfterFailure, isMetadataIncompleteState, isNamespaceEnabled); + } + // if we get out of the condition without a successful rename, then + // it isn't metadata incomplete state issue. + isMetadataIncompleteState = false; + + boolean etagCheckSucceeded = renameIdempotencyCheckOp( + source, + sourceEtag, op, destination, tracingContext, isDir); + if (!etagCheckSucceeded) { + // idempotency did not return different result + // throw back the exception + throw e; + } return new AbfsClientRenameResult(op, true, isMetadataIncompleteState); } } + private boolean checkIsDir(AbfsHttpOperation result) { + String resourceType = result.getResponseHeader( + HttpHeaderConfigurations.X_MS_RESOURCE_TYPE); + return resourceType != null + && resourceType.equalsIgnoreCase(AbfsHttpConstants.DIRECTORY); + } + + @VisibleForTesting + AbfsRestOperation createRenameRestOperation(URL url, List<AbfsHttpHeader> requestHeaders) { + AbfsRestOperation op = new AbfsRestOperation( + AbfsRestOperationType.RenamePath, + this, + HTTP_METHOD_PUT, + url, + requestHeaders); + return op; + } + private void incrementAbfsRenamePath() { abfsCounters.incrementCounter(RENAME_PATH_ATTEMPTS, 1); } /** * Check if the rename request failure is post a retry and if earlier rename * request might have succeeded at back-end. - * + * <p> * If a source etag was passed in, and the error was 404, get the * etag of any file at the destination. * If it matches the source etag, then the rename is considered * a success. * Exceptions raised in the probe of the destination are swallowed, * so that they do not interfere with the original rename failures. - * @param source source path - * @param op Rename request REST operation response with non-null HTTP response - * @param destination rename destination path - * @param sourceEtag etag of source file. may be null or empty + * + * @param source source path + * @param op Rename request REST operation response with non-null HTTP response + * @param destination rename destination path + * @param sourceEtag etag of source file. may be null or empty * @param tracingContext Tracks identifiers for request header * @return true if the file was successfully copied */ public boolean renameIdempotencyCheckOp( - final String source, - final String sourceEtag, - final AbfsRestOperation op, - final String destination, - TracingContext tracingContext) { + final String source, + final String sourceEtag, + final AbfsRestOperation op, + final String destination, + TracingContext tracingContext, + final boolean isDir) { Review Comment: javadoc for new param. > ABFS: Enable config controlled ETag check for Rename idempotency > ---------------------------------------------------------------- > > Key: HADOOP-18012 > URL: https://issues.apache.org/jira/browse/HADOOP-18012 > Project: Hadoop Common > Issue Type: Sub-task > Components: fs/azure > Affects Versions: 3.3.2 > Reporter: Sneha Vijayarajan > Assignee: Sree Bhattacharyya > Priority: Major > Labels: pull-request-available > > ABFS driver has a handling for rename idempotency which relies on LMT of the > destination file to conclude if the rename was successful or not when source > file is absent and if the rename request had entered retry loop. > This handling is incorrect as LMT of the destination does not change on > rename. > This Jira will track the change to undo the current implementation and add a > new one where for an incoming rename operation, source file eTag is fetched > first and then rename is done only if eTag matches for the source file. > As this is going to be a costly operation given an extra HEAD request is > added to each rename, this implementation will be guarded over a config and > can enabled by customers who have workloads that do multiple renames. > Long term plan to handle rename idempotency without HEAD request is being > discussed. -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org