[ https://issues.apache.org/jira/browse/HADOOP-18656?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17810322#comment-17810322 ]
ASF GitHub Bot commented on HADOOP-18656: ----------------------------------------- anujmodi2021 commented on code in PR #6409: URL: https://github.com/apache/hadoop/pull/6409#discussion_r1464712357 ########## hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsPaginatedDelete.java: ########## @@ -0,0 +1,279 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider; +import org.apache.hadoop.fs.azurebfs.utils.AclTestHelpers; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclEntryScope; +import org.apache.hadoop.fs.permission.AclEntryType; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.util.Lists; + +import org.assertj.core.api.Assertions; +import org.junit.Assume; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; +import java.util.UUID; + +import static java.net.HttpURLConnection.HTTP_BAD_REQUEST; +import static java.net.HttpURLConnection.HTTP_NOT_FOUND; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.AUGUST_2023_API_VERSION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DECEMBER_2019_API_VERSION; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_OAUTH_CLIENT_ENDPOINT; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_VERSION; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_PAGINATED; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_ID; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_SECRET; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_BLOB_FS_CHECKACCESS_TEST_USER_GUID; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_BLOB_FS_CLIENT_ID; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_BLOB_FS_CLIENT_SECRET; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT; +import static org.apache.hadoop.fs.azurebfs.services.AbfsClientUtils.getHeaderValue; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +public class ITestAbfsPaginatedDelete extends AbstractAbfsIntegrationTest { + + private AzureBlobFileSystem superUserFs; + private AzureBlobFileSystem firstTestUserFs; + private String firstTestUserGuid; + + private boolean isHnsEnabled; + public ITestAbfsPaginatedDelete() throws Exception { + } + + @Override + public void setup() throws Exception { + isHnsEnabled = this.getConfiguration().getBoolean(FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false); + loadConfiguredFileSystem(); + super.setup(); + this.superUserFs = getFileSystem(); + this.firstTestUserGuid = getConfiguration() + .get(FS_AZURE_BLOB_FS_CHECKACCESS_TEST_USER_GUID); + + if(isHnsEnabled) { + // setting up ACL permissions for test user + setFirstTestUserFsAuth(); + setDefaultAclOnRoot(this.firstTestUserGuid); + } + } + + /** + * Test to check that recursive deletePath works with paginated enabled and + * disabled for both empty and non-empty directory. + * When enabled appropriate xMsVersion should be used. + * @throws Exception + */ + @Test + public void testRecursiveDeleteWithPagination() throws Exception { + testRecursiveDeleteWithPaginationInternal(false, true, DECEMBER_2019_API_VERSION); + testRecursiveDeleteWithPaginationInternal(false, true, AUGUST_2023_API_VERSION); + testRecursiveDeleteWithPaginationInternal(false, false, DECEMBER_2019_API_VERSION); + testRecursiveDeleteWithPaginationInternal(false, false, AUGUST_2023_API_VERSION); + testRecursiveDeleteWithPaginationInternal(true, true, DECEMBER_2019_API_VERSION); + testRecursiveDeleteWithPaginationInternal(true, false, AUGUST_2023_API_VERSION); + } + + /** + * Test to check that non-recursive delete works with both paginated enabled + * and disabled only for empty directories. + * Pagination should not be set when recursive is false. + * @throws Exception + */ + @Test + public void testNonRecursiveDeleteWithPagination() throws Exception { + testNonRecursiveDeleteWithPaginationInternal(true); + testNonRecursiveDeleteWithPaginationInternal(false); + } + + /** + * Test to check that with pagination enabled, invalid CT will fail + * @throws Exception + */ + @Test + public void testRecursiveDeleteWithInvalidCT() throws Exception { + testRecursiveDeleteWithInvalidCTInternal(true); + testRecursiveDeleteWithInvalidCTInternal(false); + } + + public void testRecursiveDeleteWithPaginationInternal(boolean isEmptyDir, boolean isPaginatedDeleteEnabled, + String xMsVersion) throws Exception { + final AzureBlobFileSystem fs = isHnsEnabled ? this.firstTestUserFs : getFileSystem(); + TracingContext testTracingContext = getTestTracingContext(this.firstTestUserFs, true); + Path testPath; + if (isEmptyDir) { + testPath = new Path("/emptyPath" + StringUtils.right( + UUID.randomUUID().toString(), 10)); + fs.mkdirs(testPath); + } else { + testPath = createSmallDir(); + } + + // Set the paginated enabled value and xMsVersion at client level. + AbfsClient client = ITestAbfsClient.setAbfsClientField( + fs.getAbfsStore().getClient(), "xMsVersion", xMsVersion); + client.getAbfsConfiguration().setIsPaginatedDeleteEnabled(isPaginatedDeleteEnabled); Review Comment: Agreed... Created a spied Client here and doing config set on the spied client only. > ABFS: Support for Pagination in Recursive Directory Delete > ----------------------------------------------------------- > > Key: HADOOP-18656 > URL: https://issues.apache.org/jira/browse/HADOOP-18656 > Project: Hadoop Common > Issue Type: Sub-task > Components: fs/azure > Affects Versions: 3.3.5 > Reporter: Sree Bhattacharyya > Assignee: Anuj Modi > Priority: Minor > Labels: pull-request-available > > Today, when a recursive delete is issued for a large directory in ADLS Gen2 > (HNS) account, the directory deletion happens in O(1) but in backend ACL > Checks are done recursively for each object inside that directory which in > case of large directory could lead to request time out. Pagination is > introduced in the Azure Storage Backend for these ACL checks. > More information on how pagination works can be found on public documentation > of [Azure Delete Path > API|https://learn.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/delete?view=rest-storageservices-datalakestoragegen2-2019-12-12]. > This PR contains changes to support this from client side. To trigger > pagination, client needs to add a new query parameter "paginated" and set it > to true along with recursive set to true. In return if the directory is > large, server might return a continuation token back to the caller. If caller > gets back a continuation token, it has to call the delete API again with > continuation token along with recursive and pagination set to true. This is > similar to directory delete of FNS account. > Pagination is available only in versions "2023-08-03" onwards. > PR also contains functional tests to verify driver works well with different > combinations of recursive and pagination features for both HNS and FNS > account. > Full E2E testing of pagination requires large dataset to be created and hence > not added as part of driver test suite. But extensive E2E testing has been > performed. -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org