
ASF GitHub Bot commented on HADOOP-18656:

anmolanmol1234 commented on code in PR #6409:
URL: https://github.com/apache/hadoop/pull/6409#discussion_r1456987897

@@ -0,0 +1,279 @@
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs.azurebfs.services;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
+import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest;
+import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;
+import org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider;
+import org.apache.hadoop.fs.azurebfs.utils.AclTestHelpers;
+import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
+import org.apache.hadoop.fs.permission.AclEntry;
+import org.apache.hadoop.fs.permission.AclEntryScope;
+import org.apache.hadoop.fs.permission.AclEntryType;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.util.Lists;
+import org.assertj.core.api.Assertions;
+import org.junit.Assume;
+import org.junit.Test;
+import java.io.IOException;
+import java.util.List;
+import java.util.UUID;
+import static java.net.HttpURLConnection.HTTP_BAD_REQUEST;
+import static java.net.HttpURLConnection.HTTP_NOT_FOUND;
+import static 
+import static 
+import static 
+import static 
+import static 
+import static 
+import static 
+import static 
+import static 
+import static 
+import static 
+import static 
+import static 
+import static 
+import static 
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+public class ITestAbfsPaginatedDelete extends AbstractAbfsIntegrationTest {
+  private AzureBlobFileSystem superUserFs;
+  private AzureBlobFileSystem firstTestUserFs;
+  private String firstTestUserGuid;
+  private boolean isHnsEnabled;
+  public ITestAbfsPaginatedDelete() throws Exception {
+  }
+  @Override
+  public void setup() throws Exception {
+    isHnsEnabled = 
+    loadConfiguredFileSystem();
+    super.setup();
+    this.superUserFs = getFileSystem();
+    this.firstTestUserGuid = getConfiguration()
+    if(isHnsEnabled) {
+      // setting up ACL permissions for test user
+      setFirstTestUserFsAuth();
+      setDefaultAclOnRoot(this.firstTestUserGuid);
+    }
+  }
+  /**
+   * Test to check that recursive deletePath works with paginated enabled and
+   * disabled for both empty and non-empty directory.
+   * When enabled appropriate xMsVersion should be used.
+   * @throws Exception
+   */
+  @Test
+  public void testRecursiveDeleteWithPagination() throws Exception {
+    testRecursiveDeleteWithPaginationInternal(false, true, 
+    testRecursiveDeleteWithPaginationInternal(false, true, 
+    testRecursiveDeleteWithPaginationInternal(false, false, 
+    testRecursiveDeleteWithPaginationInternal(false, false, 
+    testRecursiveDeleteWithPaginationInternal(true, true, 
+    testRecursiveDeleteWithPaginationInternal(true, false, 
+  }
+  /**
+   * Test to check that non-recursive delete works with both paginated enabled
+   * and disabled only for empty directories.
+   * Pagination should not be set when recursive is false.
+   * @throws Exception
+   */
+  @Test
+  public void testNonRecursiveDeleteWithPagination() throws Exception {
+    testNonRecursiveDeleteWithPaginationInternal(true);
+    testNonRecursiveDeleteWithPaginationInternal(false);
+  }
+  /**
+   * Test to check that with pagination enabled, invalid CT will fail
+   * @throws Exception
+   */
+  @Test
+  public void testRecursiveDeleteWithInvalidCT() throws Exception {
+    testRecursiveDeleteWithInvalidCTInternal(true);
+    testRecursiveDeleteWithInvalidCTInternal(false);
+  }
+  public void testRecursiveDeleteWithPaginationInternal(boolean isEmptyDir, 
boolean isPaginatedDeleteEnabled,
+      String xMsVersion) throws Exception {
+    final AzureBlobFileSystem fs = isHnsEnabled ? this.firstTestUserFs : 
+    TracingContext testTracingContext = 
getTestTracingContext(this.firstTestUserFs, true);
+    Path testPath;
+    if (isEmptyDir) {
+      testPath = new Path("/emptyPath" + StringUtils.right(
+          UUID.randomUUID().toString(), 10));
+      fs.mkdirs(testPath);
+    } else {
+      testPath = createSmallDir();
+    }
+    // Set the paginated enabled value and xMsVersion at client level.
+    AbfsClient client = ITestAbfsClient.setAbfsClientField(
+        fs.getAbfsStore().getClient(), "xMsVersion", xMsVersion);
+    AbfsRestOperation op = client.deletePath(testPath.toString(), true, null, 
+    // Getting the xMsVersion that was used to make the request
+    String xMsVersionUsed = getHeaderValue(op.getRequestHeaders(), 
+    String urlUsed = op.getUrl().toString();
+    // Assert that appropriate xMsVersion and query param was used to make 
+    if (isPaginatedDeleteEnabled && 
xMsVersion.compareTo(AUGUST_2023_API_VERSION) < 0) {
+      Assertions.assertThat(urlUsed)
+          .describedAs("Url must have paginated = true as query param")
+          .contains(QUERY_PARAM_PAGINATED);
+      Assertions.assertThat(xMsVersionUsed)
+          .describedAs("Request was made with wrong x-ms-version")
+          .isEqualTo(AUGUST_2023_API_VERSION);
+    } else if (isPaginatedDeleteEnabled && 
xMsVersion.compareTo(AUGUST_2023_API_VERSION) >= 0) {
+      Assertions.assertThat(urlUsed)
+          .describedAs("Url must have paginated = true as query param")
+          .contains(QUERY_PARAM_PAGINATED);
+      Assertions.assertThat(xMsVersionUsed)
+          .describedAs("Request was made with wrong x-ms-version")
+          .isEqualTo(xMsVersion);
+    } else {
+      Assertions.assertThat(urlUsed)
+          .describedAs("Url must not have paginated = true as query param")
+          .doesNotContain(QUERY_PARAM_PAGINATED);
+      Assertions.assertThat(xMsVersionUsed)
+          .describedAs("Request was made with wrong x-ms-version")
+          .isEqualTo(xMsVersion);
+    }
+    // Assert that deletion was successful in every scenario.
+    AbfsRestOperationException e = intercept(AbfsRestOperationException.class, 
() ->
+        client.getPathStatus(testPath.toString(), false, testTracingContext, 
+    Assertions.assertThat(e.getStatusCode())
+        .describedAs("Path should have been 
+  }
+  public void testNonRecursiveDeleteWithPaginationInternal(boolean 
isPaginatedDeleteEnabled) throws Exception{
+    final AzureBlobFileSystem fs = isHnsEnabled ? this.firstTestUserFs : 
+    TracingContext testTracingContext = 
getTestTracingContext(this.firstTestUserFs, true);
+    Path testPath = new Path("/emptyPath");
+    fs.mkdirs(testPath);
+    // Set the paginated enabled value and xMsVersion at client level.
+    AbfsClient client = fs.getAbfsStore().getClient();
+    AbfsRestOperation op = client.deletePath(testPath.toString(), false, null, 
+    // Getting the url that was used to make the request
+    String urlUsed = op.getUrl().toString();
+    // Assert that paginated query param was not set to make request
+    Assertions.assertThat(urlUsed)
+          .describedAs("Url must not have paginated as query param")
+          .doesNotContain(QUERY_PARAM_PAGINATED);
+    // Assert that deletion was successful in every scenario.
+    AbfsRestOperationException e = intercept(AbfsRestOperationException.class, 
() ->
+        client.getPathStatus(testPath.toString(), false, testTracingContext, 
+    Assertions.assertThat(e.getStatusCode())
+        .describedAs("Path should have been 
+  }
+  public void testRecursiveDeleteWithInvalidCTInternal(boolean 
isPaginatedEnabled) throws Exception {
+    final AzureBlobFileSystem fs = isHnsEnabled ? this.firstTestUserFs : 
+    Path smallDirPath = createSmallDir();
+    String randomCT = "randomContinuationToken1234";
+    TracingContext testTracingContext = 
getTestTracingContext(this.firstTestUserFs, true);
+    AbfsClient client = fs.getAbfsStore().getClient();
+    AbfsRestOperationException e = intercept(AbfsRestOperationException.class, 
() ->
+        client.deletePath(smallDirPath.toString(), true, randomCT, 
+    Assertions.assertThat(e.getStatusCode())
+        .describedAs("Request Should fail with 

Review Comment:
   Rather than the bare status code 400, the assertion description should mention the "Bad Request" error.

> ABFS: Support for Pagination in Recursive Directory Delete 
> -----------------------------------------------------------
>                 Key: HADOOP-18656
>                 URL: https://issues.apache.org/jira/browse/HADOOP-18656
>             Project: Hadoop Common
>          Issue Type: Sub-task
>          Components: fs/azure
>    Affects Versions: 3.3.5
>            Reporter: Sree Bhattacharyya
>            Assignee: Anuj Modi
>            Priority: Minor
>              Labels: pull-request-available
> Today, when a recursive delete is issued for a large directory in an ADLS 
> Gen2 (HNS) account, the directory deletion happens in O(1), but in the 
> backend, ACL checks are done recursively for each object inside that 
> directory, which in the case of a large directory could lead to a request 
> timeout. Pagination is introduced in the Azure Storage backend for these 
> ACL checks.
> More information on how pagination works can be found in the public documentation 
> of [Azure Delete Path 
> API|https://learn.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/path/delete?view=rest-storageservices-datalakestoragegen2-2019-12-12].
> This PR contains changes to support this from the client side. To trigger 
> pagination, the client needs to add a new query parameter "paginated" and 
> set it to true, along with recursive set to true. In return, if the 
> directory is large, the server might return a continuation token back to 
> the caller. If the caller gets back a continuation token, it has to call 
> the delete API again with the continuation token, along with recursive and 
> pagination set to true. This is similar to directory delete of an FNS 
> account.
> Pagination is available only from version "2023-08-03" onwards.
> The PR also contains functional tests to verify that the driver works well 
> with different combinations of the recursive and pagination features for 
> both HNS and FNS accounts.
> Full E2E testing of pagination requires a large dataset to be created and 
> is hence not added as part of the driver test suite. However, extensive 
> E2E testing has been performed.

This message was sent by Atlassian Jira

To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org

Reply via email to