felipecrv commented on code in PR #38793: URL: https://github.com/apache/arrow/pull/38793#discussion_r1402728098
########## cpp/src/arrow/filesystem/azurefs.cc: ########## @@ -969,6 +969,95 @@ class AzureFileSystem::Impl { RETURN_NOT_OK(stream->Init()); return stream; } + + Status DeleteDir(const AzureLocation& location) { + if (location.container.empty()) { + return Status::Invalid("Cannot delete an empty container"); + } + + if (location.path.empty()) { + auto container_client = + blob_service_client_->GetBlobContainerClient(location.container); + try { + auto response = container_client.Delete(); + if (response.Value.Deleted) { + return Status::OK(); + } else { + return StatusFromErrorResponse( + container_client.GetUrl(), response.RawResponse.get(), + "Failed to delete a container: " + location.container); + } + } catch (const Azure::Storage::StorageException& exception) { + return internal::ExceptionToStatus( + "Failed to delete a container: " + location.container + ": " + + container_client.GetUrl(), + exception); + } + } + + ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, + hierarchical_namespace_.Enabled(location.container)); + if (hierarchical_namespace_enabled) { + auto directory_client = + datalake_service_client_->GetFileSystemClient(location.container) + .GetDirectoryClient(location.path); + try { + auto response = directory_client.DeleteRecursive(); + if (response.Value.Deleted) { + return Status::OK(); + } else { + return StatusFromErrorResponse( + directory_client.GetUrl(), response.RawResponse.get(), + "Failed to delete a directory: " + location.path); + } + } catch (const Azure::Storage::StorageException& exception) { + return internal::ExceptionToStatus( + "Failed to delete a directory: " + location.path + ": " + + directory_client.GetUrl(), + exception); + } + } else { + auto container_client = + blob_service_client_->GetBlobContainerClient(location.container); + Azure::Storage::Blobs::ListBlobsOptions options; + options.Prefix = internal::EnsureTrailingSlash(location.path); + // 
https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch#remarks + // + // Only supports up to 256 subrequests in a single batch. The + // size of the body for a batch request can't exceed 4 MB. + const int32_t kNumMaxRequestsInBatch = 256; + options.PageSizeHint = kNumMaxRequestsInBatch; + auto list_response = container_client.ListBlobs(options); + while (list_response.HasPage() && !list_response.Blobs.empty()) { + auto batch = container_client.CreateBatch(); + std::vector<Azure::Storage::DeferredResponse< + Azure::Storage::Blobs::Models::DeleteBlobResult>> + deferred_responses; + for (const auto& blob_item : list_response.Blobs) { + deferred_responses.push_back(batch.DeleteBlob(blob_item.Name)); + } + try { + container_client.SubmitBatch(batch); + } catch (const Azure::Storage::StorageException& exception) { + return internal::ExceptionToStatus( + "Failed to delete blobs in a directory: " + location.path + ": " + + container_client.GetUrl(), + exception); + } + for (size_t i = 0; i < deferred_responses.size(); ++i) { + const auto& deferred_response = deferred_responses[i]; + auto delete_response = deferred_response.GetResponse(); + if (!delete_response.Value.Deleted) { + const auto& blob_item = list_response.Blobs[i]; + return Status::IOError("Failed to delete a blob: ", blob_item.Name, + ": " + container_client.GetUrl()); Review Comment: You're right. I thought `GetResponse` would block if necessary, but all the futures are waited for during `SubmitBatch`. It could block, but in the specific case of batched requests, they are all in the ready state -> https://github.com/Azure/azure-sdk-for-cpp/blob/4a32d7266cfac8bfc0eb87feb56011361a36f43c/sdk/storage/azure-storage-blobs/src/blob_batch.cpp#L248 This is a bit disappointing because it reduces the parallelism that could be exploited, but it surely reduces the risk of API misuse. -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org