TSFenwick commented on code in PR #14131:
URL: https://github.com/apache/druid/pull/14131#discussion_r1182824469


##########
extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3DataSegmentKiller.java:
##########
@@ -64,13 +73,81 @@ public S3DataSegmentKiller(
     this.inputDataConfig = inputDataConfig;
   }
 
+  @Override
+  public void kill(List<DataSegment> segments) throws SegmentLoadingException
+  {
+    int size = segments.size();
+    if (size == 0) {
+      return;
+    }
+    if (segments.size() == 1) {
+      kill(segments.get(0));
+      return;
+
+    }
+
+    // we can assume that all segments are in the same bucket.
+    String s3Bucket = MapUtils.getString(segments.get(0).getLoadSpec(), 
BUCKET);
+    final ServerSideEncryptingAmazonS3 s3Client = this.s3ClientSupplier.get();
+
+    List<DeleteObjectsRequest.KeyVersion> keysToDelete = segments.stream()
+            .map(segment -> MapUtils.getString(segment.getLoadSpec(), KEY))
+            .flatMap(path -> Stream.of(new 
DeleteObjectsRequest.KeyVersion(path),
+                                     new 
DeleteObjectsRequest.KeyVersion(DataSegmentKiller.descriptorPath(path))))
+            .collect(Collectors.toList());
+
+    List<List<DeleteObjectsRequest.KeyVersion>> keysChunks = 
Lists.partition(keysToDelete, 1000);
+    DeleteObjectsRequest deleteObjectsRequest = new 
DeleteObjectsRequest(s3Bucket);
+    deleteObjectsRequest.setQuiet(true);
+    // don't fail immediately delete as many as possible then we fail in the 
end by throwing one exception that has all
+    // exceptions in it
+    List<Exception> exceptions = new ArrayList<>();
+    // s3 jdk is weird where the call to delete is a 200 status code for the 
response in the api, but the call is unable
+    // to delete some segments is an exception. this exception has a list of 
all objects it couldn't delete. If you make
+    // a call that is a 401 or 403 outright the exception is thrown, but there 
is no list of objects it couldn't delete.
+    // so we store them here to log later.
+    List<String> unableToDeleteKeysButNotStoredInException = new ArrayList<>();
+    for (List<DeleteObjectsRequest.KeyVersion> keysChunk : keysChunks) {
+      List<String> keysToDeleteStrings = keysChunk.stream().map(
+            
DeleteObjectsRequest.KeyVersion::getKey).collect(Collectors.toList());
+      try {
+        deleteObjectsRequest.setKeys(keysChunk);
+        log.info("Removing from bucket: %s the following index files: %s from 
s3!", s3Bucket, keysToDeleteStrings);
+        s3Client.deleteObjects(deleteObjectsRequest);
+      }
+      catch (AmazonServiceException e) {
+        // client and server errors will not have a list of keys that couldn't 
be deleted in the exception, so we are
+        // adding them here
+        if (e.getStatusCode() >= 300) {
+          
unableToDeleteKeysButNotStoredInException.addAll(keysToDeleteStrings);
+        }
+        exceptions.add(e);
+      }
+    }
+    if (exceptions.size() > 0) {
+      List<String> segmentsNotDeleted = exceptions.stream()
+          .filter(exc -> exc instanceof MultiObjectDeleteException)
+          .map(exc -> (MultiObjectDeleteException) exc)
+          .map(MultiObjectDeleteException::getErrors)
+          .flatMap(errors -> 
errors.stream().map(MultiObjectDeleteException.DeleteError::getKey))
+          .collect(Collectors.toList());
+      segmentsNotDeleted.addAll(unableToDeleteKeysButNotStoredInException);
+      SegmentLoadingException segmentLoadingException =
+          new SegmentLoadingException(exceptions.get(0), "For bucket: %s 
unable to delete some or all segments %s", s3Bucket, segmentsNotDeleted);

Review Comment:
   For bucket: [%s] unable to delete files [%s]



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to