TSFenwick commented on code in PR #14131:
URL: https://github.com/apache/druid/pull/14131#discussion_r1177131874


##########
extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3DataSegmentKiller.java:
##########
@@ -64,6 +70,52 @@ public S3DataSegmentKiller(
     this.inputDataConfig = inputDataConfig;
   }
 
+  @Override
+  public void killBatched(List<DataSegment> segments) throws SegmentLoadingException
+  {
+    int size = segments.size();
+    if (size == 0) {
+      return;
+    }
+    if (segments.size() == 1) {
+      kill(segments.get(0));
+      return;
+    }
+
+    // we can assume that all segments are in the same bucket.
+    String s3Bucket = MapUtils.getString(segments.get(0).getLoadSpec(), "bucket");
+    final ServerSideEncryptingAmazonS3 s3Client = this.s3ClientSupplier.get();
+
+    // 1000 objects is the maximum number of objects that can be deleted from S3 at a time.
+    List<List<DataSegment>> segmentsChunks = Lists.partition(segments, 1000);
+    for (List<DataSegment> segmentsChunk : segmentsChunks) {
+      try {
+        DeleteObjectsRequest deleteObjectsRequest = new DeleteObjectsRequest(s3Bucket);
+        String[] keys = segmentsChunk.stream()
+                     .map(segment -> MapUtils.getString(segment.getLoadSpec(), "key"))
+                     .toArray(String[]::new);
+        deleteObjectsRequest = deleteObjectsRequest.withKeys(keys);
+        log.info("Removing indexes from s3, bucket: %s, keys %s", 
deleteObjectsRequest.getBucketName(), 
deleteObjectsRequest.getKeys().toString());
+        DeleteObjectsResult deleteResult = s3Client.deleteObjects(deleteObjectsRequest);
+        log.info("deleted objects %s", deleteResult);
+
+        // delete descriptors, which are files that store segment metadata in deep storage.
+        // These files are deprecated and no longer written, but we still delete them if they exist.
+        deleteObjectsRequest = new DeleteObjectsRequest(s3Bucket);
+        deleteObjectsRequest = deleteObjectsRequest.withKeys(
+            Arrays.stream(keys).map(DataSegmentKiller::descriptorPath).toArray(String[]::new));
+        deleteResult = s3Client.deleteObjects(deleteObjectsRequest);
+        log.info("deleted objects %s", deleteResult);
+      }
+      catch (MultiObjectDeleteException e) {
+        throw new SegmentLoadingException(e, "Couldn't kill all segments, but deleted [%s]: [%s]", e.getDeletedObjects(), e);
+      }
+      catch (AmazonServiceException e) {
+

Review Comment:
   throw a SegmentLoadingException here
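
   A rough sketch of how that catch block could rethrow, reusing the variables already in scope in the quoted diff (the message text is illustrative, not taken from the PR):

   ```java
         catch (AmazonServiceException e) {
           // Sketch: surface the failure to the caller instead of swallowing it,
           // mirroring how the MultiObjectDeleteException branch already rethrows.
           throw new SegmentLoadingException(e, "Couldn't kill segments in bucket [%s]", s3Bucket);
         }
   ```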



##########
extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3DataSegmentKiller.java:
##########
@@ -64,6 +69,52 @@ public S3DataSegmentKiller(
     this.inputDataConfig = inputDataConfig;
   }
 
+  @Override
+  public void killBatched(List<DataSegment> segments) throws SegmentLoadingException
+  {
+    int size = segments.size();
+    if (size == 0) {
+      return;
+    }
+    if (segments.size() == 1) {
+      kill(segments.get(0));
+      return;
+    }
+    try {
+      // we can assume that all segments are in the same bucket.
+      String s3Bucket = MapUtils.getString(segments.get(0).getLoadSpec(), "bucket");
+      final ServerSideEncryptingAmazonS3 s3Client = this.s3ClientSupplier.get();
+
+      // 1000 objects is the maximum number of objects that can be deleted from S3 at a time.
+      List<List<DataSegment>> segmentsChunks = Lists.partition(segments, 1000);
+      for (List<DataSegment> segmentsChunk : segmentsChunks) {
+        DeleteObjectsRequest deleteObjectsRequest = new DeleteObjectsRequest(s3Bucket);

Review Comment:
   Going to say no; network calls are likely the slower thing here. Will add this scenario to the tests before wrapping up this PR.



##########
processing/src/main/java/org/apache/druid/segment/loading/DataSegmentKiller.java:
##########
@@ -54,6 +55,18 @@ static String descriptorPath(String path)
    */
   void kill(DataSegment segment) throws SegmentLoadingException;
 
+  /**
+   * Removes segment files (indexes and metadata) from deep storage.
+   * @param segments The list of segments to kill
+   * @throws SegmentLoadingException If there is an exception during deletion
+   */
+  default void killBatched(List<DataSegment> segments) throws SegmentLoadingException

Review Comment:
   `kill` should be a good enough name.
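
   A sketch of what the renamed default could look like on the interface (the one-segment-at-a-time fallback body is an assumption, not copied from the PR):

   ```java
     /**
      * Batched counterpart of {@link #kill(DataSegment)}.
      */
     default void kill(List<DataSegment> segments) throws SegmentLoadingException
     {
       // Assumed fallback: delete segments one by one; implementations such as
       // S3DataSegmentKiller can override this with a true batch delete.
       for (DataSegment segment : segments) {
         kill(segment);
       }
     }
   ```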



##########
extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3DataSegmentKiller.java:
##########
@@ -64,6 +70,52 @@ public S3DataSegmentKiller(
     this.inputDataConfig = inputDataConfig;
   }
 
+  @Override
+  public void kill(List<DataSegment> segments) throws SegmentLoadingException
+  {
+    int size = segments.size();
+    if (size == 0) {
+      return;
+    }
+    if (segments.size() == 1) {
+      kill(segments.get(0));
+      return;
+    }
+
+    // we can assume that all segments are in the same bucket.
+    String s3Bucket = MapUtils.getString(segments.get(0).getLoadSpec(), "bucket");
+    final ServerSideEncryptingAmazonS3 s3Client = this.s3ClientSupplier.get();
+
+    // 1000 objects is the maximum number of objects that can be deleted from S3 at a time.
+    List<List<DataSegment>> segmentsChunks = Lists.partition(segments, 1000);
+    for (List<DataSegment> segmentsChunk : segmentsChunks) {
+      try {
+        DeleteObjectsRequest deleteObjectsRequest = new DeleteObjectsRequest(s3Bucket);
+        String[] keys = segmentsChunk.stream()
+                     .map(segment -> MapUtils.getString(segment.getLoadSpec(), "key"))
+                     .toArray(String[]::new);
+        deleteObjectsRequest = deleteObjectsRequest.withKeys(keys);
+        log.info("Removing indexes from s3, bucket: %s, keys %s", 
deleteObjectsRequest.getBucketName(), 
deleteObjectsRequest.getKeys().toString());
+        DeleteObjectsResult deleteResult = s3Client.deleteObjects(deleteObjectsRequest);
+        log.info("deleted objects %s", deleteResult);
+
+        // delete descriptors, which are files that store segment metadata in deep storage.
+        // These files are deprecated and no longer written, but we still delete them if they exist.
+        deleteObjectsRequest = new DeleteObjectsRequest(s3Bucket);
+        deleteObjectsRequest = deleteObjectsRequest.withKeys(
+            Arrays.stream(keys).map(DataSegmentKiller::descriptorPath).toArray(String[]::new));
+        deleteResult = s3Client.deleteObjects(deleteObjectsRequest);
+        log.info("deleted objects %s", deleteResult);

Review Comment:
   This is a junk log since `DeleteObjectsResult` doesn't implement `toString`, so it just prints a memory address :/
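
   One way to make that log line useful, sketched against the SDK's `getDeletedObjects()` accessor (the exact wording is illustrative):

   ```java
           DeleteObjectsResult deleteResult = s3Client.deleteObjects(deleteObjectsRequest);
           // Log the number of keys actually deleted rather than the result object's identity hash.
           log.info(
               "Deleted [%d] objects from bucket [%s]",
               deleteResult.getDeletedObjects().size(),
               deleteObjectsRequest.getBucketName()
           );
   ```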



##########
extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3DataSegmentKiller.java:
##########
@@ -64,6 +70,52 @@ public S3DataSegmentKiller(
     this.inputDataConfig = inputDataConfig;
   }
 
+  @Override
+  public void kill(List<DataSegment> segments) throws SegmentLoadingException
+  {
+    int size = segments.size();
+    if (size == 0) {
+      return;
+    }
+    if (segments.size() == 1) {
+      kill(segments.get(0));
+      return;
+    }
+
+    // we can assume that all segments are in the same bucket.
+    String s3Bucket = MapUtils.getString(segments.get(0).getLoadSpec(), "bucket");
+    final ServerSideEncryptingAmazonS3 s3Client = this.s3ClientSupplier.get();
+
+    // 1000 objects is the maximum number of objects that can be deleted from S3 at a time.
+    List<List<DataSegment>> segmentsChunks = Lists.partition(segments, 1000);
+    for (List<DataSegment> segmentsChunk : segmentsChunks) {
+      try {
+        DeleteObjectsRequest deleteObjectsRequest = new DeleteObjectsRequest(s3Bucket);
+        String[] keys = segmentsChunk.stream()
+                     .map(segment -> MapUtils.getString(segment.getLoadSpec(), "key"))
+                     .toArray(String[]::new);
+        deleteObjectsRequest = deleteObjectsRequest.withKeys(keys);
+        log.info("Removing indexes from s3, bucket: %s, keys %s", 
deleteObjectsRequest.getBucketName(), 
deleteObjectsRequest.getKeys().toString());
+        DeleteObjectsResult deleteResult = s3Client.deleteObjects(deleteObjectsRequest);
+        log.info("deleted objects %s", deleteResult);
+
+        // delete descriptors, which are files that store segment metadata in deep storage.
+        // These files are deprecated and no longer written, but we still delete them if they exist.
+        deleteObjectsRequest = new DeleteObjectsRequest(s3Bucket);
+        deleteObjectsRequest = deleteObjectsRequest.withKeys(
+            Arrays.stream(keys).map(DataSegmentKiller::descriptorPath).toArray(String[]::new));
+        deleteResult = s3Client.deleteObjects(deleteObjectsRequest);
+        log.info("deleted objects %s", deleteResult);
+      }
+      catch (MultiObjectDeleteException e) {
+        throw new SegmentLoadingException(e, "Couldn't kill all segments, but deleted [%s]: [%s]", e.getDeletedObjects(), e);

Review Comment:
   check these exceptions
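
   For reference, a rough shape this catch block could take; in the AWS SDK, `MultiObjectDeleteException` exposes both the failed keys (`getErrors()`) and the successfully deleted ones (`getDeletedObjects()`). The message wording below is illustrative:

   ```java
         catch (MultiObjectDeleteException e) {
           // Sketch: report how many objects failed versus how many were deleted,
           // instead of passing the raw exception into the format string.
           throw new SegmentLoadingException(
               e,
               "Couldn't delete [%d] objects from bucket [%s] (deleted [%d])",
               e.getErrors().size(),
               s3Bucket,
               e.getDeletedObjects().size()
           );
         }
   ```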



##########
server/src/main/java/org/apache/druid/segment/loading/OmniDataSegmentKiller.java:
##########
@@ -51,6 +53,22 @@ public OmniDataSegmentKiller(
     }
   }
 
+  @Override
+  public void kill(List<DataSegment> segments) throws SegmentLoadingException

Review Comment:
   add test for this
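
   A rough sketch of such a test using Mockito; the constructor shape (a map from load-spec type to a killer provider) is an assumption and would need to match OmniDataSegmentKiller's actual signature, and `createS3Segment` is a hypothetical fixture helper:

   ```java
     @Test
     public void testKillListDelegatesToMatchingKiller() throws SegmentLoadingException
     {
       final DataSegmentKiller s3Killer = Mockito.mock(DataSegmentKiller.class);

       // Assumed wiring: killers keyed by load-spec type, wrapped in providers.
       final OmniDataSegmentKiller omniKiller = new OmniDataSegmentKiller(
           ImmutableMap.<String, Provider<DataSegmentKiller>>of("s3_zip", () -> s3Killer)
       );

       // createS3Segment is a hypothetical helper that builds a DataSegment
       // whose loadSpec is {"type": "s3_zip", "bucket": ..., "key": ...}.
       final List<DataSegment> segments = ImmutableList.of(createS3Segment("key1"), createS3Segment("key2"));

       omniKiller.kill(segments);

       // Both segments share a load-spec type, so the delegate should see a single batched call.
       Mockito.verify(s3Killer).kill(ArgumentMatchers.anyList());
     }
   ```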



##########
extensions-core/s3-extensions/src/main/java/org/apache/druid/storage/s3/S3DataSegmentKiller.java:
##########
@@ -64,6 +69,52 @@ public S3DataSegmentKiller(
     this.inputDataConfig = inputDataConfig;
   }
 
+  @Override
+  public void killBatched(List<DataSegment> segments) throws SegmentLoadingException
+  {
+    int size = segments.size();
+    if (size == 0) {
+      return;
+    }
+    if (segments.size() == 1) {
+      kill(segments.get(0));
+      return;
+    }
+    try {
+      // we can assume that all segments are in the same bucket.
+      String s3Bucket = MapUtils.getString(segments.get(0).getLoadSpec(), "bucket");

Review Comment:
   Bucket into a constant?
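
   If so, it might look something like this (constant names are illustrative; the S3 extension may already define suitable ones):

   ```java
     // Load-spec fields referenced by this killer.
     private static final String BUCKET = "bucket";
     private static final String KEY = "key";

     // ...

     String s3Bucket = MapUtils.getString(segments.get(0).getLoadSpec(), BUCKET);
   ```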



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

