clintropolis commented on a change in pull request #8495: Check targetCompactionSizeBytes to search for candidate segments in auto compaction URL: https://github.com/apache/incubator-druid/pull/8495#discussion_r322510975
########## File path: server/src/main/java/org/apache/druid/server/coordinator/helper/SegmentCompactorUtil.java ########## @@ -20,21 +20,36 @@ package org.apache.druid.server.coordinator.helper; import com.google.common.base.Preconditions; +import org.apache.druid.timeline.DataSegment; import org.joda.time.Interval; +import javax.annotation.Nullable; +import java.util.List; + /** * Util class used by {@link DruidCoordinatorSegmentCompactor} and {@link CompactionSegmentSearchPolicy}. */ class SegmentCompactorUtil { - static boolean isCompactibleSize(long targetBytes, long currentTotalBytes, long additionalBytes) - { - return currentTotalBytes + additionalBytes <= targetBytes; - } + /** + * The allowed error rate of the segment size after compaction. + * Its value is determined experimentally. + */ + private static final double ALLOWED_ERROR_OF_SEGMENT_SIZE = .2; - static boolean isCompactibleNum(int numTargetSegments, int numCurrentSegments, int numAdditionalSegments) + static boolean needsCompaction(@Nullable Long targetCompactionSizeBytes, List<DataSegment> candidates) { - return numCurrentSegments + numAdditionalSegments <= numTargetSegments; + if (targetCompactionSizeBytes == null) { + // If targetCompactionSizeBytes is null, we have no way to check that the given segments need compaction or not. + return true; + } + final double minAllowedSize = targetCompactionSizeBytes * (1 - ALLOWED_ERROR_OF_SEGMENT_SIZE); + final double maxAllowedSize = targetCompactionSizeBytes * (1 + ALLOWED_ERROR_OF_SEGMENT_SIZE); + + return candidates + .stream() + .filter(segment -> segment.getSize() < minAllowedSize || segment.getSize() > maxAllowedSize) + .count() > 1; Review comment: nit: I think this should use `anyMatch` instead of `filter` + `count` since the former might not have to materialize the entire set upon reaching first match ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@druid.apache.org For additional commands, e-mail: commits-h...@druid.apache.org