keith-turner commented on code in PR #5620:
URL: https://github.com/apache/accumulo/pull/5620#discussion_r2135969182
##########
core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java:
##########
@@ -368,52 +374,49 @@ static int
getMaxTabletFiles(ServiceEnvironment.Configuration configuration) {
*/
private Collection<CompactableFile>
findFilesToCompactWithLowerRatio(PlanningParameters params,
long maxSizeToCompact, int maxTabletFiles) {
- double lowRatio = 1.0;
- double highRatio = params.getRatio();
-
- Preconditions.checkArgument(highRatio >= lowRatio);
var candidates = Set.copyOf(params.getCandidates());
- Collection<CompactableFile> found = Set.of();
-
- int goalCompactionSize = candidates.size() - maxTabletFiles + 1;
- if (goalCompactionSize > maxFilesToCompact) {
- // The tablet is way over max tablet files, so multiple compactions will
be needed. Therefore,
- // do not set a goal size for this compaction and find the largest
compaction ratio that will
- // compact some set of files.
- goalCompactionSize = 0;
- }
-
- // Do a binary search of the compaction ratios.
- while (highRatio - lowRatio > .1) {
- double ratioToCheck = (highRatio - lowRatio) / 2 + lowRatio;
-
- // This is continually resorting the list of files in the following
call, could optimize this
- var filesToCompact =
- findDataFilesToCompact(candidates, ratioToCheck, maxFilesToCompact,
maxSizeToCompact);
-
- log.trace("Tried ratio {} and found {} {} {}", ratioToCheck,
filesToCompact,
- filesToCompact.size() >= goalCompactionSize, goalCompactionSize);
+ List<CompactableFile> sortedFiles = sortAndLimitByMaxSize(candidates,
maxSizeToCompact);
+
+ List<CompactableFile> found = List.of();
+ double largestRatioSeen = Double.MIN_VALUE;
+
+ if (sortedFiles.size() > 1) {
+ int windowStart = 0;
+ int windowEnd = Math.min(sortedFiles.size(), maxFilesToCompact);
+
+ while (windowEnd <= sortedFiles.size()) {
+ var filesInWindow = sortedFiles.subList(windowStart, windowEnd);
+
+ long sum = filesInWindow.get(0).getEstimatedSize();
+ for (int i = 1; i < filesInWindow.size(); i++) {
+ long size = filesInWindow.get(i).getEstimatedSize();
+ sum += size;
+ // This is the compaction ratio needed to compact these files
+ double neededCompactionRatio = sum / (double) size;
Review Comment:
Bulk files should have an estimated size; it is their estimated entries that
is zero. It would be good in general to defend against divide by zero here, though.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]