This is an automated email from the ASF dual-hosted git repository. ddanielr pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push: new a0c7e4ae36 Makes inuse GC deletion default behavior (#4252) a0c7e4ae36 is described below commit a0c7e4ae36c68df418103118fe0dde367b8bb94c Author: Daniel Roberts <ddani...@gmail.com> AuthorDate: Mon Feb 12 14:09:38 2024 -0500 Makes inuse GC deletion default behavior (#4252) Removes the property allowing disabling of the in-use GC candidate deletion feature --- .../org/apache/accumulo/core/conf/Property.java | 4 - .../main/java/org/apache/accumulo/gc/GCRun.java | 10 --- .../accumulo/gc/GarbageCollectionAlgorithm.java | 4 +- .../accumulo/gc/GarbageCollectionEnvironment.java | 10 +-- .../apache/accumulo/gc/GarbageCollectionTest.java | 88 ++++++++-------------- 5 files changed, 34 insertions(+), 82 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/conf/Property.java b/core/src/main/java/org/apache/accumulo/core/conf/Property.java index 9acce283cd..497486cb18 100644 --- a/core/src/main/java/org/apache/accumulo/core/conf/Property.java +++ b/core/src/main/java/org/apache/accumulo/core/conf/Property.java @@ -794,10 +794,6 @@ public enum Property { "The listening port for the garbage collector's monitor service.", "1.3.5"), GC_DELETE_THREADS("gc.threads.delete", "16", PropertyType.COUNT, "The number of threads used to delete RFiles and write-ahead logs.", "1.3.5"), - GC_REMOVE_IN_USE_CANDIDATES("gc.remove.in.use.candidates", "true", PropertyType.BOOLEAN, - "GC will remove deletion candidates that are in-use from the metadata location. " - + "This is expected to increase the speed of subsequent GC runs.", - "2.1.3"), GC_SAFEMODE("gc.safemode", "false", PropertyType.BOOLEAN, "Provides listing of files to be deleted but does not delete any files.", "2.1.0"), GC_USE_FULL_COMPACTION("gc.post.metadata.action", "flush", PropertyType.GC_POST_ACTION, diff --git a/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java b/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java index c210b58256..f02b85634e 100644 --- a/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java +++ b/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java @@ -472,16 +472,6 @@ public class GCRun implements GarbageCollectionEnvironment { return context.getConfiguration().getBoolean(Property.GC_SAFEMODE); } - /** - * Checks if InUse Candidates can be removed. - * - * @return value of {@link Property#GC_REMOVE_IN_USE_CANDIDATES} - */ - @Override - public boolean canRemoveInUseCandidates() { - return context.getConfiguration().getBoolean(Property.GC_REMOVE_IN_USE_CANDIDATES); - } - /** * Moves a file to trash. If this garbage collector is not using trash, this method returns false * and leaves the file alone. If the file is missing, this method returns false as opposed to diff --git a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java index ab9bff706e..eb6b8fe281 100644 --- a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java +++ b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java @@ -198,9 +198,7 @@ public class GarbageCollectionAlgorithm { Set<TableId> tableIdsAfter = gce.getCandidateTableIDs(); ensureAllTablesChecked(Collections.unmodifiableSet(tableIdsBefore), Collections.unmodifiableSet(tableIdsSeen), Collections.unmodifiableSet(tableIdsAfter)); - if (gce.canRemoveInUseCandidates()) { - gce.deleteGcCandidates(candidateEntriesToBeDeleted, GcCandidateType.INUSE); - } + gce.deleteGcCandidates(candidateEntriesToBeDeleted, GcCandidateType.INUSE); } private long removeBlipCandidates(GarbageCollectionEnvironment gce, diff --git a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionEnvironment.java b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionEnvironment.java index 1d2b1e6391..3c373e92b7 100644 --- a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionEnvironment.java +++ b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionEnvironment.java @@ -28,7 +28,6 @@ import java.util.SortedMap; import java.util.stream.Stream; import org.apache.accumulo.core.client.TableNotFoundException; -import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.gc.GcCandidate; import org.apache.accumulo.core.gc.Reference; @@ -48,13 +47,6 @@ public interface GarbageCollectionEnvironment { */ Iterator<GcCandidate> getCandidates() throws TableNotFoundException; - /** - * Used for determining if deletion of InUse candidates is enabled. - * - * @return value of {@link Property#GC_REMOVE_IN_USE_CANDIDATES} - */ - boolean canRemoveInUseCandidates(); - /** * Given an iterator to a deletion candidate list, return a sub-list of candidates which fit * within provided memory constraints. @@ -111,7 +103,7 @@ public interface GarbageCollectionEnvironment { throws TableNotFoundException; /** - * Delete in-use reference candidates based on property settings + * Delete in-use reference candidates * * @param GcCandidates Collection of deletion reference candidates to remove. */ diff --git a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java index 71e4ed9dbf..4c459f150c 100644 --- a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java +++ b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java @@ -60,8 +60,6 @@ public class GarbageCollectionTest { ArrayList<GcCandidate> fileDeletions = new ArrayList<>(); ArrayList<TableId> tablesDirsToDelete = new ArrayList<>(); - boolean deleteInUseRefs = false; - private long timestamp = 0L; private final Ample.DataLevel level; @@ -90,11 +88,6 @@ public class GarbageCollectionTest { return List.copyOf(candidates).iterator(); } - @Override - public boolean canRemoveInUseCandidates() { - return deleteInUseRefs; - } - @Override public List<GcCandidate> readCandidatesThatFitInMemory(Iterator<GcCandidate> candidatesIter) { List<GcCandidate> candidatesBatch = new ArrayList<>(); @@ -233,8 +226,7 @@ public class GarbageCollectionTest { } private void assertNoCandidatesRemoved(TestGCE gce) { - assertEquals(0, gce.deletedCandidates.size(), - "Deleted Candidates not empty: " + gce.deleteInUseRefs); + assertEquals(0, gce.deletedCandidates.size(), "Deleted Candidates not empty"); } private void assertCandidateRemoved(TestGCE gce, GcCandidateType gcCandidateType, @@ -242,8 +234,7 @@ public class GarbageCollectionTest { for (GcCandidate gcCandidate : gcCandidates) { assertEquals(gcCandidateType, gce.deletedCandidates.remove(gcCandidate)); } - assertEquals(0, gce.deletedCandidates.size(), - "Deleted Candidates not empty: " + gce.deleteInUseRefs); + assertEquals(0, gce.deletedCandidates.size(), "Deleted Candidates not empty"); } // This test was created to help track down a ConcurrentModificationException error that was @@ -274,7 +265,7 @@ public class GarbageCollectionTest { var candOne = gce.addCandidate("hdfs://foo:6000/accumulo/tables/4/t0/F000.rf"); var candTwo = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F001.rf"); - gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/5/t0/F005.rf"); + var candThree = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/5/t0/F005.rf"); gce.addFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0/F000.rf"); gce.addFileReference("4", null, "hdfs://foo:6000/accumulo/tables/4/t0/F001.rf"); @@ -285,10 +276,12 @@ public class GarbageCollectionTest { gca.collect(gce); assertFileDeleted(gce); + assertCandidateRemoved(gce, GcCandidateType.INUSE, candOne, candTwo, candThree); // Remove the reference to this flush file, run the GC which should not trim it from the // candidates, and assert that it's gone gce.removeFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0/F000.rf"); + candOne = gce.addCandidate("hdfs://foo:6000/accumulo/tables/4/t0/F000.rf"); gca.collect(gce); assertFileDeleted(gce, candOne); @@ -297,16 +290,11 @@ public class GarbageCollectionTest { gca.collect(gce); assertFileDeleted(gce); - // Remove the reference to a file in the candidates should cause it to be removed - gce.removeFileReference("4", null, "hdfs://foo:6000/accumulo/tables/4/t0/F001.rf"); + // Adding more candidates which do not have references that should be removed + var candFour = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F003.rf"); + var candFive = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F004.rf"); gca.collect(gce); - assertFileDeleted(gce, candTwo); - - // Adding more candidates which do not have references should be removed - var candThree = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F003.rf"); - var candFour = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F004.rf"); - gca.collect(gce); - assertFileDeleted(gce, candThree, candFour); + assertFileDeleted(gce, candFour, candFive); } @@ -323,7 +311,7 @@ public class GarbageCollectionTest { var candOne = gce.addCandidate("hdfs://foo:6000/accumulo/tables/4/t0/F000.rf"); var candTwo = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F001.rf"); - gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/5/t0/F005.rf"); + var candThree = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/5/t0/F005.rf"); int counter = 0; // items to be removed from candidates @@ -362,10 +350,12 @@ public class GarbageCollectionTest { gca.collect(gce); assertFileDeleted(gce, toBeRemoved); + assertCandidateRemoved(gce, GcCandidateType.INUSE, candOne, candTwo, candThree); - // Remove the reference to this flush file, run the GC which should not trim it from the - // candidates, and assert that it's gone + // Remove the reference to this flush file, add the candidate, run the GC and assert that it's + // gone gce.removeFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0/F000.rf"); + candOne = gce.addCandidate("hdfs://foo:6000/accumulo/tables/4/t0/F000.rf"); gca.collect(gce); assertFileDeleted(gce, candOne); @@ -376,14 +366,15 @@ public class GarbageCollectionTest { // Remove the reference to a file in the candidates should cause it to be removed gce.removeFileReference("4", null, "hdfs://foo:6000/accumulo/tables/4/t0/F001.rf"); + candTwo = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F001.rf"); gca.collect(gce); assertFileDeleted(gce, candTwo); // Adding more candidates which do no have references should be removed - var candThree = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F003.rf"); - var candFour = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F004.rf"); + var candFour = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F003.rf"); + var candFive = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F004.rf"); gca.collect(gce); - assertFileDeleted(gce, candThree, candFour); + assertFileDeleted(gce, candFour, candFive); } /** @@ -424,10 +415,6 @@ public class GarbageCollectionTest { GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); - // All candidates currently have references - gca.collect(gce); - assertFileDeleted(gce); - List<String[]> refsToRemove = new ArrayList<>(); refsToRemove.add(new String[] {"4", "/t0/F000.rf"}); refsToRemove.add(new String[] {"5", "../4/t0/F000.rf"}); @@ -440,24 +427,33 @@ public class GarbageCollectionTest { gca.collect(gce); assertFileDeleted(gce); } + // InUse references have been removed + assertCandidateRemoved(gce, GcCandidateType.INUSE, candOne, candTwo, candThree); gce.removeFileReference(refsToRemove.get(2)[0], null, refsToRemove.get(2)[1]); + var cand = gce.addCandidate(refsToRemove.get(2)[1]); gca.collect(gce); - assertFileDeleted(gce, candOne); + assertFileDeleted(gce, cand); gce.removeFileReference("4", null, "/t0/F001.rf"); + candThree = gce.addCandidate("/4/t0/F001.rf"); gca.collect(gce); assertFileDeleted(gce, candThree); - // add absolute candidate for file that already has a relative candidate + // add an absolute candidate for file that already has a relative candidate var candFour = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F002.rf"); gca.collect(gce); assertFileDeleted(gce); + assertCandidateRemoved(gce, GcCandidateType.INUSE, candFour); + // Re-add the absolute candidate + candFour = gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F002.rf"); gce.removeFileReference("4", null, "/t0/F002.rf"); gca.collect(gce); assertFileDeleted(gce, candFour); + // Finally re-add the relative candidate to remove the last file + candTwo = gce.addCandidate("/4/t0/F002.rf"); gca.collect(gce); assertFileDeleted(gce, candTwo); } @@ -836,16 +832,9 @@ public class GarbageCollectionTest { gce.addFileReference("6", null, "hdfs://foo.com:6000/accumulo/tables/4/t0/F000.rf"); GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); - gce.deleteInUseRefs = false; // All candidates currently have references gca.collect(gce); assertFileDeleted(gce); - assertNoCandidatesRemoved(gce); - - // Enable InUseRefs to be removed if the file ref is found. - gce.deleteInUseRefs = true; - gca.collect(gce); - assertFileDeleted(gce); assertCandidateRemoved(gce, GcCandidateType.INUSE, candidate); var cand1 = gce.addCandidate("/9/t0/F003.rf"); @@ -879,17 +868,9 @@ public class GarbageCollectionTest { gce.addFileReference("+r", null, "hdfs://foo.com:6000/accumulo/tables/4/t0/F000.rf"); GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); - gce.deleteInUseRefs = false; - // No InUse Candidates should be removed. - gca.collect(gce); - assertFileDeleted(gce); - assertNoCandidatesRemoved(gce); - - gce.deleteInUseRefs = true; - // Due to the gce Datalevel of ROOT, InUse candidate deletion is not supported regardless of - // property setting. gca.collect(gce); assertFileDeleted(gce); + // Due to the gce Datalevel of ROOT, InUse candidate deletion is not supported assertNoCandidatesRemoved(gce); gce.removeFileReference("+r", null, "/t0/F000.rf"); @@ -908,7 +889,7 @@ public class GarbageCollectionTest { TestGCE gce = new TestGCE(); GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); - // Check expected starting state. + // Check the expected starting state. assertEquals(0, gce.candidates.size()); // Ensure that dir candidates still work @@ -930,14 +911,11 @@ public class GarbageCollectionTest { assertEquals(0, gce.candidates.size()); - // Now enable InUse deletions - gce.deleteInUseRefs = true; - // Add deletion candidate for a directory. var candidate = new GcCandidate("6/t-0/", 10L); gce.candidates.add(candidate); - // Then create a InUse candidate for a file in that directory. + // Then create an InUse candidate for a file in that directory. gce.addFileReference("6", null, "/t-0/F003.rf"); var removedCandidate = gce.addCandidate("6/t-0/F003.rf"); @@ -961,8 +939,6 @@ public class GarbageCollectionTest { gce.addFileReference("4", null, "/t0/F000.rf"); GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); - gce.deleteInUseRefs = true; - gca.collect(gce); assertFileDeleted(gce, candTwo); assertCandidateRemoved(gce, GcCandidateType.INUSE, candOne);