This is an automated email from the ASF dual-hosted git repository.

ddanielr pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/main by this push:
     new a0c7e4ae36 Makes inuse GC deletion default behavior (#4252)
a0c7e4ae36 is described below

commit a0c7e4ae36c68df418103118fe0dde367b8bb94c
Author: Daniel Roberts <ddani...@gmail.com>
AuthorDate: Mon Feb 12 14:09:38 2024 -0500

    Makes inuse GC deletion default behavior (#4252)
    
    Removes the property allowing disabling of the in-use GC candidate
    deletion feature
---
 .../org/apache/accumulo/core/conf/Property.java    |  4 -
 .../main/java/org/apache/accumulo/gc/GCRun.java    | 10 ---
 .../accumulo/gc/GarbageCollectionAlgorithm.java    |  4 +-
 .../accumulo/gc/GarbageCollectionEnvironment.java  | 10 +--
 .../apache/accumulo/gc/GarbageCollectionTest.java  | 88 ++++++++--------------
 5 files changed, 34 insertions(+), 82 deletions(-)

diff --git a/core/src/main/java/org/apache/accumulo/core/conf/Property.java 
b/core/src/main/java/org/apache/accumulo/core/conf/Property.java
index 9acce283cd..497486cb18 100644
--- a/core/src/main/java/org/apache/accumulo/core/conf/Property.java
+++ b/core/src/main/java/org/apache/accumulo/core/conf/Property.java
@@ -794,10 +794,6 @@ public enum Property {
       "The listening port for the garbage collector's monitor service.", 
"1.3.5"),
   GC_DELETE_THREADS("gc.threads.delete", "16", PropertyType.COUNT,
       "The number of threads used to delete RFiles and write-ahead logs.", 
"1.3.5"),
-  GC_REMOVE_IN_USE_CANDIDATES("gc.remove.in.use.candidates", "true", 
PropertyType.BOOLEAN,
-      "GC will remove deletion candidates that are in-use from the metadata 
location. "
-          + "This is expected to increase the speed of subsequent GC runs.",
-      "2.1.3"),
   GC_SAFEMODE("gc.safemode", "false", PropertyType.BOOLEAN,
       "Provides listing of files to be deleted but does not delete any 
files.", "2.1.0"),
   GC_USE_FULL_COMPACTION("gc.post.metadata.action", "flush", 
PropertyType.GC_POST_ACTION,
diff --git a/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java 
b/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java
index c210b58256..f02b85634e 100644
--- a/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java
+++ b/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java
@@ -472,16 +472,6 @@ public class GCRun implements GarbageCollectionEnvironment 
{
     return context.getConfiguration().getBoolean(Property.GC_SAFEMODE);
   }
 
-  /**
-   * Checks if InUse Candidates can be removed.
-   *
-   * @return value of {@link Property#GC_REMOVE_IN_USE_CANDIDATES}
-   */
-  @Override
-  public boolean canRemoveInUseCandidates() {
-    return 
context.getConfiguration().getBoolean(Property.GC_REMOVE_IN_USE_CANDIDATES);
-  }
-
   /**
    * Moves a file to trash. If this garbage collector is not using trash, this 
method returns false
    * and leaves the file alone. If the file is missing, this method returns 
false as opposed to
diff --git 
a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
 
b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
index ab9bff706e..eb6b8fe281 100644
--- 
a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
+++ 
b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
@@ -198,9 +198,7 @@ public class GarbageCollectionAlgorithm {
     Set<TableId> tableIdsAfter = gce.getCandidateTableIDs();
     ensureAllTablesChecked(Collections.unmodifiableSet(tableIdsBefore),
         Collections.unmodifiableSet(tableIdsSeen), 
Collections.unmodifiableSet(tableIdsAfter));
-    if (gce.canRemoveInUseCandidates()) {
-      gce.deleteGcCandidates(candidateEntriesToBeDeleted, 
GcCandidateType.INUSE);
-    }
+    gce.deleteGcCandidates(candidateEntriesToBeDeleted, GcCandidateType.INUSE);
   }
 
   private long removeBlipCandidates(GarbageCollectionEnvironment gce,
diff --git 
a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionEnvironment.java
 
b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionEnvironment.java
index 1d2b1e6391..3c373e92b7 100644
--- 
a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionEnvironment.java
+++ 
b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionEnvironment.java
@@ -28,7 +28,6 @@ import java.util.SortedMap;
 import java.util.stream.Stream;
 
 import org.apache.accumulo.core.client.TableNotFoundException;
-import org.apache.accumulo.core.conf.Property;
 import org.apache.accumulo.core.data.TableId;
 import org.apache.accumulo.core.gc.GcCandidate;
 import org.apache.accumulo.core.gc.Reference;
@@ -48,13 +47,6 @@ public interface GarbageCollectionEnvironment {
    */
   Iterator<GcCandidate> getCandidates() throws TableNotFoundException;
 
-  /**
-   * Used for determining if deletion of InUse candidates is enabled.
-   *
-   * @return value of {@link Property#GC_REMOVE_IN_USE_CANDIDATES}
-   */
-  boolean canRemoveInUseCandidates();
-
   /**
    * Given an iterator to a deletion candidate list, return a sub-list of 
candidates which fit
    * within provided memory constraints.
@@ -111,7 +103,7 @@ public interface GarbageCollectionEnvironment {
       throws TableNotFoundException;
 
   /**
-   * Delete in-use reference candidates based on property settings
+   * Delete in-use reference candidates
    *
    * @param GcCandidates Collection of deletion reference candidates to remove.
    */
diff --git 
a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java 
b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java
index 71e4ed9dbf..4c459f150c 100644
--- a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java
+++ b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java
@@ -60,8 +60,6 @@ public class GarbageCollectionTest {
 
     ArrayList<GcCandidate> fileDeletions = new ArrayList<>();
     ArrayList<TableId> tablesDirsToDelete = new ArrayList<>();
-    boolean deleteInUseRefs = false;
-
     private long timestamp = 0L;
 
     private final Ample.DataLevel level;
@@ -90,11 +88,6 @@ public class GarbageCollectionTest {
       return List.copyOf(candidates).iterator();
     }
 
-    @Override
-    public boolean canRemoveInUseCandidates() {
-      return deleteInUseRefs;
-    }
-
     @Override
     public List<GcCandidate> 
readCandidatesThatFitInMemory(Iterator<GcCandidate> candidatesIter) {
       List<GcCandidate> candidatesBatch = new ArrayList<>();
@@ -233,8 +226,7 @@ public class GarbageCollectionTest {
   }
 
   private void assertNoCandidatesRemoved(TestGCE gce) {
-    assertEquals(0, gce.deletedCandidates.size(),
-        "Deleted Candidates not empty: " + gce.deleteInUseRefs);
+    assertEquals(0, gce.deletedCandidates.size(), "Deleted Candidates not 
empty");
   }
 
   private void assertCandidateRemoved(TestGCE gce, GcCandidateType 
gcCandidateType,
@@ -242,8 +234,7 @@ public class GarbageCollectionTest {
     for (GcCandidate gcCandidate : gcCandidates) {
       assertEquals(gcCandidateType, gce.deletedCandidates.remove(gcCandidate));
     }
-    assertEquals(0, gce.deletedCandidates.size(),
-        "Deleted Candidates not empty: " + gce.deleteInUseRefs);
+    assertEquals(0, gce.deletedCandidates.size(), "Deleted Candidates not 
empty");
   }
 
   // This test was created to help track down a 
ConcurrentModificationException error that was
@@ -274,7 +265,7 @@ public class GarbageCollectionTest {
 
     var candOne = 
gce.addCandidate("hdfs://foo:6000/accumulo/tables/4/t0/F000.rf");
     var candTwo = 
gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F001.rf");
-    gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/5/t0/F005.rf");
+    var candThree = 
gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/5/t0/F005.rf");
 
     gce.addFileReference("4", null, 
"hdfs://foo.com:6000/accumulo/tables/4/t0/F000.rf");
     gce.addFileReference("4", null, 
"hdfs://foo:6000/accumulo/tables/4/t0/F001.rf");
@@ -285,10 +276,12 @@ public class GarbageCollectionTest {
 
     gca.collect(gce);
     assertFileDeleted(gce);
+    assertCandidateRemoved(gce, GcCandidateType.INUSE, candOne, candTwo, 
candThree);
 
     // Remove the reference to this flush file, run the GC which should not 
trim it from the
     // candidates, and assert that it's gone
     gce.removeFileReference("4", null, 
"hdfs://foo.com:6000/accumulo/tables/4/t0/F000.rf");
+    candOne = gce.addCandidate("hdfs://foo:6000/accumulo/tables/4/t0/F000.rf");
     gca.collect(gce);
     assertFileDeleted(gce, candOne);
 
@@ -297,16 +290,11 @@ public class GarbageCollectionTest {
     gca.collect(gce);
     assertFileDeleted(gce);
 
-    // Remove the reference to a file in the candidates should cause it to be 
removed
-    gce.removeFileReference("4", null, 
"hdfs://foo:6000/accumulo/tables/4/t0/F001.rf");
+    // Adding more candidates which do not have references that should be 
removed
+    var candFour = 
gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F003.rf");
+    var candFive = 
gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F004.rf");
     gca.collect(gce);
-    assertFileDeleted(gce, candTwo);
-
-    // Adding more candidates which do not have references should be removed
-    var candThree = 
gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F003.rf");
-    var candFour = 
gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F004.rf");
-    gca.collect(gce);
-    assertFileDeleted(gce, candThree, candFour);
+    assertFileDeleted(gce, candFour, candFive);
 
   }
 
@@ -323,7 +311,7 @@ public class GarbageCollectionTest {
 
     var candOne = 
gce.addCandidate("hdfs://foo:6000/accumulo/tables/4/t0/F000.rf");
     var candTwo = 
gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F001.rf");
-    gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/5/t0/F005.rf");
+    var candThree = 
gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/5/t0/F005.rf");
 
     int counter = 0;
     // items to be removed from candidates
@@ -362,10 +350,12 @@ public class GarbageCollectionTest {
 
     gca.collect(gce);
     assertFileDeleted(gce, toBeRemoved);
+    assertCandidateRemoved(gce, GcCandidateType.INUSE, candOne, candTwo, 
candThree);
 
-    // Remove the reference to this flush file, run the GC which should not 
trim it from the
-    // candidates, and assert that it's gone
+    // Remove the reference to this flush file, add the candidate, run the GC 
and assert that it's
+    // gone
     gce.removeFileReference("4", null, 
"hdfs://foo.com:6000/accumulo/tables/4/t0/F000.rf");
+    candOne = gce.addCandidate("hdfs://foo:6000/accumulo/tables/4/t0/F000.rf");
     gca.collect(gce);
     assertFileDeleted(gce, candOne);
 
@@ -376,14 +366,15 @@ public class GarbageCollectionTest {
 
     // Remove the reference to a file in the candidates should cause it to be 
removed
     gce.removeFileReference("4", null, 
"hdfs://foo:6000/accumulo/tables/4/t0/F001.rf");
+    candTwo = 
gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F001.rf");
     gca.collect(gce);
     assertFileDeleted(gce, candTwo);
 
     // Adding more candidates which do no have references should be removed
-    var candThree = 
gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F003.rf");
-    var candFour = 
gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F004.rf");
+    var candFour = 
gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F003.rf");
+    var candFive = 
gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F004.rf");
     gca.collect(gce);
-    assertFileDeleted(gce, candThree, candFour);
+    assertFileDeleted(gce, candFour, candFive);
   }
 
   /**
@@ -424,10 +415,6 @@ public class GarbageCollectionTest {
 
     GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
 
-    // All candidates currently have references
-    gca.collect(gce);
-    assertFileDeleted(gce);
-
     List<String[]> refsToRemove = new ArrayList<>();
     refsToRemove.add(new String[] {"4", "/t0/F000.rf"});
     refsToRemove.add(new String[] {"5", "../4/t0/F000.rf"});
@@ -440,24 +427,33 @@ public class GarbageCollectionTest {
       gca.collect(gce);
       assertFileDeleted(gce);
     }
+    // InUse references have been removed
+    assertCandidateRemoved(gce, GcCandidateType.INUSE, candOne, candTwo, 
candThree);
 
     gce.removeFileReference(refsToRemove.get(2)[0], null, 
refsToRemove.get(2)[1]);
+    var cand = gce.addCandidate(refsToRemove.get(2)[1]);
     gca.collect(gce);
-    assertFileDeleted(gce, candOne);
+    assertFileDeleted(gce, cand);
 
     gce.removeFileReference("4", null, "/t0/F001.rf");
+    candThree = gce.addCandidate("/4/t0/F001.rf");
     gca.collect(gce);
     assertFileDeleted(gce, candThree);
 
-    // add absolute candidate for file that already has a relative candidate
+    // add an absolute candidate for file that already has a relative candidate
     var candFour = 
gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F002.rf");
     gca.collect(gce);
     assertFileDeleted(gce);
+    assertCandidateRemoved(gce, GcCandidateType.INUSE, candFour);
 
+    // Re-add the absolute candidate
+    candFour = 
gce.addCandidate("hdfs://foo.com:6000/accumulo/tables/4/t0/F002.rf");
     gce.removeFileReference("4", null, "/t0/F002.rf");
     gca.collect(gce);
     assertFileDeleted(gce, candFour);
 
+    // Finally re-add the relative candidate to remove the last file
+    candTwo = gce.addCandidate("/4/t0/F002.rf");
     gca.collect(gce);
     assertFileDeleted(gce, candTwo);
   }
@@ -836,16 +832,9 @@ public class GarbageCollectionTest {
     gce.addFileReference("6", null, 
"hdfs://foo.com:6000/accumulo/tables/4/t0/F000.rf");
 
     GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
-    gce.deleteInUseRefs = false;
     // All candidates currently have references
     gca.collect(gce);
     assertFileDeleted(gce);
-    assertNoCandidatesRemoved(gce);
-
-    // Enable InUseRefs to be removed if the file ref is found.
-    gce.deleteInUseRefs = true;
-    gca.collect(gce);
-    assertFileDeleted(gce);
     assertCandidateRemoved(gce, GcCandidateType.INUSE, candidate);
 
     var cand1 = gce.addCandidate("/9/t0/F003.rf");
@@ -879,17 +868,9 @@ public class GarbageCollectionTest {
     gce.addFileReference("+r", null, 
"hdfs://foo.com:6000/accumulo/tables/4/t0/F000.rf");
 
     GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
-    gce.deleteInUseRefs = false;
-    // No InUse Candidates should be removed.
-    gca.collect(gce);
-    assertFileDeleted(gce);
-    assertNoCandidatesRemoved(gce);
-
-    gce.deleteInUseRefs = true;
-    // Due to the gce Datalevel of ROOT, InUse candidate deletion is not 
supported regardless of
-    // property setting.
     gca.collect(gce);
     assertFileDeleted(gce);
+    // Due to the gce Datalevel of ROOT, InUse candidate deletion is not 
supported
     assertNoCandidatesRemoved(gce);
 
     gce.removeFileReference("+r", null, "/t0/F000.rf");
@@ -908,7 +889,7 @@ public class GarbageCollectionTest {
     TestGCE gce = new TestGCE();
     GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
 
-    // Check expected starting state.
+    // Check the expected starting state.
     assertEquals(0, gce.candidates.size());
 
     // Ensure that dir candidates still work
@@ -930,14 +911,11 @@ public class GarbageCollectionTest {
 
     assertEquals(0, gce.candidates.size());
 
-    // Now enable InUse deletions
-    gce.deleteInUseRefs = true;
-
     // Add deletion candidate for a directory.
     var candidate = new GcCandidate("6/t-0/", 10L);
     gce.candidates.add(candidate);
 
-    // Then create a InUse candidate for a file in that directory.
+    // Then create an InUse candidate for a file in that directory.
     gce.addFileReference("6", null, "/t-0/F003.rf");
     var removedCandidate = gce.addCandidate("6/t-0/F003.rf");
 
@@ -961,8 +939,6 @@ public class GarbageCollectionTest {
     gce.addFileReference("4", null, "/t0/F000.rf");
 
     GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
-    gce.deleteInUseRefs = true;
-
     gca.collect(gce);
     assertFileDeleted(gce, candTwo);
     assertCandidateRemoved(gce, GcCandidateType.INUSE, candOne);

Reply via email to