This is an automated email from the ASF dual-hosted git repository.

jmark99 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/main by this push:
     new 83211c2  Refactor listSplits operation when using maxSplits (#2381)
83211c2 is described below

commit 83211c207cc74c30e25433b595026d31752ba2be
Author: Mark Owens <jmar...@apache.org>
AuthorDate: Mon Dec 13 09:17:35 2021 -0500

    Refactor listSplits operation when using maxSplits (#2381)
    
    Refactored listSplits method in TableOperationsImpl. This change affects 
the listSplits command which takes maxSplits as an option.
    
    * Renamed variable names to enhance readability.
    * Added documentation for the method.
    * Replaced while-loop with if-statement after determining the while-loop 
body ran at most once per iteration.
    * Created IT test for method in ShellIT class.
    
    Closes #2371
    
    Co-authored-by: Keith Turner <ktur...@apache.org>
---
 .../core/clientImpl/TableOperationsImpl.java       | 66 ++++++++++++++++------
 .../java/org/apache/accumulo/test/ShellIT.java     | 23 ++++++++
 2 files changed, 71 insertions(+), 18 deletions(-)

diff --git 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/TableOperationsImpl.java
 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/TableOperationsImpl.java
index c95910f..202860a 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/TableOperationsImpl.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/TableOperationsImpl.java
@@ -689,28 +689,59 @@ public class TableOperationsImpl extends 
TableOperationsHelper {
 
   }
 
+  /**
+   * This version of listSplits is called when the maxSplits option is 
provided. If the value of
+   * maxSplits is greater than or equal to the number of existing splits, then all splits 
are returned and no
+   * additional processing is performed.
+   *
+   * But, if the value of maxSplits is less than the number of existing 
splits, maxSplits split
+   * values are returned. These split values are "evenly" selected from the 
existing splits based
+   * upon the algorithm implemented in the method.
+   *
+   * A stepSize is calculated based upon the number of splits requested and 
the total split count. A
+   * running sum adjusted by this stepSize is calculated as each split is 
parsed. Once this sum
+   * exceeds a value of 1, the current split point is selected to be returned. 
The sum is then
+   * decremented by 1 and the process continues until all existing splits have 
been parsed or
+   * maxSplits splits have been selected.
+   *
+   * @param tableName
+   *          the name of the table
+   * @param maxSplits
+   *          specifies the maximum number of splits to return
+   * @return a Collection containing a subset of evenly selected splits
+   */
   @Override
-  public Collection<Text> listSplits(String tableName, int maxSplits)
+  public Collection<Text> listSplits(final String tableName, final int 
maxSplits)
       throws TableNotFoundException, AccumuloSecurityException {
     // tableName is validated in _listSplits
-    List<Text> endRows = _listSplits(tableName);
-    if (endRows.size() <= maxSplits)
-      return endRows;
-
-    double r = (maxSplits + 1) / (double) (endRows.size());
-    double pos = 0;
-    ArrayList<Text> subset = new ArrayList<>(maxSplits);
-    int j = 0;
-    for (int i = 0; i < endRows.size() && j < maxSplits; i++) {
-      pos += r;
-      while (pos > 1) {
-        subset.add(endRows.get(i));
-        j++;
-        pos -= 1;
-      }
+    final List<Text> existingSplits = _listSplits(tableName);
+
+    // As long as maxSplits is equal to or larger than the number of current 
splits, the existing
+    // splits are returned and no additional processing is necessary.
+    if (existingSplits.size() <= maxSplits) {
+      return existingSplits;
     }
 
-    return subset;
+    // When the number of maxSplits requested is less than the number of 
existing splits, the
+    // following code populates the splitsSubset list 'evenly' from the 
existing splits
+    ArrayList<Text> splitsSubset = new ArrayList<>(maxSplits);
+    final int SELECTION_THRESHOLD = 1;
+
+    // stepSize can never be greater than 1 due to the size check above.
+    final double stepSize = (maxSplits + 1) / (double) (existingSplits.size());
+    double selectionTrigger = 0.0;
+
+    for (Text existingSplit : existingSplits) {
+      if (splitsSubset.size() >= maxSplits) {
+        break;
+      }
+      selectionTrigger += stepSize;
+      if (selectionTrigger > SELECTION_THRESHOLD) {
+        splitsSubset.add(existingSplit);
+        selectionTrigger -= 1;
+      }
+    }
+    return splitsSubset;
   }
 
   @Override
@@ -727,7 +758,6 @@ public class TableOperationsImpl extends 
TableOperationsHelper {
       // should not happen
       throw new AssertionError(e);
     }
-
   }
 
   @Override
diff --git a/test/src/main/java/org/apache/accumulo/test/ShellIT.java 
b/test/src/main/java/org/apache/accumulo/test/ShellIT.java
index 0136f6c..86c9cb7 100644
--- a/test/src/main/java/org/apache/accumulo/test/ShellIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/ShellIT.java
@@ -589,4 +589,27 @@ public class ShellIT extends SharedMiniClusterBase {
     }
   }
 
+  // Test the maxSplits option for getsplits/listsplits.
+  @Test
+  public void testMaxSplitsOption() throws Exception {
+    Shell.log.debug("Starting testMaxSplits test ------------------");
+    exec("createtable maxtab", true);
+    exec("addsplits 0 1 2 3 4 5 6 7 8 9 a b c d e f g h i j k l m n o p q r s 
t", true);
+    exec("getsplits -m 31", true,
+        
"0\n1\n2\n3\n4\n5\n6\n7\n8\n9\na\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\n");
+    exec("getsplits -m 30", true,
+        
"0\n1\n2\n3\n4\n5\n6\n7\n8\n9\na\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\n");
+    exec("getsplits -m 29", true,
+        
"1\n2\n3\n4\n5\n6\n7\n8\n9\na\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\n");
+    exec("getsplits -m 15", true, 
"1\n3\n5\n7\n9\nb\nd\nf\ng\ni\nk\nm\no\nq\ns\n");
+    exec("getsplits -m 10", true, "2\n5\n8\na\nd\ng\nj\nl\no\nr\n");
+    exec("getsplits -m 5", true, "5\na\nf\nk\np\n");
+    exec("getsplits -m 3", true, "7\nf\nm\n");
+    exec("getsplits -m 1", true, "f\n");
+    // if 0 is supplied as maxSplits, the non-maxSplits version of getsplits 
is called and all
+    // are returned.
+    exec("getsplits -m 0", true,
+        
"0\n1\n2\n3\n4\n5\n6\n7\n8\n9\na\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\n");
+  }
+
 }

Reply via email to