This is an automated email from the ASF dual-hosted git repository. jmark99 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push: new 83211c2 Refactor listSplits operation when using maxSplits (#2381) 83211c2 is described below commit 83211c207cc74c30e25433b595026d31752ba2be Author: Mark Owens <jmar...@apache.org> AuthorDate: Mon Dec 13 09:17:35 2021 -0500 Refactor listSplits operation when using maxSplits (#2381) Refactored listSplits method in TableOperationsImpl. This change affects the listSplits command which takes maxSplits as an option. * Renamed variable names to enhance readability. * Added documentation for the method. * Replaced while-loop with if-statement after determining while-loop was only run at most once each time. * Created IT test for method in ShellIT class. Closes #2371 Co-authored-by: Keith Turner <ktur...@apache.org> --- .../core/clientImpl/TableOperationsImpl.java | 66 ++++++++++++++++------ .../java/org/apache/accumulo/test/ShellIT.java | 23 ++++++++ 2 files changed, 71 insertions(+), 18 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/TableOperationsImpl.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/TableOperationsImpl.java index c95910f..202860a 100644 --- a/core/src/main/java/org/apache/accumulo/core/clientImpl/TableOperationsImpl.java +++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/TableOperationsImpl.java @@ -689,28 +689,59 @@ public class TableOperationsImpl extends TableOperationsHelper { } + /** + * This version of listSplits is called when the maxSplits option is provided. If the value of + * maxSplits is greater than or equal to the number of existing splits, then all splits are returned and no + * additional processing is performed. + * + * But, if the value of maxSplits is less than the number of existing splits, maxSplits split + * values are returned. These split values are "evenly" selected from the existing splits based + * upon the algorithm implemented in the method. 
+ * + * A stepSize is calculated based upon the number of splits requested and the total split count. A + * running sum adjusted by this stepSize is calculated as each split is parsed. Once this sum + * exceeds a value of 1, the current split point is selected to be returned. The sum is then + * decremented by 1 and the process continues until all existing splits have been parsed or + * maxSplits splits have been selected. + * + * @param tableName + * the name of the table + * @param maxSplits + * specifies the maximum number of splits to return + * @return a Collection containing a subset of evenly selected splits + */ @Override - public Collection<Text> listSplits(String tableName, int maxSplits) + public Collection<Text> listSplits(final String tableName, final int maxSplits) throws TableNotFoundException, AccumuloSecurityException { // tableName is validated in _listSplits - List<Text> endRows = _listSplits(tableName); - if (endRows.size() <= maxSplits) - return endRows; - - double r = (maxSplits + 1) / (double) (endRows.size()); - double pos = 0; - ArrayList<Text> subset = new ArrayList<>(maxSplits); - int j = 0; - for (int i = 0; i < endRows.size() && j < maxSplits; i++) { - pos += r; - while (pos > 1) { - subset.add(endRows.get(i)); - j++; - pos -= 1; - } + final List<Text> existingSplits = _listSplits(tableName); + + // As long as maxSplits is equal to or larger than the number of current splits, the existing + // splits are returned and no additional processing is necessary. + if (existingSplits.size() <= maxSplits) { + return existingSplits; } - return subset; + // When the number of maxSplits requested is less than the number of existing splits, the + // following code populates the splitsSubset list 'evenly' from the existing splits + ArrayList<Text> splitsSubset = new ArrayList<>(maxSplits); + final int SELECTION_THRESHOLD = 1; + + // stepSize can never be greater than 1 due to the size check above. 
+ final double stepSize = (maxSplits + 1) / (double) (existingSplits.size()); + double selectionTrigger = 0.0; + + for (Text existingSplit : existingSplits) { + if (splitsSubset.size() >= maxSplits) { + break; + } + selectionTrigger += stepSize; + if (selectionTrigger > SELECTION_THRESHOLD) { + splitsSubset.add(existingSplit); + selectionTrigger -= 1; + } + } + return splitsSubset; } @Override @@ -727,7 +758,6 @@ public class TableOperationsImpl extends TableOperationsHelper { // should not happen throw new AssertionError(e); } - } @Override diff --git a/test/src/main/java/org/apache/accumulo/test/ShellIT.java b/test/src/main/java/org/apache/accumulo/test/ShellIT.java index 0136f6c..86c9cb7 100644 --- a/test/src/main/java/org/apache/accumulo/test/ShellIT.java +++ b/test/src/main/java/org/apache/accumulo/test/ShellIT.java @@ -589,4 +589,27 @@ public class ShellIT extends SharedMiniClusterBase { } } + // Test the maxSplits option for getsplits/listsplits. + @Test + public void testMaxSplitsOption() throws Exception { + Shell.log.debug("Starting testMaxSplits test ------------------"); + exec("createtable maxtab", true); + exec("addsplits 0 1 2 3 4 5 6 7 8 9 a b c d e f g h i j k l m n o p q r s t", true); + exec("getsplits -m 31", true, + "0\n1\n2\n3\n4\n5\n6\n7\n8\n9\na\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\n"); + exec("getsplits -m 30", true, + "0\n1\n2\n3\n4\n5\n6\n7\n8\n9\na\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\n"); + exec("getsplits -m 29", true, + "1\n2\n3\n4\n5\n6\n7\n8\n9\na\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\n"); + exec("getsplits -m 15", true, "1\n3\n5\n7\n9\nb\nd\nf\ng\ni\nk\nm\no\nq\ns\n"); + exec("getsplits -m 10", true, "2\n5\n8\na\nd\ng\nj\nl\no\nr\n"); + exec("getsplits -m 5", true, "5\na\nf\nk\np\n"); + exec("getsplits -m 3", true, "7\nf\nm\n"); + exec("getsplits -m 1", true, "f\n"); + // if 0 is supplied as maxSplits, the non-maxSplits version of getsplits is called and all + // are 
returned. + exec("getsplits -m 0", true, + "0\n1\n2\n3\n4\n5\n6\n7\n8\n9\na\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\n"); + } + }