This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch methods_to_compute_partition_limits
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git

commit 20166c7f681a272bd9277d7eaa490829c50c5000
Author: Lee Rhodes <[email protected]>
AuthorDate: Thu Apr 11 21:37:02 2024 -0700

    temp commit
---
 .../apache/datasketches/kll/KllItemsSketch.java    | 17 +++--
 .../datasketches/partitions/Partitioner.java       |  4 +-
 .../apache/datasketches/quantiles/ItemsSketch.java | 17 +++--
 .../GenericPartitionBoundaries.java                | 45 ++++++++++---
 .../quantilescommon/GenericSortedView.java         |  2 +-
 .../quantilescommon/ItemsSketchSortedView.java     | 46 +++++++------
 .../quantilescommon/PartitionBoundaries.java       | 67 -------------------
 .../quantilescommon/PartitioningFeature.java       | 78 ++++++++++++++++++----
 .../quantilescommon/QuantilesGenericAPI.java       |  2 +-
 .../quantilescommon/SketchPartitionLimits.java     | 63 +++++++++++++++++
 .../datasketches/quantiles/ItemsSketchTest.java    |  4 +-
 .../quantilescommon/CrossCheckQuantilesTest.java   |  6 +-
 .../quantilescommon/PartitionBoundariesTest.java   | 18 +++--
 13 files changed, 228 insertions(+), 141 deletions(-)

diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java 
b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java
index 9f5a5ae7..efcca934 100644
--- a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java
@@ -156,11 +156,21 @@ public abstract class KllItemsSketch<T> extends KllSketch 
implements QuantilesGe
   }
 
   @Override
-  public GenericPartitionBoundaries<T> getPartitionBoundaries(final int 
numEquallySized,
+  public GenericPartitionBoundaries<T> getPartitionBoundariesFromNumParts(
+      final int numEquallySizedParts,
       final QuantileSearchCriteria searchCrit) {
     if (isEmpty()) { throw new IllegalArgumentException(EMPTY_MSG); }
     refreshSortedView();
-    return itemsSV.getPartitionBoundaries(numEquallySized, searchCrit);
+    return itemsSV.getPartitionBoundariesFromNumParts(numEquallySizedParts, 
searchCrit);
+  }
+
+  @Override
+  public GenericPartitionBoundaries<T> getPartitionBoundariesFromPartSize(
+      final long nominalPartSizeItems,
+      final QuantileSearchCriteria searchCrit) {
+    if (isEmpty()) { throw new IllegalArgumentException(EMPTY_MSG); }
+    refreshSortedView();
+    return itemsSV.getPartitionBoundariesFromPartSize(nominalPartSizeItems, 
searchCrit);
   }
 
   @Override
@@ -424,9 +434,8 @@ public abstract class KllItemsSketch<T> extends KllSketch 
implements QuantilesGe
       quantiles = (T[]) Array.newInstance(serDe.getClassOfT(), numQuantiles);
       cumWeights = new long[numQuantiles];
       populateFromSketch(srcQuantiles, srcLevels, srcNumLevels, numQuantiles);
-      final double normRankErr = getNormalizedRankError(getK(), true);
       return new ItemsSketchSortedView(
-          quantiles, cumWeights, getN(), comparator, getMaxItem(), 
getMinItem(), normRankErr);
+          quantiles, cumWeights, getN(), comparator, getMaxItem(), 
getMinItem());
     }
 
     private void populateFromSketch(final Object[] srcQuantiles, final int[] 
srcLevels,
diff --git a/src/main/java/org/apache/datasketches/partitions/Partitioner.java 
b/src/main/java/org/apache/datasketches/partitions/Partitioner.java
index be1247ca..66030fb2 100644
--- a/src/main/java/org/apache/datasketches/partitions/Partitioner.java
+++ b/src/main/java/org/apache/datasketches/partitions/Partitioner.java
@@ -117,7 +117,7 @@ public class Partitioner<T, S extends 
QuantilesGenericAPI<T> & PartitioningFeatu
     this.numLevels = (int)max(1, ceil(log(guessNumParts) / 
log(maxPartsPerSk)));
     final int partsPerSk = (int)round(pow(guessNumParts, 1.0 / numLevels));
     this.partitionsPerSk = min(partsPerSk, maxPartsPerSk);
-    final GenericPartitionBoundaries<T> gpb = 
sk.getPartitionBoundaries(partitionsPerSk, criteria);
+    final GenericPartitionBoundaries<T> gpb = 
sk.getPartitionBoundariesFromNumParts(partitionsPerSk, criteria);
     final StackElement<T> se = new StackElement<>(gpb, 0, "1");
     stack.push(se);
     partitionSearch(stack);
@@ -144,7 +144,7 @@ public class Partitioner<T, S extends 
QuantilesGenericAPI<T> & PartitioningFeatu
       if (++se.part <= numParts) {
         final PartitionBoundsRow<T> row = new PartitionBoundsRow<>(se);
         final S sk = fillReq.getRange(row.lowerBound, row.upperBound, 
row.rule);
-        final GenericPartitionBoundaries<T> gpb2 = 
sk.getPartitionBoundaries(this.partitionsPerSk, criteria);
+        final GenericPartitionBoundaries<T> gpb2 = 
sk.getPartitionBoundariesFromNumParts(this.partitionsPerSk, criteria);
         final int level = stack.size() + 1;
         final String partId = se.levelPartId + "." + se.part + "," + level;
         final StackElement<T> se2 = new StackElement<>(gpb2, 0, partId);
diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java 
b/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java
index 9361c6cd..3d2d3388 100644
--- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java
+++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java
@@ -277,11 +277,21 @@ public final class ItemsSketch<T> implements 
QuantilesGenericAPI<T> {
   }
 
   @Override
-  public GenericPartitionBoundaries<T> getPartitionBoundaries(final int 
numEquallySized,
+  public GenericPartitionBoundaries<T> getPartitionBoundariesFromNumParts(
+      final int numEquallySizedParts,
       final QuantileSearchCriteria searchCrit) {
     if (isEmpty()) { throw new 
IllegalArgumentException(QuantilesAPI.EMPTY_MSG); }
     refreshSortedView();
-    return classicQisSV.getPartitionBoundaries(numEquallySized, searchCrit);
+    return 
classicQisSV.getPartitionBoundariesFromNumParts(numEquallySizedParts, 
searchCrit);
+  }
+
+  @Override
+  public GenericPartitionBoundaries<T> getPartitionBoundariesFromPartSize(
+      final long nominalPartSizeItems,
+      final QuantileSearchCriteria searchCrit) {
+    if (isEmpty()) { throw new 
IllegalArgumentException(QuantilesAPI.EMPTY_MSG); }
+    refreshSortedView();
+    return 
classicQisSV.getPartitionBoundariesFromPartSize(nominalPartSizeItems, 
searchCrit);
   }
 
   @Override
@@ -656,9 +666,8 @@ public final class ItemsSketch<T> implements 
QuantilesGenericAPI<T> {
       throw new SketchesStateException("Sorted View is misconfigured. TotalN 
does not match cumWeights.");
     }
 
-    final double normRankErr = getNormalizedRankError(sk.getK(), true);
     return new ItemsSketchSortedView<>(
-        svQuantiles, svCumWeights, sk.getN(), comparator, sk.getMaxItem(), 
sk.getMinItem(), normRankErr);
+        svQuantiles, svCumWeights, sk.getN(), comparator, sk.getMaxItem(), 
sk.getMinItem());
 
   }
 
diff --git 
a/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java
 
b/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java
index b21cde77..ee53e910 100644
--- 
a/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java
+++ 
b/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java
@@ -25,9 +25,10 @@ import static 
org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INC
 import org.apache.datasketches.common.SketchesStateException;
 
 /**
- * Implements PartitionBoundaries
+ * This defines the returned results of the getParitionBoundaries() function 
and
+ * includes the basic methods needed to construct actual partitions.
  */
-final public class GenericPartitionBoundaries<T> implements 
PartitionBoundaries {
+public final class GenericPartitionBoundaries<T> {
   private long totalN; //totalN of source sketch
   private T[] boundaries;       //quantiles at the boundaries
   private long[] natRanks;      //natural ranks at the boundaries
@@ -36,7 +37,7 @@ final public class GenericPartitionBoundaries<T> implements 
PartitionBoundaries
   private T minItem;            //of the source sketch
   private QuantileSearchCriteria searchCrit; //of the source sketch query to 
getPartitionBoundaries.
   //computed
-  private long[] numDeltaItems; //num of items in each part
+  private long[] numDeltaItems; //num of items in each partition
   private int numPartitions;    //num of partitions
 
   public GenericPartitionBoundaries(
@@ -48,7 +49,7 @@ final public class GenericPartitionBoundaries<T> implements 
PartitionBoundaries
       final T minItem,
       final QuantileSearchCriteria searchCrit) {
     this.totalN = totalN;
-    this.boundaries = boundaries; //SpotBugs EI_EXPOSE_REP2 copying from 
sketch class to this "friend" class.
+    this.boundaries = boundaries; //SpotBugs EI_EXPOSE_REP2 OK: copying from 
sketch class to this "friend" class.
     this.natRanks = natRanks;     // "
     this.normRanks = normRanks;   // "
     this.maxItem = maxItem;
@@ -56,7 +57,7 @@ final public class GenericPartitionBoundaries<T> implements 
PartitionBoundaries
     this.searchCrit = searchCrit;
     //check and compute
     final int len = boundaries.length;
-    if (len < 2) { throw new SketchesStateException("Source sketch is empty"); 
}
+    if (len < 2) { throw new SketchesStateException("Source sketch is empty"); 
} //class is final, this is ok
     numDeltaItems = new long[len];
     numDeltaItems[0] = 0; // index 0 is always 0
     for (int i = 1; i < len; i++) {
@@ -67,7 +68,10 @@ final public class GenericPartitionBoundaries<T> implements 
PartitionBoundaries
     this.numPartitions = len - 1;
   }
 
-  @Override
+  /**
+   * Gets the length of the input stream offered to the underlying sketch.
+   * @return the length of the input stream offered to the underlying sketch.
+   */
   public long getN() { return totalN; }
 
   /**
@@ -100,16 +104,32 @@ final public class GenericPartitionBoundaries<T> 
implements PartitionBoundaries
    */
   public T[] getBoundaries() { return boundaries.clone(); }
 
-  @Override
+  /**
+   * Gets an ordered array of natural ranks of the associated array of 
partition boundaries utilizing
+   * a specified search criterion. Natural ranks are integral values on the 
interval [1, N]
+   * @return an array of natural ranks.
+   */
   public long[] getNaturalRanks() { return natRanks.clone(); }
 
-  @Override
+  /**
+   * Gets an ordered array of normalized ranks of the associated array of 
partition boundaries utilizing
+   * a specified search criterion. Normalized ranks are double values on the 
interval [0.0, 1.0].
+   * @return an array of normalized ranks.
+   */
   public double[] getNormalizedRanks() { return normRanks.clone(); }
 
-  @Override
+  /**
+   * Gets the number of items to be included for each partition as an array.
+   * The count at index 0 is 0.  The number of items included in the first 
partition, defined by the boundaries at
+   * index 0 and index 1, is at index 1 in this array, etc.
+   * @return the number of items to be included for each partition as an array.
+   */
   public long[] getNumDeltaItems() { return numDeltaItems.clone(); }
 
-  @Override
+  /**
+   * Gets the number of partitions
+   * @return the number of partitions
+   */
   public int getNumPartitions() { return numPartitions; }
 
   /**
@@ -130,7 +150,10 @@ final public class GenericPartitionBoundaries<T> 
implements PartitionBoundaries
    */
   public T getMinItem() { return minItem; }
 
-  @Override
+  /**
+   * Gets the search criteria specified for the source sketch
+   * @return The search criteria specified for the source sketch
+   */
   public QuantileSearchCriteria getSearchCriteria() { return searchCrit; }
 
 }
diff --git 
a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java 
b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java
index 1c54395f..4f9da76a 100644
--- 
a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java
+++ 
b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java
@@ -30,7 +30,7 @@ import 
org.apache.datasketches.common.SketchesArgumentException;
  * @author Alexander Saydakov
  * @author Lee Rhodes
  */
-public interface GenericSortedView<T> extends PartitioningFeature<T>, 
SortedView {
+public interface GenericSortedView<T>  extends PartitioningFeature<T>, 
SketchPartitionLimits, SortedView  {
 
   /**
    * Returns an approximation to the Cumulative Distribution Function (CDF) of 
the input stream
diff --git 
a/src/main/java/org/apache/datasketches/quantilescommon/ItemsSketchSortedView.java
 
b/src/main/java/org/apache/datasketches/quantilescommon/ItemsSketchSortedView.java
index 5fbcf434..57735c6d 100644
--- 
a/src/main/java/org/apache/datasketches/quantilescommon/ItemsSketchSortedView.java
+++ 
b/src/main/java/org/apache/datasketches/quantilescommon/ItemsSketchSortedView.java
@@ -19,6 +19,7 @@
 
 package org.apache.datasketches.quantilescommon;
 
+import static java.lang.Math.min;
 import static 
org.apache.datasketches.quantilescommon.GenericInequalitySearch.find;
 import static 
org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
 import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG;
@@ -38,7 +39,6 @@ import 
org.apache.datasketches.quantilescommon.GenericInequalitySearch.Inequalit
  * @author Lee Rhodes
  */
 public class ItemsSketchSortedView<T> implements GenericSortedView<T> {
-  private static final double PARTITIONING_ERROR_FACTOR = 2.0;
   private final T[] quantiles;
   private final long[] cumWeights; //cumulative natural weights
   private final long totalN;
@@ -46,7 +46,6 @@ public class ItemsSketchSortedView<T> implements 
GenericSortedView<T> {
   private final T maxItem;
   private final T minItem;
   private final Class<T> clazz;
-  private final double normRankErr;//assumes PMF type error
 
   /**
    * Construct from elements, also used in testing.
@@ -56,7 +55,6 @@ public class ItemsSketchSortedView<T> implements 
GenericSortedView<T> {
    * @param comparator the Comparator for type T
    * @param maxItem of type T
    * @param minItem of type T
-   * @param normRankErr the normalized rank error of the originating sketch.
    */
   @SuppressWarnings("unchecked")
   public ItemsSketchSortedView(
@@ -65,8 +63,7 @@ public class ItemsSketchSortedView<T> implements 
GenericSortedView<T> {
       final long totalN,
       final Comparator<? super T> comparator,
       final T maxItem,
-      final T minItem,
-      final double normRankErr) {
+      final T minItem) {
     this.quantiles = quantiles;
     this.cumWeights = cumWeights;
     this.totalN = totalN;
@@ -74,7 +71,6 @@ public class ItemsSketchSortedView<T> implements 
GenericSortedView<T> {
     this.maxItem = maxItem;
     this.minItem = minItem;
     this.clazz = (Class<T>)quantiles[0].getClass();
-    this.normRankErr = normRankErr;
   }
 
   //end of constructors
@@ -118,29 +114,35 @@ public class ItemsSketchSortedView<T> implements 
GenericSortedView<T> {
   }
 
   @Override
-  @SuppressWarnings("unchecked")
-  public GenericPartitionBoundaries<T> getPartitionBoundaries(final int 
numEquallySized,
+  public GenericPartitionBoundaries<T> getPartitionBoundariesFromPartSize(
+      final long nominalPartitionSize,
       final QuantileSearchCriteria searchCrit) {
     if (isEmpty()) { throw new 
SketchesArgumentException(QuantilesAPI.EMPTY_MSG); }
-    final long totalN = this.totalN;
-    final int maxParts = (int) (totalN / Math.ceil(normRankErr * 
PARTITIONING_ERROR_FACTOR) );
-    final int svLen = cumWeights.length;
-
-    if (numEquallySized > maxParts) {
+    final long partSizeItems = getMinPartitionSizeItems();
+    if (nominalPartitionSize < partSizeItems) {
       throw new SketchesArgumentException(QuantilesAPI.UNSUPPORTED_MSG
-          + "The requested number of partitions is too large for the 'k' of 
this sketch "
-          + "if it exceeds the maximum number of partitions allowed by the 
error threshold for the 'k' of this sketch."
-          + "Requested Partitions: " + numEquallySized + " > " + maxParts);
+          + " The requested nominal partition size is too small for this 
sketch.");
     }
-    if (numEquallySized > svLen / 2.0) {
+    final long totalN = this.totalN;
+    final int numEquallySizedParts = (int) min(totalN / partSizeItems, 
getMaxPartitions());
+    return getPartitionBoundariesFromNumParts(numEquallySizedParts);
+  }
+
+  @Override
+  @SuppressWarnings("unchecked")
+  public GenericPartitionBoundaries<T> getPartitionBoundariesFromNumParts(
+      final int numEquallySizedParts,
+      final QuantileSearchCriteria searchCrit) {
+    if (isEmpty()) { throw new 
SketchesArgumentException(QuantilesAPI.EMPTY_MSG); }
+    final int maxParts = getMaxPartitions();
+    if (numEquallySizedParts > maxParts) {
       throw new SketchesArgumentException(QuantilesAPI.UNSUPPORTED_MSG
-          + "The requested number of partitions is too large for the number of 
retained items "
-          + "if it exceeds maximum number of retained items divided by 2."
-          + "Requested Partitions: " + numEquallySized + " > "
-          + "Retained Items / 2: " + (svLen / 2));
+          + " The requested number of partitions is too large for this 
sketch.");
     }
+    final long totalN = this.totalN;
+    final int svLen = cumWeights.length;
 
-    final double[] searchNormRanks = evenlySpacedDoubles(0, 1.0, 
numEquallySized + 1);
+    final double[] searchNormRanks = evenlySpacedDoubles(0, 1.0, 
numEquallySizedParts + 1);
     final int partArrLen = searchNormRanks.length;
     final T[] partQuantiles = (T[]) Array.newInstance(clazz, partArrLen);
     final long[] partNatRanks = new long[partArrLen];
diff --git 
a/src/main/java/org/apache/datasketches/quantilescommon/PartitionBoundaries.java
 
b/src/main/java/org/apache/datasketches/quantilescommon/PartitionBoundaries.java
deleted file mode 100644
index e3c59d2c..00000000
--- 
a/src/main/java/org/apache/datasketches/quantilescommon/PartitionBoundaries.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.datasketches.quantilescommon;
-
-/**
- * This defines a set of results computed from the getParitionBoundaries() 
function and
- * encapsulates the basic methods needed to construct actual partitions based 
on generic items.
- */
-public interface PartitionBoundaries {
-
-  /**
-   * Gets the length of the input stream offered to the underlying sketch.
-   * @return the length of the input stream offered to the underlying sketch.
-   */
-  long getN();
-
-  /**
-   * Gets an ordered array of natural ranks of the associated array of 
partition boundaries utilizing
-   * a specified search criterion. Natural ranks are integral values on the 
interval [1, N]
-   * @return an array of natural ranks.
-   */
-  long[] getNaturalRanks();
-
-  /**
-   * Gets an ordered array of normalized ranks of the associated array of 
partition boundaries utilizing
-   * a specified search criterion. Normalized ranks are double values on the 
interval [0.0, 1.0].
-   * @return an array of normalized ranks.
-   */
-  double[] getNormalizedRanks();
-
-  /**
-   * Gets the number of items to be included for each partition as an array.
-   * The count at index 0 is 0.  The number of items included in the first 
partition, defined by the boundaries at
-   * index 0 and index 1, is at index 1 in this array, etc.
-   * @return the number of items to be included for each partition as an array.
-   */
-  long[] getNumDeltaItems();
-
-  /**
-   * Gets the number of partitions
-   * @return the number of partitions
-   */
-  int getNumPartitions();
-
-  /**
-   * Gets the search criteria specified for the source sketch
-   * @return The search criteria specified for the source sketch
-   */
-  QuantileSearchCriteria getSearchCriteria();
-}
diff --git 
a/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java
 
b/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java
index 2c36bb10..3d35cfe9 100644
--- 
a/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java
+++ 
b/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java
@@ -33,13 +33,15 @@ public interface PartitioningFeature<T> {
    * refers to an approximately equal number of items per partition.
    *
    * <p>This method is equivalent to
-   * {@link #getPartitionBoundaries(int, QuantileSearchCriteria) 
getPartitionBoundaries(numEquallySized, INCLUSIVE)}.
+   * {@link #getPartitionBoundariesFromNumParts(int, QuantileSearchCriteria)
+   * getPartitionBoundariesFromNumParts(numEquallySizedParts, INCLUSIVE)}.
    * </p>
    *
-   * @param numEquallySized an integer that specifies the number of equally 
sized partitions between
+   * @param numEquallySizedParts an integer that specifies the number of 
equally sized partitions between
    * {@link GenericPartitionBoundaries#getMinItem() getMinItem()} and
    * {@link GenericPartitionBoundaries#getMaxItem() getMaxItem()}.
-   * This must be a positive integer greater than zero.
+   * This must be a positive integer less than
+   * {@link SketchPartitionLimits#getMaxPartitions() getMaxPartitions()}
    * <ul>
    * <li>A 1 will return: minItem, maxItem.</li>
    * <li>A 2 will return: minItem, median quantile, maxItem.</li>
@@ -47,11 +49,12 @@ public interface PartitioningFeature<T> {
    * </ul>
    *
    * @return an instance of {@link GenericPartitionBoundaries 
GenericPartitionBoundaries}.
-   * @throws IllegalArgumentException if sketch is empty.
-   * @throws IllegalArgumentException if <i>numEquallySized</i> is less than 1.
+   * @throws SketchesArgumentException if sketch is empty.
+   * @throws SketchesArgumentException if <i>numEquallySized</i> is greater 
than
+   * {@link SketchPartitionLimits#getMaxPartitions() getMaxPartitions()}
    */
-  default GenericPartitionBoundaries<T> getPartitionBoundaries(int 
numEquallySized) {
-    return getPartitionBoundaries(numEquallySized, INCLUSIVE);
+  default GenericPartitionBoundaries<T> getPartitionBoundariesFromNumParts(int 
numEquallySizedParts) {
+    return getPartitionBoundariesFromNumParts(numEquallySizedParts, INCLUSIVE);
   }
 
   /**
@@ -60,10 +63,11 @@ public interface PartitioningFeature<T> {
    * sufficient information for the user to create the given number of equally 
sized partitions, where "equally sized"
    * refers to an approximately equal number of items per partition.
    *
-   * @param numEquallySized an integer that specifies the number of equally 
sized partitions between
+   * @param numEquallySizedParts an integer that specifies the number of 
equally sized partitions between
    * {@link GenericPartitionBoundaries#getMinItem() getMinItem()} and
    * {@link GenericPartitionBoundaries#getMaxItem() getMaxItem()}.
-   * This must be a positive integer greater than zero.
+   * This must be a positive integer less than
+   * {@link SketchPartitionLimits#getMaxPartitions() getMaxPartitions()}
    * <ul>
    * <li>A 1 will return: minItem, maxItem.</li>
    * <li>A 2 will return: minItem, median quantile, maxItem.</li>
@@ -77,9 +81,59 @@ public interface PartitioningFeature<T> {
    * with the exception of the highest returned quantile, which is the upper 
boundary of the highest ranked partition.
    *
    * @return an instance of {@link GenericPartitionBoundaries 
GenericPartitionBoundaries}.
-   * @throws IllegalArgumentException if sketch is empty.
-   * @throws IllegalArgumentException if <i>numEquallySized</i> is less than 1.
+   * @throws SketchesArgumentException if sketch is empty.
+   * @throws SketchesArgumentException if <i>numEquallySized</i> is greater 
than
+   * {@link SketchPartitionLimits#getMaxPartitions() getMaxPartitions()}
    */
-  GenericPartitionBoundaries<T> getPartitionBoundaries(int numEquallySized, 
QuantileSearchCriteria searchCrit);
+  GenericPartitionBoundaries<T> getPartitionBoundariesFromNumParts(
+      int numEquallySizedParts, QuantileSearchCriteria searchCrit);
+
+  /**
+   * This method returns an instance of
+   * {@link GenericPartitionBoundaries GenericPartitionBoundaries} which 
provides
+   * sufficient information for the user to create the given number of equally 
sized partitions, where "equally sized"
+   * refers to an approximately equal number of items per partition.
+   *
+   * <p>This method is equivalent to
+   * {@link #getPartitionBoundariesFromPartSize(long, QuantileSearchCriteria)
+   * getPartitionBoundariesFromPartSize(nominalPartSizeItems, INCLUSIVE)}.
+   * </p>
+   *
+   * @param nominalPartSizeItems an integer that specifies the nominal size, 
in items, of each target partition.
+   * This must be a positive integer greater than
+   * {@link SketchPartitionLimits#getMinPartitionSizeItems() 
getMinPartitionSizeItems()}
+   *
+   * @return an instance of {@link GenericPartitionBoundaries 
GenericPartitionBoundaries}.
+   * @throws SketchesArgumentException if sketch is empty.
+   * @throws SketchesArgumentException if <i>nominalPartSizeItems</i> is less 
than
+   * {@link SketchPartitionLimits#getMinPartitionSizeItems() 
getMinPartitionSizeItems()}
+   */
+  default GenericPartitionBoundaries<T> 
getPartitionBoundariesFromPartSize(long nominalPartSizeItems) {
+    return getPartitionBoundariesFromPartSize(nominalPartSizeItems, INCLUSIVE);
+  }
+
+  /**
+   * This method returns an instance of
+   * {@link GenericPartitionBoundaries GenericPartitionBoundaries} which 
provides
+   * sufficient information for the user to create the given number of equally 
sized partitions, where "equally sized"
+   * refers to an approximately equal number of items per partition.
+   *
+   * @param nominalPartSizeItems an integer that specifies the nominal size, 
in items, of each target partition.
+   * This must be a positive integer greater than
+   * {@link SketchPartitionLimits#getMinPartitionSizeItems() 
getMinPartitionSizeItems()}.
+   *
+   * @param searchCrit
+   * If INCLUSIVE, all the returned quantiles are the upper boundaries of the 
equally sized partitions
+   * with the exception of the lowest returned quantile, which is the lowest 
boundary of the lowest ranked partition.
+   * If EXCLUSIVE, all the returned quantiles are the lower boundaries of the 
equally sized partitions
+   * with the exception of the highest returned quantile, which is the upper 
boundary of the highest ranked partition.
+   *
+   * @return an instance of {@link GenericPartitionBoundaries 
GenericPartitionBoundaries}.
+   * @throws SketchesArgumentException if sketch is empty.
+   * @throws SketchesArgumentException if <i>nominalPartSizeItems</i> is less 
than
+   * {@link SketchPartitionLimits#getMinPartitionSizeItems() 
getMinPartitionSizeItems()}
+   */
+  GenericPartitionBoundaries<T> getPartitionBoundariesFromPartSize(
+      long nominalPartSizeItems, QuantileSearchCriteria searchCrit);
 
 }
diff --git 
a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java
 
b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java
index 404ec7a7..d422f15c 100644
--- 
a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java
+++ 
b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java
@@ -27,7 +27,7 @@ import static 
org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INC
  * @param <T> The given item type
  * @author Lee Rhodes
  */
-public interface QuantilesGenericAPI<T> extends QuantilesAPI, 
PartitioningFeature<T> {
+public interface QuantilesGenericAPI<T> extends QuantilesAPI, 
PartitioningFeature<T>, SketchPartitionLimits {
 
   /**
    * This is equivalent to {@link #getCDF(Object[], QuantileSearchCriteria) 
getCDF(splitPoints, INCLUSIVE)}
diff --git 
a/src/main/java/org/apache/datasketches/quantilescommon/SketchPartitionLimits.java
 
b/src/main/java/org/apache/datasketches/quantilescommon/SketchPartitionLimits.java
new file mode 100644
index 00000000..578f142a
--- /dev/null
+++ 
b/src/main/java/org/apache/datasketches/quantilescommon/SketchPartitionLimits.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.quantilescommon;
+
+import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG;
+
+import org.apache.datasketches.common.SketchesArgumentException;
+
+/**
+ * This defines the methods required to compute the partition limits.
+ */
+public interface SketchPartitionLimits {
+
+  /**
+   * Gets the maximum number of partitions this sketch will support based on 
the configured size <i>K</i>
+   * and the number of retained values of this sketch.
+   * @return the maximum number of partitions this sketch will support.
+   */
+  default int getMaxPartitions() {
+    return getNumRetained() / 2;
+  }
+
+  /**
+   * Gets the minimum partition size in items this sketch will support based 
on the configured size <i>K</i> of this
+   * sketch and the number of retained values of this sketch.
+   * @return the minimum partition size in items this sketch will support.
+   */
+  default long getMinPartitionSizeItems() {
+    final long totalN = getN();
+    if (totalN <= 0) { throw new SketchesArgumentException(EMPTY_MSG); }
+    return totalN / getMaxPartitions();
+  }
+
+  /**
+   * Gets the length of the input stream offered to the sketch..
+   * @return the length of the input stream offered to the sketch.
+   */
+  long getN();
+
+  /**
+   * Gets the number of quantiles retained by the sketch.
+   * @return the number of quantiles retained by the sketch
+   */
+  int getNumRetained();
+
+}
diff --git 
a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java 
b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java
index 02990ae0..69e73667 100644
--- a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java
@@ -19,7 +19,6 @@
 
 package org.apache.datasketches.quantiles;
 
-import static org.apache.datasketches.quantiles.PreambleUtil.DEFAULT_K;
 import static 
org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.getString;
 import static 
org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE;
 import static 
org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
@@ -623,8 +622,7 @@ public class ItemsSketchTest {
     Double[] qArr = {8.0, 10.0, 10.0, 20.0};
     long[] cwArr =  {1,  3,  4,  5};
     Comparator<Double> comp = Comparator.naturalOrder();
-    final double normRankErr = ItemsSketch.getNormalizedRankError(DEFAULT_K, 
true);
-    ItemsSketchSortedView<Double> sv = new ItemsSketchSortedView<>(qArr, 
cwArr, 5L, comp, 20.0, 8.0, normRankErr);
+    ItemsSketchSortedView<Double> sv = new ItemsSketchSortedView<>(qArr, 
cwArr, 5L, comp, 20.0, 8.0);
     double[] ranks = {0, .1, .2, .3, .6, .7, .8, .9, 1.0};
     Double[] qOut = new Double[9];
     for (int i = 0; i < ranks.length; i++) {
diff --git 
a/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java
 
b/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java
index dd07ae60..00cd380f 100644
--- 
a/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java
+++ 
b/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java
@@ -39,7 +39,6 @@ import 
org.apache.datasketches.common.SketchesArgumentException;
 import org.apache.datasketches.kll.KllDoublesSketch;
 import org.apache.datasketches.kll.KllFloatsSketch;
 import org.apache.datasketches.kll.KllItemsSketch;
-import org.apache.datasketches.kll.KllSketch;
 import org.apache.datasketches.quantiles.DoublesSketch;
 import org.apache.datasketches.quantiles.ItemsSketch;
 import org.apache.datasketches.quantiles.UpdateDoublesSketch;
@@ -321,11 +320,10 @@ public class CrossCheckQuantilesTest {
     String svImin = svIValues[set][0];
 
     kllItemsSV = new ItemsSketchSortedView<>(svIValues[set], 
svCumWeights[set], totalN[set],
-        comparator, svImax, svImin, KllSketch.getNormalizedRankError(k, true));
+        comparator, svImax, svImin);
 
     classicItemsSV = new ItemsSketchSortedView<>(svIValues[set], 
svCumWeights[set], totalN[set],
-        comparator, svImax, svImin, ItemsSketch.getNormalizedRankError(k, 
true));
-
+        comparator, svImax, svImin);
   }
 
   /********BUILD DATA SETS**********/
diff --git 
a/src/test/java/org/apache/datasketches/quantilescommon/PartitionBoundariesTest.java
 
b/src/test/java/org/apache/datasketches/quantilescommon/PartitionBoundariesTest.java
index d4a3eb43..1849c04f 100644
--- 
a/src/test/java/org/apache/datasketches/quantilescommon/PartitionBoundariesTest.java
+++ 
b/src/test/java/org/apache/datasketches/quantilescommon/PartitionBoundariesTest.java
@@ -71,7 +71,7 @@ public class PartitionBoundariesTest {
       printf(rowdfmt2, j++, itr.getNormalizedRank(searchCrit), 
itr.getNaturalRank(searchCrit), itr.getQuantile());
     }
 
-    GenericPartitionBoundaries<String> gpb = 
sv.getPartitionBoundaries(numParts, searchCrit);
+    GenericPartitionBoundaries<String> gpb = 
sv.getPartitionBoundariesFromNumParts(numParts, searchCrit);
     int arrLen = gpb.getBoundaries().length;
     double[] normRanks = gpb.getNormalizedRanks();
     long[] natRanks = gpb.getNaturalRanks();
@@ -111,7 +111,7 @@ public class PartitionBoundariesTest {
       printf(rowdfmt2, j++, itr.getNormalizedRank(searchCrit), 
itr.getNaturalRank(searchCrit), itr.getQuantile());
     }
 
-    GenericPartitionBoundaries<String> gpb = 
sv.getPartitionBoundaries(numParts, searchCrit);
+    GenericPartitionBoundaries<String> gpb = 
sv.getPartitionBoundariesFromNumParts(numParts, searchCrit);
     int arrLen = gpb.getBoundaries().length;
     double[] normRanks = gpb.getNormalizedRanks();
     long[] natRanks = gpb.getNaturalRanks();
@@ -136,10 +136,10 @@ public class PartitionBoundariesTest {
     sketch.update("C");
     sketch.update("D");
     String[] quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, 
EXCLUSIVE);
-    String[] quantiles2 = sketch.getPartitionBoundaries(2, 
EXCLUSIVE).getBoundaries();
+    String[] quantiles2 = sketch.getPartitionBoundariesFromNumParts(2, 
EXCLUSIVE).getBoundaries();
     assertEquals(quantiles1, quantiles2);
     quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, INCLUSIVE);
-    quantiles2 = sketch.getPartitionBoundaries(2, INCLUSIVE).getBoundaries();
+    quantiles2 = sketch.getPartitionBoundariesFromNumParts(2, 
INCLUSIVE).getBoundaries();
     assertEquals(quantiles1, quantiles2);
   }
 
@@ -151,10 +151,10 @@ public class PartitionBoundariesTest {
     sketch.update(3);
     sketch.update(4);
     Integer[] quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, 
EXCLUSIVE);
-    Integer[] quantiles2 = sketch.getPartitionBoundaries(2, 
EXCLUSIVE).getBoundaries();
+    Integer[] quantiles2 = sketch.getPartitionBoundariesFromNumParts(2, 
EXCLUSIVE).getBoundaries();
     assertEquals(quantiles1, quantiles2);
     quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, INCLUSIVE);
-    quantiles2 = sketch.getPartitionBoundaries(2, INCLUSIVE).getBoundaries();
+    quantiles2 = sketch.getPartitionBoundariesFromNumParts(2, 
INCLUSIVE).getBoundaries();
     assertEquals(quantiles1, quantiles2);
   }
 
@@ -164,22 +164,20 @@ public class PartitionBoundariesTest {
    */
   @Test
   public void checkSimpleEndsAdjustment() {
-    final int k = 128;
     final String[] quantiles = {"2","4","6","7"};
     final long[] cumWeights = {2, 4, 6, 8};
     final long totalN = 8;
     final Comparator<String> comparator = Comparator.naturalOrder();
     final String maxItem = "8";
     final String minItem = "1";
-    final double normRankErr = ItemsSketch.getNormalizedRankError(k, true);
     ItemsSketchSortedView<String> sv = new ItemsSketchSortedView<>(
-        quantiles, cumWeights, totalN, comparator, maxItem, minItem, 
normRankErr);
+        quantiles, cumWeights, totalN, comparator, maxItem, minItem);
 
     GenericSortedViewIterator<String> itr = sv.iterator();
     while (itr.next()) {
       println(itr.getNaturalRank(INCLUSIVE) + ", " + 
itr.getQuantile(INCLUSIVE));
     }
-    GenericPartitionBoundaries<String> gpb = sv.getPartitionBoundaries(2);
+    GenericPartitionBoundaries<String> gpb = 
sv.getPartitionBoundariesFromNumParts(2);
     String[] boundaries = gpb.getBoundaries();
     long[] natRanks = gpb.getNaturalRanks();
     double[] normRanks = gpb.getNormalizedRanks();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to