This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch Fixes_for_getPartitionBoundaries
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git

commit 405cb7620d820319d12e18601997523861a64de7
Author: Lee Rhodes <[email protected]>
AuthorDate: Wed Oct 25 17:07:09 2023 -0700

    Initial fixes for getPartitionBoundaries.
---
 .../quantiles/ItemsSketchSortedView.java           |   3 +-
 .../quantilescommon/InequalitySearch.java          | 255 ++++++++++++++++++++-
 .../quantilescommon/QuantilesFloatsAPI.java        |   2 +-
 3 files changed, 245 insertions(+), 15 deletions(-)

diff --git 
a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java 
b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java
index d2ccf9fd..0dcd35fb 100644
--- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java
+++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java
@@ -130,8 +130,7 @@ public class ItemsSketchSortedView<T> implements 
GenericSortedView<T> {
     if (isEmpty()) { throw new 
IllegalArgumentException(QuantilesAPI.EMPTY_MSG); }
     QuantilesUtil.checkNormalizedRankBounds(rank);
     final int len = cumWeights.length;
-    final long naturalRank = (searchCrit == INCLUSIVE)
-        ? (long)Math.ceil(rank * totalN) : (long)Math.floor(rank * totalN);
+    final long naturalRank = Math.round(rank * totalN);
     final InequalitySearch crit = (searchCrit == INCLUSIVE) ? 
InequalitySearch.GE : InequalitySearch.GT;
     final int index = InequalitySearch.find(cumWeights, 0, len - 1, 
naturalRank, crit);
     if (index == -1) {
diff --git 
a/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java 
b/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java
index 5a61e525..51b01357 100644
--- 
a/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java
+++ 
b/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java
@@ -73,6 +73,11 @@ public enum InequalitySearch {
       return v <= arr[a] ? -1 : arr[b] < v ? 1 : 0;
     }
 
+    @Override
+    int compare(final long[] arr, final int a, final int b, final double v) {
+      return v <= arr[a] ? -1 : arr[b] < v ? 1 : 0;
+    }
+
     @Override
     int getIndex(final double[] arr, final int a, final int b, final double v) 
{
       return a;
@@ -88,6 +93,11 @@ public enum InequalitySearch {
       return a;
     }
 
+    @Override
+    int getIndex(final long[] arr, final int a, final int b, final double v) {
+      return a;
+    }
+
     @Override
     int resolve(final double[] arr, final int lo, final int hi, final double 
v) {
       return (lo == hi)
@@ -109,6 +119,13 @@ public enum InequalitySearch {
           : v > arr[hi] ? hi : (v > arr[lo] ? lo : -1);
     }
 
+    @Override
+    int resolve(final long[] arr, final int lo, final int hi, final double v) {
+      return (lo == hi)
+          ? (v > arr[lo] ? lo : -1)
+          : v > arr[hi] ? hi : (v > arr[lo] ? lo : -1);
+    }
+
     @Override
     public String desc(final double[] arr, final int low, final int high, 
final double v, final int idx) {
       if (idx == -1) {
@@ -150,6 +167,20 @@ public enum InequalitySearch {
       + ": arr[" + idx + "]=" + arr[idx] + " < " + v + " <= arr[" + (idx + 1) 
+ "]=" + arr[idx + 1]
       + "; return arr[" + idx + "]=" + arr[idx];
     }
+
+    @Override
+    public String desc(final long[] arr, final int low, final int high, final 
double v, final int idx) {
+      if (idx == -1) {
+        return "LT: " + v + " <= arr[" + low + "]=" + arr[low] + "; return -1";
+      }
+      if (idx == high) {
+        return "LT: " + v + " > arr[" + high + "]=" + arr[high]
+            + "; return arr[" + high + "]=" + arr[high];
+      } //idx < high
+      return "LT: " + v
+      + ": arr[" + idx + "]=" + arr[idx] + " < " + v + " <= arr[" + (idx + 1) 
+ "]=" + arr[idx + 1]
+      + "; return arr[" + idx + "]=" + arr[idx];
+    }
   },
 
   /**
@@ -179,6 +210,11 @@ public enum InequalitySearch {
       return v < arr[a] ? -1 : arr[b] <= v ? 1 : 0;
     }
 
+    @Override
+    int compare(final long[] arr, final int a, final int b, final double v) {
+      return v < arr[a] ? -1 : arr[b] <= v ? 1 : 0;
+    }
+
     @Override
     int getIndex(final double[] arr, final int a, final int b, final double v) 
{
       return a;
@@ -194,6 +230,11 @@ public enum InequalitySearch {
       return a;
     }
 
+    @Override
+    int getIndex(final long[] arr, final int a, final int b, final double v) {
+      return a;
+    }
+
     @Override
     int resolve(final double[] arr, final int lo, final int hi, final double 
v) {
       return (lo == hi)
@@ -215,6 +256,13 @@ public enum InequalitySearch {
           : v >= arr[hi] ? hi : (v >= arr[lo] ? lo : -1);
     }
 
+    @Override
+    int resolve(final long[] arr, final int lo, final int hi, final double v) {
+      return (lo == hi)
+          ? (v >= arr[lo] ? lo : -1)
+          : v >= arr[hi] ? hi : (v >= arr[lo] ? lo : -1);
+    }
+
     @Override
     public String desc(final double[] arr, final int low, final int high, 
final double v, final int idx) {
       if (idx == -1) {
@@ -256,6 +304,20 @@ public enum InequalitySearch {
       + ": arr[" + idx + "]=" + arr[idx] + " <= " + v + " < arr[" + (idx + 1) 
+ "]=" + arr[idx + 1]
           + "; return arr[" + idx + "]=" + arr[idx];
     }
+
+    @Override
+    public String desc(final long[] arr, final int low, final int high, final 
double v, final int idx) {
+      if (idx == -1) {
+        return "LE: " + v + " < arr[" + low + "]=" + arr[low] + "; return -1";
+      }
+      if (idx == high) {
+        return "LE: " + v + " >= arr[" + high + "]=" + arr[high]
+            + "; return arr[" + high + "]=" + arr[high];
+      }
+      return "LE: " + v
+      + ": arr[" + idx + "]=" + arr[idx] + " <= " + v + " < arr[" + (idx + 1) 
+ "]=" + arr[idx + 1]
+          + "; return arr[" + idx + "]=" + arr[idx];
+    }
   },
 
   /**
@@ -281,6 +343,11 @@ public enum InequalitySearch {
       return v < arr[a] ? -1 : arr[b] < v ? 1 : 0;
     }
 
+    @Override
+    int compare(final long[] arr, final int a, final int b, final double v) {
+      return v < arr[a] ? -1 : arr[b] < v ? 1 : 0;
+    }
+
     @Override
     int getIndex(final double[] arr, final int a, final int b, final double v) 
{
       return v == arr[a] ? a : v == arr[b] ? b : -1;
@@ -296,6 +363,11 @@ public enum InequalitySearch {
       return v == arr[a] ? a : v == arr[b] ? b : -1;
     }
 
+    @Override
+    int getIndex(final long[] arr, final int a, final int b, final double v) {
+      return v == arr[a] ? a : v == arr[b] ? b : -1;
+    }
+
     @Override
     int resolve(final double[] arr, final int lo, final int hi, final double 
v) {
       return (lo == hi)
@@ -317,6 +389,13 @@ public enum InequalitySearch {
           : v == arr[lo] ? lo : (v == arr[hi] ? hi : -1);
     }
 
+    @Override
+    int resolve(final long[] arr, final int lo, final int hi, final double v) {
+      return (lo == hi)
+          ? (v == arr[lo] ? lo : -1)
+          : v == arr[lo] ? lo : (v == arr[hi] ? hi : -1);
+    }
+
     @Override
     public String desc(final double[] arr, final int low, final int high, 
final double v, final int idx) {
       if (idx == -1) {
@@ -358,6 +437,20 @@ public enum InequalitySearch {
       }
       return "EQ: " + v + " == arr[" + idx + "]; return arr[" + idx + "]=" + 
arr[idx];
     }
+
+    @Override
+    public String desc(final long[] arr, final int low, final int high, final 
double v, final int idx) {
+      if (idx == -1) {
+        if (v > arr[high]) {
+          return "EQ: " + v + " > arr[" + high + "]; return -1";
+        }
+        if (v < arr[low]) {
+          return "EQ: " + v + " < arr[" + low + "]; return -1";
+        }
+        return "EQ: " + v + " Cannot be found within arr[" + low + "], arr[" + 
high + "]; return -1";
+      }
+      return "EQ: " + v + " == arr[" + idx + "]; return arr[" + idx + "]=" + 
arr[idx];
+    }
   },
 
   /**
@@ -387,6 +480,11 @@ public enum InequalitySearch {
       return v <= arr[a] ? -1 : arr[b] < v ? 1 : 0;
     }
 
+    @Override
+    int compare(final long[] arr, final int a, final int b, final double v) {
+      return v <= arr[a] ? -1 : arr[b] < v ? 1 : 0;
+    }
+
     @Override
     int getIndex(final double[] arr, final int a, final int b, final double v) 
{
       return b;
@@ -402,6 +500,11 @@ public enum InequalitySearch {
       return b;
     }
 
+    @Override
+    int getIndex(final long[] arr, final int a, final int b, final double v) {
+      return b;
+    }
+
     @Override
     int resolve(final double[] arr, final int lo, final int hi, final double 
v) {
       return (lo == hi)
@@ -423,6 +526,13 @@ public enum InequalitySearch {
           : v <= arr[lo] ? lo : (v <= arr[hi] ? hi : -1);
     }
 
+    @Override
+    int resolve(final long[] arr, final int lo, final int hi, final double v) {
+      return (lo == hi)
+          ? (v <= arr[lo] ? lo : -1)
+          : v <= arr[lo] ? lo : (v <= arr[hi] ? hi : -1);
+    }
+
     @Override
     public String desc(final double[] arr, final int low, final int high, 
final double v, final int idx) {
       if (idx == -1) {
@@ -464,6 +574,20 @@ public enum InequalitySearch {
       + ": arr[" + (idx - 1) + "]=" + arr[idx - 1] + " < " + v + " <= arr[" + 
idx + "]=" + arr[idx]
           + "; return arr[" + idx + "]=" + arr[idx];
     }
+
+    @Override
+    public String desc(final long[] arr, final int low, final int high, final 
double v, final int idx) {
+      if (idx == -1) {
+        return "GE: " + v + " > arr[" + high + "]=" + arr[high] + "; return 
-1";
+      }
+      if (idx == low) {
+        return "GE: " + v + " <= arr[" + low + "]=" + arr[low]
+            + "; return arr[" + low + "]=" + arr[low];
+      } //idx > low
+      return "GE: " + v
+      + ": arr[" + (idx - 1) + "]=" + arr[idx - 1] + " < " + v + " <= arr[" + 
idx + "]=" + arr[idx]
+          + "; return arr[" + idx + "]=" + arr[idx];
+    }
   },
 
   /**
@@ -493,6 +617,11 @@ public enum InequalitySearch {
       return v < arr[a] ? -1 : arr[b] <= v ? 1 : 0;
     }
 
+    @Override
+    int compare(final long[] arr, final int a, final int b, final double v) {
+      return v < arr[a] ? -1 : arr[b] <= v ? 1 : 0;
+    }
+
     @Override
     int getIndex(final double[] arr, final int a, final int b, final double v) 
{
       return b;
@@ -508,6 +637,11 @@ public enum InequalitySearch {
       return b;
     }
 
+    @Override
+    int getIndex(final long[] arr, final int a, final int b, final double v) {
+      return b;
+    }
+
     @Override
     int resolve(final double[] arr, final int lo, final int hi, final double 
v) {
       return (lo == hi)
@@ -529,6 +663,13 @@ public enum InequalitySearch {
           : v < arr[lo] ? lo : (v < arr[hi] ? hi : -1);
     }
 
+    @Override
+    int resolve(final long[] arr, final int lo, final int hi, final double v) {
+      return (lo == hi)
+          ? (v < arr[lo] ? lo : -1)
+          : v < arr[lo] ? lo : (v < arr[hi] ? hi : -1);
+    }
+
     @Override
     public String desc(final double[] arr, final int low, final int high, 
final double v, final int idx) {
       if (idx == -1) {
@@ -570,14 +711,28 @@ public enum InequalitySearch {
       + ": arr[" + (idx - 1) + "]=" + arr[idx - 1] + " <= " + v + " < arr[" + 
idx + "]=" + arr[idx]
           + "; return arr[" + idx + "]=" + arr[idx];
     }
+
+    @Override
+    public String desc(final long[] arr, final int low, final int high, final 
double v, final int idx) {
+      if (idx == -1) {
+        return "GT: " + v + " >= arr[" + high + "]=" + arr[high] + "; return 
-1";
+      }
+      if (idx == low) {
+        return "GT: " + v + " < arr[" + low + "]=" + arr[low]
+            + "; return arr[" + low + "]=" + arr[low];
+      } //idx > low
+      return "GT: " + v
+      + ": arr[" + (idx - 1) + "]=" + arr[idx - 1] + " <= " + v + " < arr[" + 
idx + "]=" + arr[idx]
+          + "; return arr[" + idx + "]=" + arr[idx];
+    }
   };
 
   /**
    * The call to compare index a and index b with the value v.
-   * @param arr The underlying sorted array of double values
+   * @param arr The underlying sorted array of values
    * @param a the lower index of the current pair
    * @param b the higher index of the current pair
-   * @param v the double value to search for
+   * @param v the value to search for
    * @return +1, which means we must search higher in the array, or -1, which 
means we must
    * search lower in the array, or 0, which means we have found the correct 
bounding pair.
    */
@@ -585,10 +740,10 @@ public enum InequalitySearch {
 
   /**
    * The call to compare index a and index b with the value v.
-   * @param arr The underlying sorted array of float values
+   * @param arr The underlying sorted array of values
    * @param a the lower index of the current pair
    * @param b the higher index of the current pair
-   * @param v the float value to search for
+   * @param v the value to search for
    * @return +1, which means we must search higher in the array, or -1, which 
means we must
    * search lower in the array, or 0, which means we have found the correct 
bounding pair.
    */
@@ -596,15 +751,26 @@ public enum InequalitySearch {
 
   /**
    * The call to compare index a and index b with the value v.
-   * @param arr The underlying sorted array of long values
+   * @param arr The underlying sorted array of values
    * @param a the lower index of the current pair
    * @param b the higher index of the current pair
-   * @param v the long value to search for
+   * @param v the value to search for
    * @return +1, which means we must search higher in the array, or -1, which 
means we must
    * search lower in the array, or 0, which means we have found the correct 
bounding pair.
    */
   abstract int compare(long[] arr, int a, int b, long v);
 
+  /**
+   * The call to compare index a and index b with the value v.
+   * @param arr The underlying sorted array of values
+   * @param a the lower index of the current pair
+   * @param b the higher index of the current pair
+   * @param v the value to search for
+   * @return +1, which means we must search higher in the array, or -1, which 
means we must
+   * search lower in the array, or 0, which means we have found the correct 
bounding pair.
+   */
+  abstract int compare(long[] arr, int a, int b, double v);
+
   /**
    * If the compare operation returns 0, which means "found", this returns the 
index of the
    * found value that satisfies the selected criteria.
@@ -638,6 +804,17 @@ public enum InequalitySearch {
    */
   abstract int getIndex(long[] arr, int a, int b, long v);
 
+  /**
+   * If the compare operation returns 0, which means "found", this returns the 
index of the
+   * found value that satisfies the selected criteria.
+   * @param arr the array being searched
+   * @param a the lower index of the current pair
+   * @param b the higher index of the current pair
+   * @param v the value being searched for.
+   * @return the index of the found value that satisfies the selected criteria.
+   */
+  abstract int getIndex(long[] arr, int a, int b, double v);
+
   /**
    * Called to resolve the search when the hi and lo pointers are equal or 
adjacent.
    * @param arr the array being searched
@@ -668,13 +845,23 @@ public enum InequalitySearch {
    */
   abstract int resolve(long[] arr, int lo, int hi, long v);
 
+  /**
+   * Called to resolve the search when the hi and lo pointers are equal or 
adjacent.
+   * @param arr the array being searched
+   * @param lo the current lo value
+   * @param hi the current hi value
+   * @param v the value being searched for
+   * @return the index of the resolution or -1, if it cannot be resolved.
+   */
+  abstract int resolve(long[] arr, int lo, int hi, double v);
+
   /**
    * Optional call that describes the details of the results of the search.
    * Used primarily for debugging.
-   * @param arr The underlying sorted array of double values
+   * @param arr The underlying sorted array of values
    * @param low the low index of the range
    * @param high the high index of the range
-   * @param v the double value to search for
+   * @param v the value to search for
    * @param idx the resolved index from the search
    * @return the descriptive string.
    */
@@ -683,10 +870,10 @@ public enum InequalitySearch {
   /**
    * Optional call that describes the details of the results of the search.
    * Used primarily for debugging.
-   * @param arr The underlying sorted array of double values
+   * @param arr The underlying sorted array of values
    * @param low the low index of the range
    * @param high the high index of the range
-   * @param v the double value to search for
+   * @param v the value to search for
    * @param idx the resolved index from the search
    * @return the descriptive string.
    */
@@ -695,15 +882,27 @@ public enum InequalitySearch {
   /**
    * Optional call that describes the details of the results of the search.
    * Used primarily for debugging.
-   * @param arr The underlying sorted array of double values
+   * @param arr The underlying sorted array of values
    * @param low the low index of the range
    * @param high the high index of the range
-   * @param v the double value to search for
+   * @param v the value to search for
    * @param idx the resolved index from the search
    * @return the descriptive string.
    */
   public abstract String desc(long[] arr, int low, int high, long v, int idx);
 
+  /**
+   * Optional call that describes the details of the results of the search.
+   * Used primarily for debugging.
+   * @param arr The underlying sorted array of values
+   * @param low the low index of the range
+   * @param high the high index of the range
+   * @param v the value to search for
+   * @param idx the resolved index from the search
+   * @return the descriptive string.
+   */
+  public abstract String desc(long[] arr, int low, int high, double v, int 
idx);
+
   /**
    * Binary Search for the index of the double value in the given search range 
that satisfies
    * the given InequalitySearch criterion.
@@ -804,4 +1003,36 @@ public enum InequalitySearch {
     return -1; //should never return here
   }
 
+  /**
+   * Binary Search for the index of the double value in the given search range 
that satisfies
+   * the given InequalitySearch criterion.
+   * If -1 is returned there are no values in the search range that satisfy 
the criterion.
+   *
+   * @param arr the given array that must be sorted.
+   * @param low the lowest index of the lowest value in the search range, 
inclusive.
+   * @param high the highest index of the highest value in the search range, 
inclusive.
+   * @param v the value to search for.
+   * @param crit one of LT, LE, EQ, GT, GE
+   * @return the index of the value in the given search range that satisfies 
the criterion
+   */
+  public static int find(final long[] arr, final int low, final int high,
+      final double v, final InequalitySearch crit) {
+    Objects.requireNonNull(arr, "Input arr must not be null");
+    Objects.requireNonNull(crit, "Input crit must not be null");
+    if (arr.length == 0) { throw new SketchesArgumentException("Input array 
must not be empty."); }
+    int lo = low;
+    int hi = high;
+    while (lo <= hi) {
+      if (hi - lo <= 1) {
+        return crit.resolve(arr, lo, hi, v);
+      }
+      final int mid = lo + (hi - lo) / 2;
+      final int ret = crit.compare(arr, mid, mid + 1, v);
+      if (ret == -1 ) { hi = mid; }
+      else if (ret == 1) { lo = mid + 1; }
+      else  { return crit.getIndex(arr, mid, mid + 1, v); }
+    }
+    return -1; //should never return here
+  }
+
 } //End of enum
diff --git 
a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java 
b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java
index ddece47e..aebdc46b 100644
--- 
a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java
+++ 
b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java
@@ -221,7 +221,7 @@ public interface QuantilesFloatsAPI extends QuantilesAPI {
    * Gets the lower bound of the quantile confidence interval in which the 
quantile of the
    * given rank exists.
    *
-   * <p>Although it is possible to estimate the probablity that the true 
quantile
+   * <p>Although it is possible to estimate the probability that the true 
quantile
    * exists within the quantile confidence interval specified by the upper and 
lower quantile bounds,
    * it is not possible to guarantee the width of the quantile confidence 
interval
    * as an additive or multiplicative percent of the true quantile.</p>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to