gsmiller commented on code in PR #14238:
URL: https://github.com/apache/lucene/pull/14238#discussion_r1958713253
##########
lucene/facet/src/test/org/apache/lucene/facet/range/TestDynamicRangeUtil.java:
##########
@@ -76,13 +95,243 @@ public void
testComputeDynamicNumericRangesWithOneLargeWeight() {
long[] values = new long[] {45, 32, 52, 14, 455, 342, 53};
long[] weights = new long[] {143, 23, 1, 52343, 53, 12, 2534};
- // value 14 has its own bin since the weight is large, and the rest of
values fall the other bin
- expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(1, 52343,
14L, 14L, 14D));
+ // value 14 has its own bin since the weight is large, and the rest of the
values fall in the
+ // other bin
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(1, 52343L,
14L, 14L, 14D));
expectedRangeInfoList.add(
- new DynamicRangeUtil.DynamicRangeInfo(6, 2766, 32L, 455L,
163.16666666666666D));
+ new DynamicRangeUtil.DynamicRangeInfo(6, 2766L, 32L, 455L,
163.16666666666666D));
assertDynamicNumericRangeResults(values, weights, 4, 55109,
expectedRangeInfoList);
}
+ public void testComputeDynamicNumericRangesWithLargeTopN() {
+ List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new
ArrayList<>();
+ long[] values = new long[] {487, 439, 794, 277};
+ long[] weights = new long[] {59, 508, 736, 560};
+
+ // More requested ranges (TopN) than values should return ranges with
weights larger than the
+ // average weight - excluding the last range
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(1, 560L,
277L, 277L, 277D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(1, 508L,
439L, 439L, 439D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(2, 795L,
487L, 794L, 640.5D));
+ assertDynamicNumericRangeResults(values, weights, 42, 1863L,
expectedRangeInfoList);
+ }
+
+ public void testComputeDynamicNumericRangesWithSingleTopN() {
+ List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new
ArrayList<>();
+ long[] values = new long[] {487, 439, 794, 277};
+ long[] weights = new long[] {59, 508, 736, 560};
+
+ // A single requested range (TopN) should return a single range regardless
of the weights
+ // provided
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(4, 1863L,
277L, 794L, 499.25D));
+ assertDynamicNumericRangeResults(values, weights, 1, 1863L,
expectedRangeInfoList);
+ }
+
+ public void testComputeDynamicNumericRangesWithTwoTopN() {
+ List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new
ArrayList<>();
+ long[] values = new long[] {487, 439, 794, 277};
+ long[] weights = new long[] {59, 508, 736, 560};
+
+ // Two requested ranges (TopN) should return two ranges where the first
range's weight is equal
+ // or larger than half of the total weight
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(2, 1068L,
277L, 439L, 358.0D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(2, 795L,
487L, 794L, 640.5D));
+ assertDynamicNumericRangeResults(values, weights, 2, 1863L,
expectedRangeInfoList);
+ }
+
+ public void testComputeDynamicNumericRangesWithSameWeightsShuffled() {
+ List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new
ArrayList<>();
+ long[] values = new long[100];
+ long[] weights = new long[100];
+ for (int i = 0; i < 100; i++) {
+ values[i] = i;
+ weights[i] = 50;
+ }
+
+ // Shuffling the values and weights should not change the answer between
runs
+ // We expect that returned ranges should come in a strict, deterministic
order
+ // with the same values and weights
+ shuffleValuesWeights(values, weights);
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(25, 1250L,
0L, 24L, 12.0D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(25, 1250L,
25L, 49L, 37.0D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(25, 1250L,
50L, 74L, 62.0D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(25, 1250L,
75L, 99L, 87.0D));
+ assertDynamicNumericRangeResults(values, weights, 4, 5000L,
expectedRangeInfoList);
+ }
+
+ public void testComputeDynamicNumericRangesWithSameValuesShuffled() {
+ List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new
ArrayList<>();
+ long totalWeight = 0;
+ long[] values = new long[100];
+ long[] weights = new long[100];
+ for (int i = 0; i < 100; i++) {
+ values[i] = 50;
+ weights[i] = i;
+ totalWeight += i;
+ }
+
+ // Shuffling the values and weights should not change the answer between
runs
+ // We expect that returned ranges should come in a strict, deterministic
order
+ // with the same values and weights
+ shuffleValuesWeights(values, weights);
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(51, 1275L,
50L, 50L, 50D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(21, 1281L,
50L, 50L, 50D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(16, 1272L,
50L, 50L, 50D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(12, 1122L,
50L, 50L, 50D));
+
+ assertDynamicNumericRangeResults(values, weights, 4, totalWeight,
expectedRangeInfoList);
+ }
+
+ public void testComputeDynamicNumericRangesWithMisplacedValue() {
+ List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new
ArrayList<>();
+ long[] values =
+ new long[] {
+ 1, 2, 11, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 12, 111,
112, 113, 114, 115
+ };
+ long[] weights =
+ new long[] {
+ 2, 3, 12, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 13, 112,
113, 114, 115, 116
+ };
+
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(8, 444L,
1L, 104L, 54.5D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(4, 430L,
105L, 108L, 106.5D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(4, 446L,
109L, 112L, 110.5D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(3, 345L,
113L, 115L, 114.0D));
+ assertDynamicNumericRangeResults(values, weights, 4, 1665,
expectedRangeInfoList);
+ }
+
+ public void testComputeDynamicNumericRangesWithRandomValues() {
+ int arraySize = random().nextInt(100);
+ long[] values = new long[arraySize];
+ long[] weights = new long[arraySize];
+
+ for (int i = 0; i < arraySize; i++) {
+ values[i] = random().nextLong(1000);
+ weights[i] = random().nextLong(1000);
+ }
+
+ int topN = random().nextInt(100);
+
+ long totalWeight = 0;
+ for (long weight : weights) {
+ totalWeight += weight;
+ }
+
+ assertDynamicNumericRangeValidProperties(values, weights, topN,
totalWeight);
+ }
+
+ /** Implementation of Durstenfeld's algorithm for shuffling values and
weights */
+ private void shuffleValuesWeights(long[] values, long[] weights) {
+ for (int i = values.length - 1; i > 0; i--) {
+ int rdmIdx = random().nextInt(i + 1);
+ long tmpValue = values[i];
+ long tmpWeight = weights[i];
+ values[i] = values[rdmIdx];
+ weights[i] = weights[rdmIdx];
+ values[rdmIdx] = tmpValue;
+ weights[rdmIdx] = tmpWeight;
+ }
+ }
+
+ private static void assertDynamicNumericRangeValidProperties(
+ long[] values, long[] weights, int topN, long totalWeight) {
+ List<List<Long>> sortedPairs = new ArrayList<>();
+ for (int i = 0; i < values.length; i++) {
+ long value = values[i];
+ long weight = weights[i];
+ List<Long> pair = new ArrayList<>();
Review Comment:
Would it make this code (and maybe other places in this testing class)
simpler if you introduced a `record` for keeping track of (value, weight)
tuples?
##########
lucene/facet/src/test/org/apache/lucene/facet/range/TestDynamicRangeUtil.java:
##########
@@ -76,13 +95,243 @@ public void
testComputeDynamicNumericRangesWithOneLargeWeight() {
long[] values = new long[] {45, 32, 52, 14, 455, 342, 53};
long[] weights = new long[] {143, 23, 1, 52343, 53, 12, 2534};
- // value 14 has its own bin since the weight is large, and the rest of
values fall the other bin
- expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(1, 52343,
14L, 14L, 14D));
+ // value 14 has its own bin since the weight is large, and the rest of the
values fall in the
+ // other bin
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(1, 52343L,
14L, 14L, 14D));
expectedRangeInfoList.add(
- new DynamicRangeUtil.DynamicRangeInfo(6, 2766, 32L, 455L,
163.16666666666666D));
+ new DynamicRangeUtil.DynamicRangeInfo(6, 2766L, 32L, 455L,
163.16666666666666D));
assertDynamicNumericRangeResults(values, weights, 4, 55109,
expectedRangeInfoList);
}
+ public void testComputeDynamicNumericRangesWithLargeTopN() {
+ List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new
ArrayList<>();
+ long[] values = new long[] {487, 439, 794, 277};
+ long[] weights = new long[] {59, 508, 736, 560};
+
+ // More requested ranges (TopN) than values should return ranges with
weights larger than the
+ // average weight - excluding the last range
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(1, 560L,
277L, 277L, 277D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(1, 508L,
439L, 439L, 439D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(2, 795L,
487L, 794L, 640.5D));
+ assertDynamicNumericRangeResults(values, weights, 42, 1863L,
expectedRangeInfoList);
+ }
+
+ public void testComputeDynamicNumericRangesWithSingleTopN() {
+ List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new
ArrayList<>();
+ long[] values = new long[] {487, 439, 794, 277};
+ long[] weights = new long[] {59, 508, 736, 560};
+
+ // A single requested range (TopN) should return a single range regardless
of the weights
+ // provided
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(4, 1863L,
277L, 794L, 499.25D));
+ assertDynamicNumericRangeResults(values, weights, 1, 1863L,
expectedRangeInfoList);
+ }
+
+ public void testComputeDynamicNumericRangesWithTwoTopN() {
+ List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new
ArrayList<>();
+ long[] values = new long[] {487, 439, 794, 277};
+ long[] weights = new long[] {59, 508, 736, 560};
+
+ // Two requested ranges (TopN) should return two ranges where the first
range's weight is equal
+ // or larger than half of the total weight
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(2, 1068L,
277L, 439L, 358.0D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(2, 795L,
487L, 794L, 640.5D));
+ assertDynamicNumericRangeResults(values, weights, 2, 1863L,
expectedRangeInfoList);
+ }
+
+ public void testComputeDynamicNumericRangesWithSameWeightsShuffled() {
+ List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new
ArrayList<>();
+ long[] values = new long[100];
+ long[] weights = new long[100];
+ for (int i = 0; i < 100; i++) {
+ values[i] = i;
+ weights[i] = 50;
+ }
+
+ // Shuffling the values and weights should not change the answer between
runs
+ // We expect that returned ranges should come in a strict, deterministic
order
+ // with the same values and weights
+ shuffleValuesWeights(values, weights);
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(25, 1250L,
0L, 24L, 12.0D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(25, 1250L,
25L, 49L, 37.0D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(25, 1250L,
50L, 74L, 62.0D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(25, 1250L,
75L, 99L, 87.0D));
+ assertDynamicNumericRangeResults(values, weights, 4, 5000L,
expectedRangeInfoList);
+ }
+
+ public void testComputeDynamicNumericRangesWithSameValuesShuffled() {
+ List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new
ArrayList<>();
+ long totalWeight = 0;
+ long[] values = new long[100];
+ long[] weights = new long[100];
+ for (int i = 0; i < 100; i++) {
+ values[i] = 50;
+ weights[i] = i;
+ totalWeight += i;
+ }
+
+ // Shuffling the values and weights should not change the answer between
runs
+ // We expect that returned ranges should come in a strict, deterministic
order
+ // with the same values and weights
+ shuffleValuesWeights(values, weights);
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(51, 1275L,
50L, 50L, 50D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(21, 1281L,
50L, 50L, 50D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(16, 1272L,
50L, 50L, 50D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(12, 1122L,
50L, 50L, 50D));
+
+ assertDynamicNumericRangeResults(values, weights, 4, totalWeight,
expectedRangeInfoList);
+ }
+
+ public void testComputeDynamicNumericRangesWithMisplacedValue() {
+ List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new
ArrayList<>();
+ long[] values =
+ new long[] {
+ 1, 2, 11, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 12, 111,
112, 113, 114, 115
+ };
+ long[] weights =
+ new long[] {
+ 2, 3, 12, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 13, 112,
113, 114, 115, 116
+ };
+
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(8, 444L,
1L, 104L, 54.5D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(4, 430L,
105L, 108L, 106.5D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(4, 446L,
109L, 112L, 110.5D));
+ expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(3, 345L,
113L, 115L, 114.0D));
+ assertDynamicNumericRangeResults(values, weights, 4, 1665,
expectedRangeInfoList);
+ }
+
+ public void testComputeDynamicNumericRangesWithRandomValues() {
+ int arraySize = random().nextInt(100);
+ long[] values = new long[arraySize];
+ long[] weights = new long[arraySize];
+
+ for (int i = 0; i < arraySize; i++) {
+ values[i] = random().nextLong(1000);
+ weights[i] = random().nextLong(1000);
+ }
+
+ int topN = random().nextInt(100);
+
+ long totalWeight = 0;
+ for (long weight : weights) {
+ totalWeight += weight;
+ }
+
+ assertDynamicNumericRangeValidProperties(values, weights, topN,
totalWeight);
+ }
+
+ /** Implementation of Durstenfeld's algorithm for shuffling values and
weights */
+ private void shuffleValuesWeights(long[] values, long[] weights) {
Review Comment:
nit: let's make this static?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]