houserjohn commented on PR #13914:
URL: https://github.com/apache/lucene/pull/13914#issuecomment-2667735504
Here are the promised modified randomized unit tests. These should work with
your API change, but you might need to modify them to suit the caveat you
mentioned. Of course, add the correct imports:
```
public void testComputeDynamicNumericRangesWithSameWeightsShuffled() {
List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new
ArrayList<>();
long[] values = new long[100];
long[] weights = new long[100];
for (int i = 0; i < 100; i++) {
values[i] = i;
weights[i] = 50;
}
// Shuffling the values and weights should not change the answer between
runs
// We expect that returned ranges should come in a strict, deterministic
order
// with the same values and weights
shuffleValuesWeights(values, weights);
expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(25, 1250L,
0L, 24L, 12.0D));
expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(25, 1250L,
25L, 49L, 37.0D));
expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(25, 1250L,
50L, 74L, 62.0D));
expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(25, 1250L,
75L, 99L, 87.0D));
assertDynamicNumericRangeResults(values, weights, 4, 4950L, 5000L,
expectedRangeInfoList);
}
public void testComputeDynamicNumericRangesWithSameValuesShuffled() {
List<DynamicRangeUtil.DynamicRangeInfo> expectedRangeInfoList = new
ArrayList<>();
long totalWeight = 0;
long[] values = new long[100];
long[] weights = new long[100];
for (int i = 0; i < 100; i++) {
values[i] = 50;
weights[i] = i;
totalWeight += i;
}
// Shuffling the values and weights should not change the answer between
runs
// We expect that returned ranges should come in a strict, deterministic
order
// with the same values and weights
shuffleValuesWeights(values, weights);
expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(51, 1275L,
50L, 50L, 50D));
expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(21, 1281L,
50L, 50L, 50D));
expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(16, 1272L,
50L, 50L, 50D));
expectedRangeInfoList.add(new DynamicRangeUtil.DynamicRangeInfo(12, 1122L,
50L, 50L, 50D));
assertDynamicNumericRangeResults(values, weights, 4, 5000L, totalWeight,
expectedRangeInfoList);
}
public void testComputeDynamicNumericRangesWithRandomValues() {
int arraySize = random().nextInt(100);
long[] values = new long[arraySize];
long[] weights = new long[arraySize];
for (int i = 0; i < arraySize; i++) {
values[i] = random().nextLong(1000);
weights[i] = random().nextLong(1000);
}
int topN = random().nextInt(100);
long totalWeight = 0;
long totalValue = 0;
for (int i = 0; i < arraySize; i++) {
totalWeight += weights[i];
totalValue += values[i];
}
assertDynamicNumericRangeValidProperties(values, weights, topN,
totalValue, totalWeight);
}
private static void assertDynamicNumericRangeValidProperties(
long[] values, long[] weights, int topN, long totalValue, long
totalWeight) {
List<WeightedPair> sortedPairs = new ArrayList<>();
for (int i = 0; i < values.length; i++) {
long value = values[i];
long weight = weights[i];
WeightedPair pair = new WeightedPair(value, weight);
sortedPairs.add(pair);
}
sortedPairs.sort(
Comparator.comparingLong(WeightedPair::value).thenComparingLong(WeightedPair::weight));
int len = values.length;
double rangeWeightTarget = (double) totalWeight / Math.min(topN, len);
List<DynamicRangeUtil.DynamicRangeInfo> mockDynamicRangeResult =
DynamicRangeUtil.computeDynamicNumericRanges(
values, weights, values.length, totalValue, totalWeight, topN);
// Zero requested ranges (TopN) should return a empty list of ranges
regardless of inputs
if (topN == 0) {
assertTrue(mockDynamicRangeResult.size() == 0);
return; // Early return; do not check anything else
}
// Adjacent ranges do not overlap - only adjacent max-min can overlap
for (int i = 0; i < mockDynamicRangeResult.size() - 1; i++) {
DynamicRangeUtil.DynamicRangeInfo rangeInfo =
mockDynamicRangeResult.get(i);
DynamicRangeUtil.DynamicRangeInfo nextRangeInfo =
mockDynamicRangeResult.get(i + 1);
assertTrue(rangeInfo.max() <= nextRangeInfo.min());
}
// The count of every range sums to the number of values
int accuCount = 0;
for (int i = 0; i < mockDynamicRangeResult.size(); i++) {
DynamicRangeUtil.DynamicRangeInfo rangeInfo =
mockDynamicRangeResult.get(i);
int count = rangeInfo.count();
accuCount += count;
}
assertTrue(accuCount == len);
// The sum of every range weight equals the total weight
long accuWeight = 0;
for (int i = 0; i < mockDynamicRangeResult.size(); i++) {
DynamicRangeUtil.DynamicRangeInfo rangeInfo =
mockDynamicRangeResult.get(i);
long weight = rangeInfo.weight();
accuWeight += weight;
}
assertTrue(accuWeight == totalWeight);
// All values appear in atleast one range
for (int pairOffset = 0, rangeIdx = 0; rangeIdx <
mockDynamicRangeResult.size(); rangeIdx++) {
DynamicRangeUtil.DynamicRangeInfo rangeInfo =
mockDynamicRangeResult.get(rangeIdx);
int count = rangeInfo.count();
for (int i = pairOffset; i < pairOffset + count; i++) {
WeightedPair pair = sortedPairs.get(i);
long value = pair.value();
assertTrue(rangeInfo.min() <= value && value <= rangeInfo.max());
}
pairOffset += count;
}
// The minimum/maximum of each range is actually the smallest/largest value
for (int pairOffset = 0, rangeIdx = 0; rangeIdx <
mockDynamicRangeResult.size(); rangeIdx++) {
DynamicRangeUtil.DynamicRangeInfo rangeInfo =
mockDynamicRangeResult.get(rangeIdx);
int count = rangeInfo.count();
WeightedPair minPair = sortedPairs.get(pairOffset);
WeightedPair maxPair = sortedPairs.get(pairOffset + count - 1);
long min = minPair.value();
long max = maxPair.value();
assertTrue(rangeInfo.min() == min);
assertTrue(rangeInfo.max() == max);
pairOffset += count;
}
// Weights of each range is over the rangeWeightTarget - exclude last range
for (int i = 0; i < mockDynamicRangeResult.size() - 1; i++) {
DynamicRangeUtil.DynamicRangeInfo rangeInfo =
mockDynamicRangeResult.get(i);
assertTrue(rangeInfo.weight() >= rangeWeightTarget);
}
// Removing the last weight from a range brings it under the
rangeWeightTarget - exclude last
// range
for (int pairOffset = 0, rangeIdx = 0;
rangeIdx < mockDynamicRangeResult.size() - 1;
rangeIdx++) {
DynamicRangeUtil.DynamicRangeInfo rangeInfo =
mockDynamicRangeResult.get(rangeIdx);
int count = rangeInfo.count();
WeightedPair lastPair = sortedPairs.get(pairOffset + count - 1);
long lastWeight = lastPair.weight();
pairOffset += count;
assertTrue(rangeInfo.weight() - lastWeight < rangeWeightTarget);
}
// Centroids for each range are correct
for (int pairOffset = 0, rangeIdx = 0; rangeIdx <
mockDynamicRangeResult.size(); rangeIdx++) {
DynamicRangeUtil.DynamicRangeInfo rangeInfo =
mockDynamicRangeResult.get(rangeIdx);
int count = rangeInfo.count();
long accuValue = 0;
for (int i = pairOffset; i < pairOffset + count; i++) {
WeightedPair pair = sortedPairs.get(i);
long value = pair.value();
accuValue += value;
}
pairOffset += count;
assertTrue(rangeInfo.centroid() == ((double) accuValue / count));
}
}
/** Implementation of Durstenfeld's algorithm for shuffling values and
weights */
private static void shuffleValuesWeights(long[] values, long[] weights) {
for (int i = values.length - 1; i > 0; i--) {
int rdmIdx = random().nextInt(i + 1);
long tmpValue = values[i];
long tmpWeight = weights[i];
values[i] = values[rdmIdx];
weights[i] = weights[rdmIdx];
values[rdmIdx] = tmpValue;
weights[rdmIdx] = tmpWeight;
}
}
/**
* Holds parameters of a weighted pair.
*
* @param value the value of the pair
* @param weight the weight of the pair
*/
private record WeightedPair(long value, long weight) {}
```
Additionally, here is a command that you can run from the command line to
search for bugs:
First, `./gradlew clean` and `./gradlew build`
```
for i in {1..100}; do; echo $i; ./gradlew check; if [ $? -gt 0 ]; then; cat
$path_to_test_output >> "../errors.txt"; fi; done;
```
`$path_to_test_output` should be a path ending with
`OUTPUT-org.apache.lucene.facet.range.TestDynamicRangeUtil.txt`. This should be
the same location that you go whenever you want to view the output from unit
tests (which appears when a unit test fails after a build fails).
After the command finishes, all of the found bugs should be in
`../error.txt`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]