This is an automated email from the ASF dual-hosted git repository.
karan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 2df0f61290e Speed up ClusterByStatisticsCollector tests. (#18927)
2df0f61290e is described below
commit 2df0f61290e8aaf8a63d32e65787a4f50bbdeffd
Author: Gian Merlino <[email protected]>
AuthorDate: Mon Jan 19 20:42:41 2026 -0800
Speed up ClusterByStatisticsCollector tests. (#18927)
In QuantilesSketchKeyCollectorTest, DistinctKeyCollectorTest, and
ClusterByStatisticsCollectorImplTest, lower the number of rows per test
to 100k, 100k, and 150k respectively. Also lower MAX_BYTES and MAX_BUCKETS
in ClusterByStatisticsCollectorImplTest. This provides a similar level
of coverage while running significantly faster: a few seconds per test
rather than nearly a minute.
---
.../ClusterByStatisticsCollectorImplTest.java | 32 +++++++++++-----------
.../msq/statistics/DistinctKeyCollectorTest.java | 2 +-
.../QuantilesSketchKeyCollectorTest.java | 4 +--
3 files changed, 19 insertions(+), 19 deletions(-)
diff --git
a/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/ClusterByStatisticsCollectorImplTest.java
b/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/ClusterByStatisticsCollectorImplTest.java
index 13fda326267..4c65ca366c5 100644
---
a/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/ClusterByStatisticsCollectorImplTest.java
+++
b/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/ClusterByStatisticsCollectorImplTest.java
@@ -70,7 +70,7 @@ import java.util.stream.LongStream;
public class ClusterByStatisticsCollectorImplTest extends
InitializedNullHandlingTest
{
- private static final double PARTITION_SIZE_LEEWAY = 0.3;
+ private static final double PARTITION_SIZE_LEEWAY = 0.35;
private static final RowSignature SIGNATURE = RowSignature.builder()
.add("x",
ColumnType.LONG)
@@ -96,14 +96,14 @@ public class ClusterByStatisticsCollectorImplTest extends
InitializedNullHandlin
1
);
- // These numbers are roughly 10x lower than authentic production numbers. (See StageDefinition.)
- private static final int MAX_BYTES = 1_000_000;
- private static final int MAX_BUCKETS = 1000;
+ // These numbers are roughly 50x lower than authentic production numbers. (See StageDefinition.)
+ private static final int MAX_BYTES = 150_000;
+ private static final int MAX_BUCKETS = 200;
@Test
public void test_clusterByX_unique()
{
- final long numRows = 1_000_000;
+ final long numRows = 150_000;
final boolean aggregate = false;
final ClusterBy clusterBy = CLUSTER_BY_X;
final Iterable<RowKey> keys = () ->
@@ -122,7 +122,7 @@ public class ClusterByStatisticsCollectorImplTest extends
InitializedNullHandlin
Assert.assertEquals(StringUtils.format("%s: tracked bucket count",
testName), 1, trackedBuckets(collector));
Assert.assertEquals(StringUtils.format("%s: tracked row count",
testName), numRows, trackedRows(collector));
- for (int targetPartitionWeight : new int[]{51111, 65432, (int)
numRows + 10}) {
+ for (int targetPartitionWeight : new int[]{5111, 6543, (int) numRows
+ 10}) {
verifyPartitionsWithTargetWeight(
StringUtils.format("%s:
generatePartitionsWithTargetWeight(%d)", testName, targetPartitionWeight),
collector,
@@ -150,7 +150,7 @@ public class ClusterByStatisticsCollectorImplTest extends
InitializedNullHandlin
@Test
public void test_clusterByX_everyKeyAppearsTwice()
{
- final long numRows = 1_000_000;
+ final long numRows = 150_000;
final boolean aggregate = false;
final ClusterBy clusterBy = CLUSTER_BY_X;
final List<RowKey> keys = new ArrayList<>();
@@ -171,7 +171,7 @@ public class ClusterByStatisticsCollectorImplTest extends
InitializedNullHandlin
Assert.assertEquals(StringUtils.format("%s: tracked bucket count",
testName), 1, trackedBuckets(collector));
Assert.assertEquals(StringUtils.format("%s: tracked row count",
testName), numRows, trackedRows(collector));
- for (int targetPartitionWeight : new int[]{51111, 65432, (int)
numRows + 10}) {
+ for (int targetPartitionWeight : new int[]{5111, 6543, (int) numRows
+ 10}) {
verifyPartitionsWithTargetWeight(
StringUtils.format("%s:
generatePartitionsWithTargetWeight(%d)", testName, targetPartitionWeight),
collector,
@@ -199,7 +199,7 @@ public class ClusterByStatisticsCollectorImplTest extends
InitializedNullHandlin
@Test
public void test_clusterByX_everyKeyAppearsTwice_withAggregation()
{
- final long numRows = 1_000_000;
+ final long numRows = 150_000;
final boolean aggregate = true;
final ClusterBy clusterBy = CLUSTER_BY_X;
final List<RowKey> keys = new ArrayList<>();
@@ -229,7 +229,7 @@ public class ClusterByStatisticsCollectorImplTest extends
InitializedNullHandlin
expectedNumRows * .05 // Acceptable estimation error
);
- for (int targetPartitionWeight : new int[]{51111, 65432, (int)
numRows + 10}) {
+ for (int targetPartitionWeight : new int[]{5111, 6543, (int) numRows
+ 10}) {
verifyPartitionsWithTargetWeight(
StringUtils.format("%s:
generatePartitionsWithTargetWeight(%d)", testName, targetPartitionWeight),
collector,
@@ -259,7 +259,7 @@ public class ClusterByStatisticsCollectorImplTest extends
InitializedNullHandlin
{
final int numBuckets = 3;
final boolean aggregate = false;
- final long numRows = 1_000_000;
+ final long numRows = 150_000;
final ClusterBy clusterBy = CLUSTER_BY_XY_BUCKET_BY_X;
final List<RowKey> keys = new ArrayList<>((int) numRows);
@@ -281,7 +281,7 @@ public class ClusterByStatisticsCollectorImplTest extends
InitializedNullHandlin
Assert.assertEquals(StringUtils.format("%s: bucket count",
testName), numBuckets, trackedBuckets(collector));
Assert.assertEquals(StringUtils.format("%s: row count", testName),
numRows, trackedRows(collector));
- for (int targetPartitionWeight : new int[]{17001, 23007}) {
+ for (int targetPartitionWeight : new int[]{5111, 6543}) {
verifyPartitionsWithTargetWeight(
StringUtils.format("%s:
generatePartitionsWithTargetWeight(%d)", testName, targetPartitionWeight),
collector,
@@ -323,7 +323,7 @@ public class ClusterByStatisticsCollectorImplTest extends
InitializedNullHandlin
{
final int numBuckets = MAX_BUCKETS;
final boolean aggregate = false;
- final long numRows = 1_000_000;
+ final long numRows = 150_000;
final ClusterBy clusterBy = CLUSTER_BY_XY_BUCKET_BY_X;
final List<RowKey> keys = new ArrayList<>((int) numRows);
@@ -344,7 +344,7 @@ public class ClusterByStatisticsCollectorImplTest extends
InitializedNullHandlin
(testName, collector) -> {
Assert.assertEquals(StringUtils.format("%s: bucket count",
testName), numBuckets, trackedBuckets(collector));
- for (int targetPartitionWeight : new int[]{17001, 23007}) {
+ for (int targetPartitionWeight : new int[]{1701, 2301}) {
verifyPartitionsWithTargetWeight(
StringUtils.format("%s:
generatePartitionsWithTargetWeight(%d)", testName, targetPartitionWeight),
collector,
@@ -394,7 +394,7 @@ public class ClusterByStatisticsCollectorImplTest extends
InitializedNullHandlin
{
final int numBuckets = MAX_BUCKETS;
final boolean aggregate = true;
- final long numRows = 1_000_000;
+ final long numRows = 150_000;
final ClusterBy clusterBy = CLUSTER_BY_XY_BUCKET_BY_X;
final List<RowKey> keys = new ArrayList<>((int) numRows);
@@ -418,7 +418,7 @@ public class ClusterByStatisticsCollectorImplTest extends
InitializedNullHandlin
// trackedRows will equal numBuckets, because the collectors have been downsampled so much
Assert.assertEquals(StringUtils.format("%s: row count", testName),
numBuckets, trackedRows(collector));
- for (int targetPartitionWeight : new int[]{17001, 23007}) {
+ for (int targetPartitionWeight : new int[]{1701, 2301}) {
verifyPartitionsWithTargetWeight(
StringUtils.format("%s:
generatePartitionsWithTargetWeight(%d)", testName, targetPartitionWeight),
collector,
diff --git
a/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/DistinctKeyCollectorTest.java
b/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/DistinctKeyCollectorTest.java
index 943b3108c8f..765aead2694 100644
---
a/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/DistinctKeyCollectorTest.java
+++
b/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/DistinctKeyCollectorTest.java
@@ -46,7 +46,7 @@ public class DistinctKeyCollectorTest
private final ClusterBy clusterBy = new ClusterBy(ImmutableList.of(new
KeyColumn("x", KeyOrder.ASCENDING)), 0);
private final RowSignature signature = RowSignature.builder().add("x",
ColumnType.LONG).build();
private final Comparator<RowKey> comparator =
clusterBy.keyComparator(signature);
- private final int numKeys = 500_000;
+ private final int numKeys = 100_000;
@Test
public void test_empty()
diff --git
a/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorTest.java
b/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorTest.java
index c4c32becdb2..c53563aa68a 100644
---
a/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorTest.java
+++
b/multi-stage-query/src/test/java/org/apache/druid/msq/statistics/QuantilesSketchKeyCollectorTest.java
@@ -45,7 +45,7 @@ public class QuantilesSketchKeyCollectorTest
private final ClusterBy clusterBy = new ClusterBy(ImmutableList.of(new
KeyColumn("x", KeyOrder.ASCENDING)), 0);
private final RowSignature signature = RowSignature.builder().add("x",
ColumnType.LONG).build();
private final Comparator<RowKey> comparator =
clusterBy.keyComparator(signature);
- private final int numKeys = 500_000;
+ private final int numKeys = 100_000;
@Test
public void test_empty()
@@ -120,7 +120,7 @@ public class QuantilesSketchKeyCollectorTest
}
Assert.assertEquals(testName, 2, collector.getSketch().getK());
- Assert.assertEquals(testName, 14, collector.estimatedRetainedKeys());
+ Assert.assertEquals(testName, 12, collector.estimatedRetainedKeys());
// Don't use verifyCollector, since this collector is downsampled so aggressively that it can't possibly
// hope to pass those tests. Grade on a curve.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]