cecemei commented on code in PR #18508:
URL: https://github.com/apache/druid/pull/18508#discussion_r2334683561
##########
processing/src/test/java/org/apache/druid/segment/CursorFactoryProjectionTest.java:
##########
@@ -1484,6 +1523,155 @@ public void testProjectionSelectionTwoVirtual()
);
}
+ @Test
+ public void testProjectionFilteredProjectionMatch()
+ {
+ final GroupByQuery query =
+ GroupByQuery.builder()
+ .setDataSource("test")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimFilter(new EqualityFilter("a", ColumnType.STRING,
"a", null))
+ .addDimension("a")
+ .build();
+
+ final boolean isRealtime = projectionsCursorFactory instanceof
IncrementalIndexCursorFactory;
+ final ExpectedProjectionGroupBy queryMetrics = new
ExpectedProjectionGroupBy(isRealtime ? "abfoo" :
"a_hourly_c_sum_filter_a_to_a");
+
+ final CursorBuildSpec buildSpec =
GroupingEngine.makeCursorBuildSpec(query, queryMetrics);
+
+ assertCursorProjection(buildSpec, queryMetrics, isRealtime ? 4 : 2);
+
+ testGroupBy(
+ query,
+ queryMetrics,
+ makeArrayResultSet(
+ new Object[]{"a"}
+ )
+ );
+ }
+
+ @Test
+ public void testProjectionFilteredNoFilteredProjectionMatch()
+ {
+ final GroupByQuery query =
+ GroupByQuery.builder()
+ .setDataSource("test")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimFilter(new EqualityFilter("a", ColumnType.STRING,
"b", null))
+ .addDimension("a")
+ .build();
+
+ final boolean isRealtime = projectionsCursorFactory instanceof
IncrementalIndexCursorFactory;
+ final ExpectedProjectionGroupBy queryMetrics = new
ExpectedProjectionGroupBy(isRealtime ? "abfoo" :
"a_hourly_c_sum_with_count_latest");
+
+ final CursorBuildSpec buildSpec =
GroupingEngine.makeCursorBuildSpec(query, queryMetrics);
+
+ assertCursorProjection(buildSpec, queryMetrics, isRealtime ? 2 : 1);
+
+ testGroupBy(
+ query,
+ queryMetrics,
+ makeArrayResultSet(
+ new Object[]{"b"}
+ )
+ );
+ }
+
+ @Test
+ public void testProjectionFilteredToEmpty()
+ {
+ final GroupByQuery query =
+ GroupByQuery.builder()
+ .setDataSource("test")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimFilter(new EqualityFilter("a", ColumnType.STRING,
"nomatch", null))
+ .setContext(Map.of("useProjection",
"a_hourly_c_sum_filter_a_to_empty"))
+ .addDimension("a")
+ .build();
+
+ final ExpectedProjectionGroupBy queryMetrics = new
ExpectedProjectionGroupBy("a_hourly_c_sum_filter_a_to_empty");
+
+ final CursorBuildSpec buildSpec =
GroupingEngine.makeCursorBuildSpec(query, queryMetrics);
+
+ assertCursorProjection(buildSpec, queryMetrics, 0);
+
+ testGroupBy(
+ query,
+ queryMetrics,
+ makeArrayResultSet()
+ );
+ }
+
+ @Test
+ public void testProjectionFilteredToEmptyTimeseries()
+ {
+ final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
+ .dataSource("test")
+
.intervals(ImmutableList.of(Intervals.ETERNITY))
+ .granularity(Granularities.ALL)
+ .filters(new EqualityFilter("a",
ColumnType.STRING, "nomatch", null))
+ .aggregators(new
LongSumAggregatorFactory("c_sum", "c"))
+
.context(ImmutableMap.of(QueryContexts.USE_PROJECTION,
"a_hourly_c_sum_filter_a_to_empty"))
+ .build();
+
+ final ExpectedProjectionTimeseries queryMetrics =
+ new ExpectedProjectionTimeseries("a_hourly_c_sum_filter_a_to_empty");
+
+ final CursorBuildSpec buildSpec =
TimeseriesQueryEngine.makeCursorBuildSpec(query, queryMetrics);
+
+ assertCursorProjection(buildSpec, queryMetrics, 0);
+
+ // realltime results are inconsistent between projection and base table
since projection is totally empty, but base
+ // table is reduced with filter
+ final boolean isRealtime = projectionsCursorFactory instanceof
IncrementalIndexCursorFactory;
+ final List<Object[]> expectedResults = Collections.singletonList(new
Object[]{TIMESTAMP, null});
Review Comment:
nit: could be more declarative with:
`final List<Object[]> expectedResults = isRealtime ? List.of() :
List.of(new Object[]{TIMESTAMP, null});`
##########
processing/src/test/java/org/apache/druid/segment/CursorFactoryProjectionTest.java:
##########
@@ -1484,6 +1523,155 @@ public void testProjectionSelectionTwoVirtual()
);
}
+ @Test
+ public void testProjectionFilteredProjectionMatch()
+ {
+ final GroupByQuery query =
+ GroupByQuery.builder()
+ .setDataSource("test")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimFilter(new EqualityFilter("a", ColumnType.STRING,
"a", null))
+ .addDimension("a")
+ .build();
+
+ final boolean isRealtime = projectionsCursorFactory instanceof
IncrementalIndexCursorFactory;
+ final ExpectedProjectionGroupBy queryMetrics = new
ExpectedProjectionGroupBy(isRealtime ? "abfoo" :
"a_hourly_c_sum_filter_a_to_a");
Review Comment:
It's not obvious why projection matching returns different results for
realtime and data segments. Digging into it, this seems to come down to how
projections are sorted (fewer rows first, then coarser granularity first), with
the first match being returned. In realtime segments the # of rows is always 0,
so `abfoo` with ALL gran always comes first, while in data segments
`a_hourly_c_sum_filter_a_to_a` wins by having fewer total rows in the projection
(2 < 6). We should add some comments here in case people are wondering.
##########
processing/src/test/java/org/apache/druid/segment/CursorFactoryProjectionTest.java:
##########
@@ -1484,6 +1523,155 @@ public void testProjectionSelectionTwoVirtual()
);
}
+ @Test
+ public void testProjectionFilteredProjectionMatch()
+ {
+ final GroupByQuery query =
+ GroupByQuery.builder()
+ .setDataSource("test")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimFilter(new EqualityFilter("a", ColumnType.STRING,
"a", null))
+ .addDimension("a")
+ .build();
+
+ final boolean isRealtime = projectionsCursorFactory instanceof
IncrementalIndexCursorFactory;
+ final ExpectedProjectionGroupBy queryMetrics = new
ExpectedProjectionGroupBy(isRealtime ? "abfoo" :
"a_hourly_c_sum_filter_a_to_a");
+
+ final CursorBuildSpec buildSpec =
GroupingEngine.makeCursorBuildSpec(query, queryMetrics);
+
+ assertCursorProjection(buildSpec, queryMetrics, isRealtime ? 4 : 2);
+
+ testGroupBy(
+ query,
+ queryMetrics,
+ makeArrayResultSet(
+ new Object[]{"a"}
+ )
+ );
+ }
+
+ @Test
+ public void testProjectionFilteredNoFilteredProjectionMatch()
+ {
+ final GroupByQuery query =
+ GroupByQuery.builder()
+ .setDataSource("test")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimFilter(new EqualityFilter("a", ColumnType.STRING,
"b", null))
+ .addDimension("a")
+ .build();
+
+ final boolean isRealtime = projectionsCursorFactory instanceof
IncrementalIndexCursorFactory;
+ final ExpectedProjectionGroupBy queryMetrics = new
ExpectedProjectionGroupBy(isRealtime ? "abfoo" :
"a_hourly_c_sum_with_count_latest");
+
+ final CursorBuildSpec buildSpec =
GroupingEngine.makeCursorBuildSpec(query, queryMetrics);
+
+ assertCursorProjection(buildSpec, queryMetrics, isRealtime ? 2 : 1);
+
+ testGroupBy(
+ query,
+ queryMetrics,
+ makeArrayResultSet(
+ new Object[]{"b"}
+ )
+ );
+ }
+
+ @Test
+ public void testProjectionFilteredToEmpty()
+ {
+ final GroupByQuery query =
+ GroupByQuery.builder()
+ .setDataSource("test")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimFilter(new EqualityFilter("a", ColumnType.STRING,
"nomatch", null))
+ .setContext(Map.of("useProjection",
"a_hourly_c_sum_filter_a_to_empty"))
+ .addDimension("a")
+ .build();
+
+ final ExpectedProjectionGroupBy queryMetrics = new
ExpectedProjectionGroupBy("a_hourly_c_sum_filter_a_to_empty");
+
+ final CursorBuildSpec buildSpec =
GroupingEngine.makeCursorBuildSpec(query, queryMetrics);
+
+ assertCursorProjection(buildSpec, queryMetrics, 0);
+
+ testGroupBy(
+ query,
+ queryMetrics,
+ makeArrayResultSet()
+ );
+ }
+
+ @Test
+ public void testProjectionFilteredToEmptyTimeseries()
+ {
+ final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
+ .dataSource("test")
+
.intervals(ImmutableList.of(Intervals.ETERNITY))
+ .granularity(Granularities.ALL)
+ .filters(new EqualityFilter("a",
ColumnType.STRING, "nomatch", null))
+ .aggregators(new
LongSumAggregatorFactory("c_sum", "c"))
+
.context(ImmutableMap.of(QueryContexts.USE_PROJECTION,
"a_hourly_c_sum_filter_a_to_empty"))
+ .build();
+
+ final ExpectedProjectionTimeseries queryMetrics =
+ new ExpectedProjectionTimeseries("a_hourly_c_sum_filter_a_to_empty");
+
+ final CursorBuildSpec buildSpec =
TimeseriesQueryEngine.makeCursorBuildSpec(query, queryMetrics);
+
+ assertCursorProjection(buildSpec, queryMetrics, 0);
+
+ // realltime results are inconsistent between projection and base table
since projection is totally empty, but base
+ // table is reduced with filter
+ final boolean isRealtime = projectionsCursorFactory instanceof
IncrementalIndexCursorFactory;
+ final List<Object[]> expectedResults = Collections.singletonList(new
Object[]{TIMESTAMP, null});
+ final List<Object[]> expectedRealtimeResults = List.of();
+
+ final Sequence<Result<TimeseriesResultValue>> resultRows =
timeseriesEngine.process(
+ query,
+ projectionsCursorFactory,
+ projectionsTimeBoundaryInspector,
+ queryMetrics
+ );
+
+ queryMetrics.assertProjection();
+
+ final List<Result<TimeseriesResultValue>> results = resultRows.toList();
+ assertTimeseriesResults(
+ query.getResultRowSignature(RowSignature.Finalization.YES),
+ isRealtime ? expectedRealtimeResults : expectedResults,
+ results
+ );
+
+ Assertions.assertEquals(TIMESTAMP,
projectionsTimeBoundaryInspector.getMinTime());
+ if (isRealtime || segmentSortedByTime) {
+ Assertions.assertEquals(TIMESTAMP.plusHours(1).plusMinutes(1),
projectionsTimeBoundaryInspector.getMaxTime());
+ } else {
+ Assertions.assertEquals(
+ TIMESTAMP.plusHours(1).plusMinutes(1).plusMillis(1),
+ projectionsTimeBoundaryInspector.getMaxTime()
+ );
+ }
+
+ Assume.assumeTrue(segmentSortedByTime);
Review Comment:
is this assumeTrue necessary?
##########
processing/src/test/java/org/apache/druid/segment/CursorFactoryProjectionTest.java:
##########
@@ -1484,6 +1523,155 @@ public void testProjectionSelectionTwoVirtual()
);
}
+ @Test
+ public void testProjectionFilteredProjectionMatch()
+ {
+ final GroupByQuery query =
+ GroupByQuery.builder()
+ .setDataSource("test")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimFilter(new EqualityFilter("a", ColumnType.STRING,
"a", null))
+ .addDimension("a")
+ .build();
+
+ final boolean isRealtime = projectionsCursorFactory instanceof
IncrementalIndexCursorFactory;
+ final ExpectedProjectionGroupBy queryMetrics = new
ExpectedProjectionGroupBy(isRealtime ? "abfoo" :
"a_hourly_c_sum_filter_a_to_a");
+
+ final CursorBuildSpec buildSpec =
GroupingEngine.makeCursorBuildSpec(query, queryMetrics);
+
+ assertCursorProjection(buildSpec, queryMetrics, isRealtime ? 4 : 2);
+
+ testGroupBy(
+ query,
+ queryMetrics,
+ makeArrayResultSet(
+ new Object[]{"a"}
+ )
+ );
+ }
+
+ @Test
+ public void testProjectionFilteredNoFilteredProjectionMatch()
+ {
+ final GroupByQuery query =
+ GroupByQuery.builder()
+ .setDataSource("test")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimFilter(new EqualityFilter("a", ColumnType.STRING,
"b", null))
+ .addDimension("a")
+ .build();
+
+ final boolean isRealtime = projectionsCursorFactory instanceof
IncrementalIndexCursorFactory;
+ final ExpectedProjectionGroupBy queryMetrics = new
ExpectedProjectionGroupBy(isRealtime ? "abfoo" :
"a_hourly_c_sum_with_count_latest");
+
+ final CursorBuildSpec buildSpec =
GroupingEngine.makeCursorBuildSpec(query, queryMetrics);
+
+ assertCursorProjection(buildSpec, queryMetrics, isRealtime ? 2 : 1);
+
+ testGroupBy(
+ query,
+ queryMetrics,
+ makeArrayResultSet(
+ new Object[]{"b"}
+ )
+ );
+ }
+
+ @Test
+ public void testProjectionFilteredToEmpty()
+ {
+ final GroupByQuery query =
+ GroupByQuery.builder()
+ .setDataSource("test")
+ .setGranularity(Granularities.ALL)
+ .setInterval(Intervals.ETERNITY)
+ .setDimFilter(new EqualityFilter("a", ColumnType.STRING,
"nomatch", null))
+ .setContext(Map.of("useProjection",
"a_hourly_c_sum_filter_a_to_empty"))
+ .addDimension("a")
+ .build();
+
+ final ExpectedProjectionGroupBy queryMetrics = new
ExpectedProjectionGroupBy("a_hourly_c_sum_filter_a_to_empty");
+
+ final CursorBuildSpec buildSpec =
GroupingEngine.makeCursorBuildSpec(query, queryMetrics);
+
+ assertCursorProjection(buildSpec, queryMetrics, 0);
+
+ testGroupBy(
+ query,
+ queryMetrics,
+ makeArrayResultSet()
+ );
+ }
+
+ @Test
+ public void testProjectionFilteredToEmptyTimeseries()
+ {
+ final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
+ .dataSource("test")
+
.intervals(ImmutableList.of(Intervals.ETERNITY))
+ .granularity(Granularities.ALL)
+ .filters(new EqualityFilter("a",
ColumnType.STRING, "nomatch", null))
+ .aggregators(new
LongSumAggregatorFactory("c_sum", "c"))
+
.context(ImmutableMap.of(QueryContexts.USE_PROJECTION,
"a_hourly_c_sum_filter_a_to_empty"))
+ .build();
+
+ final ExpectedProjectionTimeseries queryMetrics =
+ new ExpectedProjectionTimeseries("a_hourly_c_sum_filter_a_to_empty");
+
+ final CursorBuildSpec buildSpec =
TimeseriesQueryEngine.makeCursorBuildSpec(query, queryMetrics);
+
+ assertCursorProjection(buildSpec, queryMetrics, 0);
+
+ // realltime results are inconsistent between projection and base table
since projection is totally empty, but base
+ // table is reduced with filter
+ final boolean isRealtime = projectionsCursorFactory instanceof
IncrementalIndexCursorFactory;
+ final List<Object[]> expectedResults = Collections.singletonList(new
Object[]{TIMESTAMP, null});
+ final List<Object[]> expectedRealtimeResults = List.of();
+
+ final Sequence<Result<TimeseriesResultValue>> resultRows =
timeseriesEngine.process(
+ query,
+ projectionsCursorFactory,
+ projectionsTimeBoundaryInspector,
+ queryMetrics
+ );
+
+ queryMetrics.assertProjection();
+
+ final List<Result<TimeseriesResultValue>> results = resultRows.toList();
+ assertTimeseriesResults(
+ query.getResultRowSignature(RowSignature.Finalization.YES),
+ isRealtime ? expectedRealtimeResults : expectedResults,
+ results
+ );
+
+ Assertions.assertEquals(TIMESTAMP,
projectionsTimeBoundaryInspector.getMinTime());
+ if (isRealtime || segmentSortedByTime) {
+ Assertions.assertEquals(TIMESTAMP.plusHours(1).plusMinutes(1),
projectionsTimeBoundaryInspector.getMaxTime());
+ } else {
+ Assertions.assertEquals(
Review Comment:
maybe add a comment here explaining that, for data segments that are not
sorted by time, the max time is inexact
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]