This is an automated email from the ASF dual-hosted git repository.
kfaraz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new cac8b9da8fa Restrict segment metadata kill query till maxInterval from
last kill task time (#17770)
cac8b9da8fa is described below
commit cac8b9da8fa6f4fe061b5c171fe8ade3317d50e1
Author: Chetan Patidar <[email protected]>
AuthorDate: Tue Mar 4 13:42:58 2025 +0530
Restrict segment metadata kill query till maxInterval from last kill task
time (#17770)
Changes
---------
- Use `maxIntervalToKill` to limit the search interval for killing unused
segments once a segment has already been killed for the datasource.
- If no segment has been killed for the datasource yet, use `durationToRetain`
to determine the search interval.
---
.../coordinator/duty/KillUnusedSegments.java | 18 ++++++--
.../coordinator/duty/KillUnusedSegmentsTest.java | 50 ++++++++++++++++++++++
2 files changed, 65 insertions(+), 3 deletions(-)
diff --git
a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java
b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java
index 4538d39de95..cd1c1746912 100644
---
a/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java
+++
b/server/src/main/java/org/apache/druid/server/coordinator/duty/KillUnusedSegments.java
@@ -273,9 +273,21 @@ public class KillUnusedSegments implements CoordinatorDuty
)
{
final DateTime minStartTime =
datasourceToLastKillIntervalEnd.get(dataSource);
- final DateTime maxEndTime = ignoreDurationToRetain
- ? DateTimes.COMPARE_DATE_AS_STRING_MAX
- : DateTimes.nowUtc().minus(durationToRetain);
+
+ // Once the first segment from a datasource is killed, we have a valid
minStartTime.
+ // Restricting the upper bound to scan segments metadata while running the
kill task results in an efficient SQL query.
+ final DateTime maxEndTime;
+ if (ignoreDurationToRetain) {
+ maxEndTime = DateTimes.COMPARE_DATE_AS_STRING_MAX;
+ } else if (minStartTime == null) {
+ maxEndTime = DateTimes.nowUtc().minus(durationToRetain);
+ } else {
+ // If we have already killed a segment, limit the kill interval based on
the minStartTime
+ maxEndTime = DateTimes.min(
+ DateTimes.nowUtc().minus(durationToRetain),
+ minStartTime.plus(maxIntervalToKill)
+ );
+ }
final List<Interval> unusedSegmentIntervals = limitToPeriod(
segmentsMetadataManager.getUnusedSegmentIntervals(
diff --git
a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java
b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java
index 59df67e4b49..272ed1887ce 100644
---
a/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java
+++
b/server/src/test/java/org/apache/druid/server/coordinator/duty/KillUnusedSegmentsTest.java
@@ -78,6 +78,7 @@ public class KillUnusedSegmentsTest
private static final DateTime NOW = DateTimes.nowUtc();
private static final Interval YEAR_OLD = new Interval(Period.days(1),
NOW.minusDays(365));
private static final Interval MONTH_OLD = new Interval(Period.days(1),
NOW.minusDays(30));
+ private static final Interval FIFTEEN_DAY_OLD = new Interval(Period.days(1),
NOW.minusDays(15));
private static final Interval DAY_OLD = new Interval(Period.days(1),
NOW.minusDays(1));
private static final Interval HOUR_OLD = new Interval(Period.days(1),
NOW.minusHours(1));
private static final Interval NEXT_DAY = new Interval(Period.days(1),
NOW.plusDays(1));
@@ -604,6 +605,55 @@ public class KillUnusedSegmentsTest
validateLastKillStateAndReset(DS1, YEAR_OLD);
}
+ @Test
+ public void testMaxIntervalToKillOverridesDurationToRetain()
+ {
+ configBuilder.withDurationToRetain(Period.hours(6).toStandardDuration())
+ .withMaxIntervalToKill(Period.days(20));
+
+ initDuty();
+
+ createAndAddUnusedSegment(DS1, MONTH_OLD, VERSION, NOW.minusDays(29));
+ CoordinatorRunStats newDatasourceStats = runDutyAndGetStats();
+
+ // For a new datasource, the duration to retain is used to determine kill
interval
+ Assert.assertEquals(1,
newDatasourceStats.get(Stats.Kill.ELIGIBLE_UNUSED_SEGMENTS, DS1_STAT_KEY));
+ validateLastKillStateAndReset(DS1, MONTH_OLD);
+
+ // For a datasource where kill has already happened, maxIntervalToKill is
used
+ // if it leads to a smaller kill interval than durationToRetain
+ createAndAddUnusedSegment(DS1, FIFTEEN_DAY_OLD, VERSION,
NOW.minusDays(14));
+ createAndAddUnusedSegment(DS1, DAY_OLD, VERSION, NOW.minusHours(2));
+ CoordinatorRunStats oldDatasourceStats = runDutyAndGetStats();
+
+ Assert.assertEquals(2,
oldDatasourceStats.get(Stats.Kill.ELIGIBLE_UNUSED_SEGMENTS, DS1_STAT_KEY));
+ validateLastKillStateAndReset(DS1, FIFTEEN_DAY_OLD);
+ }
+
+ @Test
+ public void testDurationToRetainOverridesMaxIntervalToKill()
+ {
+ configBuilder.withDurationToRetain(Period.days(20).toStandardDuration())
+ .withMaxIntervalToKill(Period.days(350));
+
+ initDuty();
+
+ createAndAddUnusedSegment(DS1, YEAR_OLD, VERSION, NOW.minusDays(29));
+ CoordinatorRunStats newDatasourceStats = runDutyAndGetStats();
+
+ Assert.assertEquals(1,
newDatasourceStats.get(Stats.Kill.ELIGIBLE_UNUSED_SEGMENTS, DS1_STAT_KEY));
+ validateLastKillStateAndReset(DS1, YEAR_OLD);
+
+ // For a datasource where (now - durationToRetain) < (lastKillTime(year
old segment) + maxInterval)
+ // Fifteen day old segment will be rejected
+ createAndAddUnusedSegment(DS1, MONTH_OLD, VERSION, NOW.minusDays(29));
+ createAndAddUnusedSegment(DS1, FIFTEEN_DAY_OLD, VERSION,
NOW.minusDays(14));
+ CoordinatorRunStats oldDatasourceStats = runDutyAndGetStats();
+
+ Assert.assertEquals(2,
oldDatasourceStats.get(Stats.Kill.ELIGIBLE_UNUSED_SEGMENTS, DS1_STAT_KEY));
+ validateLastKillStateAndReset(DS1, MONTH_OLD);
+ }
+
@Test
public void testHigherMaxIntervalToKill()
{
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]