This is an automated email from the ASF dual-hosted git repository. wusheng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/skywalking.git
The following commit(s) were added to refs/heads/master by this push: new e448febb4f Add a release mechanism for alarm windows when it is expired in case of OOM. (#11336) e448febb4f is described below commit e448febb4f669f823164be6fe54ccebe9f866680 Author: dylanforest <dylanwu...@foxmail.com> AuthorDate: Sat Sep 16 07:13:40 2023 +0800 Add a release mechanism for alarm windows when it is expired in case of OOM. (#11336) --- docs/en/changes/changes.md | 1 + .../server/core/alarm/provider/RunningRule.java | 18 ++ .../core/alarm/provider/RunningRuleTest.java | 197 ++++++++++++--------- 3 files changed, 136 insertions(+), 80 deletions(-) diff --git a/docs/en/changes/changes.md b/docs/en/changes/changes.md index 813e28b213..ff96e24e17 100644 --- a/docs/en/changes/changes.md +++ b/docs/en/changes/changes.md @@ -10,6 +10,7 @@ * Support Kafka Monitoring. * [Breaking Change] Elasticsearch storage merge all management data indices into one index `management`, including `ui_template,ui_menu,continuous_profiling_policy`. +* Add a release mechanism for alarm windows when it is expired in case of OOM. #### UI diff --git a/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/RunningRule.java b/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/RunningRule.java index 62cedae9ff..961473bb0d 100644 --- a/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/RunningRule.java +++ b/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/RunningRule.java @@ -198,8 +198,14 @@ public class RunningRule { */ public List<AlarmMessage> check() { List<AlarmMessage> alarmMessageList = new ArrayList<>(30); + List<AlarmEntity> expiredEntityList = new ArrayList<>(); windows.forEach((alarmEntity, window) -> { + if (window.isExpired()) { + expiredEntityList.add(alarmEntity); + return; + } + Optional<AlarmMessage> alarmMessageOptional = window.checkAlarm(); if (alarmMessageOptional.isPresent()) { AlarmMessage alarmMessage = alarmMessageOptional.get(); @@ -218,6 +224,7 @@ public class RunningRule { } }); + expiredEntityList.forEach(windows::remove); return alarmMessageList; } @@ -383,6 +390,17 @@ public class RunningRule { return isMatch == 1; } + public boolean isExpired() { + if (this.values != null) { + for (Map<String, Metrics> value : this.values) { + if (value != null) { + return false; + } + } + } + return true; + } + private void init() { values = new LinkedList<>(); for (int i = 0; i < period; i++) { diff --git a/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/RunningRuleTest.java b/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/RunningRuleTest.java index dc50eefc52..60d2932fba 100644 --- a/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/RunningRuleTest.java +++ b/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/RunningRuleTest.java @@ -26,6 +26,7 @@ import org.apache.skywalking.oap.server.core.Const; import org.apache.skywalking.oap.server.core.alarm.AlarmCallback; import org.apache.skywalking.oap.server.core.alarm.AlarmMessage; import org.apache.skywalking.oap.server.core.alarm.MetaInAlarm; +import org.apache.skywalking.oap.server.core.analysis.TimeBucket; import org.apache.skywalking.oap.server.core.analysis.metrics.DataTable; import org.apache.skywalking.oap.server.core.analysis.metrics.IntValueHolder; import org.apache.skywalking.oap.server.core.analysis.metrics.LabeledValueHolder; @@ -38,9 +39,8 @@ import org.apache.skywalking.oap.server.core.source.DefaultScopeDefine; import org.apache.skywalking.oap.server.core.storage.StorageID; import org.apache.skywalking.oap.server.core.storage.annotation.Column; import org.apache.skywalking.oap.server.core.storage.annotation.ValueColumnMetadata; +import org.joda.time.DateTime; import org.joda.time.LocalDateTime; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -58,8 +58,6 @@ import java.util.Objects; * So in this test, we need to simulate a lot of scenario to see the reactions. */ public class RunningRuleTest { - private static DateTimeFormatter TIME_BUCKET_FORMATTER = DateTimeFormat.forPattern("yyyyMMddHHmm"); - @BeforeEach public void setup() { ValueColumnMetadata.INSTANCE.putIfAbsent( @@ -81,8 +79,11 @@ public class RunningRuleTest { put("key", "value"); }}); RunningRule runningRule = new RunningRule(alarmRule); - LocalDateTime startTime = TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301434"); - long timeInPeriod1 = 201808301434L; + + DateTime startTime = DateTime.now(); + long timeInPeriod1 = TimeBucket.getMinuteTimeBucket(startTime.getMillis()); + DateTime targetTime = new DateTime(TimeBucket.getTimestamp(timeInPeriod1)); + runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod1, 70)); Map<AlarmEntity, RunningRule.Window> windows = Whitebox.getInternalState(runningRule, "windows"); @@ -92,7 +93,7 @@ public class RunningRuleTest { int period = Whitebox.getInternalState(window, "period"); LinkedList<Metrics> metricsBuffer = Whitebox.getInternalState(window, "values"); - Assertions.assertTrue(startTime.equals(endTime)); + Assertions.assertTrue(targetTime.equals(endTime.toDateTime())); Assertions.assertEquals(15, period); Assertions.assertEquals(15, metricsBuffer.size()); } @@ -109,26 +110,53 @@ public class RunningRuleTest { put("key", "value"); }}); RunningRule runningRule = new RunningRule(alarmRule); - LocalDateTime startTime = TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301440"); - long timeInPeriod1 = 201808301434L; - long timeInPeriod2 = 201808301436L; - long timeInPeriod3 = 201808301438L; + DateTime startTime = DateTime.now(); + long timeInPeriod1 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(6).getMillis()); + long timeInPeriod2 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(4).getMillis()); + long timeInPeriod3 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(2).getMillis()); runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod1, 70)); runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod2, 71)); - // check at 201808301440 + // check at startTime - 4 List<AlarmMessage> alarmMessages = runningRule.check(); Assertions.assertEquals(0, alarmMessages.size()); + // check at startTime - 2 runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod3, 74)); - - // check at 201808301440 alarmMessages = runningRule.check(); Assertions.assertEquals(1, alarmMessages.size()); } + @Test + public void testAlarmMetricsOutOfDate() throws IllegalExpressionException { + AlarmRule alarmRule = new AlarmRule(); + alarmRule.setAlarmRuleName("endpoint_percent_rule"); + alarmRule.setExpression("sum(endpoint_percent < 75) >= 3"); + alarmRule.getIncludeMetrics().add("endpoint_percent"); + alarmRule.setPeriod(15); + alarmRule.setMessage("Successful rate of endpoint {name} is lower than 75%"); + alarmRule.setTags(new HashMap<String, String>() {{ + put("key", "value"); + }}); + RunningRule runningRule = new RunningRule(alarmRule); + + DateTime startTime = DateTime.now(); + long timeInPeriod1 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(153).getMillis()); + long timeInPeriod2 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(152).getMillis()); + long timeInPeriod3 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(151).getMillis()); + + runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod1, 70)); + runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod2, 71)); + runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod3, 74)); + + // check at startTime + runningRule.moveTo(startTime.toLocalDateTime()); + List<AlarmMessage> alarmMessages = runningRule.check(); + Assertions.assertEquals(0, alarmMessages.size()); + } + @Test public void testMultipleValuesAlarm() throws IllegalExpressionException { AlarmRule alarmRule = new AlarmRule(); @@ -141,26 +169,24 @@ public class RunningRuleTest { put("key", "value"); }}); RunningRule runningRule = new RunningRule(alarmRule); - LocalDateTime startTime = TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301440"); - long timeInPeriod1 = 201808301434L; - long timeInPeriod2 = 201808301436L; - long timeInPeriod3 = 201808301438L; + DateTime startTime = DateTime.now(); + long timeInPeriod1 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(6).getMillis()); + long timeInPeriod2 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(4).getMillis()); + long timeInPeriod3 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(2).getMillis()); runningRule.in(getMetaInAlarm(123, "endpoint_multiple_values"), getMultipleValueMetrics(timeInPeriod1, 70, 60, 40, 40, 40)); runningRule.in(getMetaInAlarm(123, "endpoint_multiple_values"), getMultipleValueMetrics(timeInPeriod2, 60, 60, 40, 40, 40)); - // check at 201808301440 + // check at startTime - 4 List<AlarmMessage> alarmMessages = runningRule.check(); Assertions.assertEquals(0, alarmMessages.size()); - runningRule.moveTo(TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301441")); + // check at now + runningRule.moveTo(startTime.toLocalDateTime()); runningRule.in(getMetaInAlarm(123, "endpoint_multiple_values"), getMultipleValueMetrics(timeInPeriod3, 74, 60, 40, 40, 40)); - - // check at 201808301440 alarmMessages = runningRule.check(); Assertions.assertEquals(1, alarmMessages.size()); - runningRule.moveTo(TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301441")); } @Test @@ -199,11 +225,10 @@ public class RunningRuleTest { put("key", "value"); }}); RunningRule runningRule = new RunningRule(alarmRule); - LocalDateTime startTime = TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301440"); - - long timeInPeriod1 = 201808301434L; - long timeInPeriod2 = 201808301436L; - long timeInPeriod3 = 201808301438L; + DateTime startTime = DateTime.now(); + long timeInPeriod1 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(6).getMillis()); + long timeInPeriod2 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(4).getMillis()); + long timeInPeriod3 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(2).getMillis()); runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod1, 70)); runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod2, 71)); @@ -232,7 +257,6 @@ public class RunningRuleTest { put("key", "value"); }}); RunningRule runningRule = new RunningRule(alarmRule); - LocalDateTime startTime = TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301441"); final boolean[] isAlarm = {false}; AlarmCallback assertCallback = new AlarmCallback() { @@ -244,24 +268,28 @@ public class RunningRuleTest { LinkedList<AlarmCallback> callbackList = new LinkedList<>(); callbackList.add(assertCallback); - long timeInPeriod1 = 201808301434L; - long timeInPeriod2 = 201808301436L; - long timeInPeriod3 = 201808301438L; - long timeInPeriod4 = 201808301432L; - long timeInPeriod5 = 201808301440L; + DateTime startTime = DateTime.now(); + long timeInPeriod1 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(7).getMillis()); + long timeInPeriod2 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(5).getMillis()); + long timeInPeriod3 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(3).getMillis()); + long timeInPeriod4 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(9).getMillis()); + long timeInPeriod5 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(1).getMillis()); + runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod1, 70)); runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod2, 71)); runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod3, 74)); runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod4, 90)); runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod5, 95)); - // check at 201808301440 + // check at startTime - 1 Assertions.assertEquals(0, runningRule.check().size()); - runningRule.moveTo(TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301442")); - // check at 201808301441 + + // check at startTime + runningRule.moveTo(startTime.toLocalDateTime()); Assertions.assertEquals(0, runningRule.check().size()); - runningRule.moveTo(TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301443")); - // check at 201808301442 + + // check at startTime + 1 + runningRule.moveTo(startTime.plusMinutes(1).toLocalDateTime()); Assertions.assertEquals(0, runningRule.check().size()); } @@ -278,23 +306,24 @@ public class RunningRuleTest { }}); RunningRule runningRule = new RunningRule(alarmRule); - long timeInPeriod1 = 201808301434L; - long timeInPeriod2 = 201808301436L; - long timeInPeriod3 = 201808301438L; + DateTime startTime = DateTime.now(); + long timeInPeriod1 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(6).getMillis()); + long timeInPeriod2 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(4).getMillis()); + long timeInPeriod3 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(2).getMillis()); + runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod1, 70)); runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod2, 71)); - // check at 201808301440 + // check at startTime - 4 Assertions.assertEquals(0, runningRule.check().size()); //check matches, no alarm - runningRule.moveTo(TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301441")); + // check at startTime + runningRule.moveTo(startTime.toLocalDateTime()); runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod3, 74)); - - // check at 201808301440 Assertions.assertEquals(1, runningRule.check().size()); //alarm - runningRule.moveTo(TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301441")); - // check at 201808301442 + // check at starTime + 1 + runningRule.moveTo(startTime.plusMinutes(1).toLocalDateTime()); Assertions.assertEquals(0, runningRule.check().size()); //silence, no alarm Assertions.assertEquals(0, runningRule.check().size()); //silence, no alarm Assertions.assertNotEquals(0, runningRule.check().size()); //alarm @@ -317,21 +346,24 @@ public class RunningRuleTest { }}); RunningRule runningRule = new RunningRule(alarmRule); - long timeInPeriod1 = 201808301434L; - long timeInPeriod2 = 201808301436L; - long timeInPeriod3 = 201808301438L; + DateTime startTime = DateTime.now(); + long timeInPeriod1 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(6).getMillis()); + long timeInPeriod2 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(4).getMillis()); + long timeInPeriod3 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(2).getMillis()); runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod1, 70)); runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod2, 71)); runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod3, 74)); - // check at 201808301440 + // check at startTime - 2 Assertions.assertEquals(0, runningRule.check().size()); - runningRule.moveTo(TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301441")); - // check at 201808301441 + + // check at startTime + runningRule.moveTo(startTime.toLocalDateTime()); Assertions.assertEquals(0, runningRule.check().size()); - runningRule.moveTo(TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301442")); - // check at 201808301442 + + // check at startTime + 1 + runningRule.moveTo(startTime.plusMinutes(1).toLocalDateTime()); Assertions.assertEquals(0, runningRule.check().size()); } @@ -350,21 +382,24 @@ public class RunningRuleTest { }}); RunningRule runningRule = new RunningRule(alarmRule); - long timeInPeriod1 = 201808301434L; - long timeInPeriod2 = 201808301436L; - long timeInPeriod3 = 201808301439L; + DateTime startTime = DateTime.now(); + long timeInPeriod1 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(6).getMillis()); + long timeInPeriod2 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(4).getMillis()); + long timeInPeriod3 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(1).getMillis()); runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod1, 70)); runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod2, 70)); runningRule.in(getMetaInAlarm(223), getMetrics(timeInPeriod3, 74)); - // check at 201808301440 + // check at startTime - 1 Assertions.assertEquals(1, runningRule.check().size()); - runningRule.moveTo(TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301441")); - // check at 201808301441 + + // check at startTime + runningRule.moveTo(startTime.toLocalDateTime()); Assertions.assertEquals(1, runningRule.check().size()); - runningRule.moveTo(TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301446")); - // check at 201808301442 + + // check at startTime + 6 + runningRule.moveTo(startTime.plusMinutes(6).toLocalDateTime()); Assertions.assertEquals(0, runningRule.check().size()); } @@ -383,21 +418,24 @@ public class RunningRuleTest { }}); RunningRule runningRule = new RunningRule(alarmRule); - long timeInPeriod1 = 201808301434L; - long timeInPeriod2 = 201808301436L; - long timeInPeriod3 = 201808301439L; + DateTime startTime = DateTime.now(); + long timeInPeriod1 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(6).getMillis()); + long timeInPeriod2 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(4).getMillis()); + long timeInPeriod3 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(1).getMillis()); runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod1, 70)); runningRule.in(getMetaInAlarm(123), getMetrics(timeInPeriod2, 70)); runningRule.in(getMetaInAlarm(223), getMetrics(timeInPeriod3, 74)); - // check at 201808301440 + // check at startTime - 1 Assertions.assertEquals(1, runningRule.check().size()); - runningRule.moveTo(TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301441")); - // check at 201808301441 + + // check at startTime + runningRule.moveTo(startTime.toLocalDateTime()); Assertions.assertEquals(1, runningRule.check().size()); - runningRule.moveTo(TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301446")); - // check at 201808301442 + + // check at startTime + 6 + runningRule.moveTo(startTime.plusMinutes(6).toLocalDateTime()); Assertions.assertEquals(0, runningRule.check().size()); } @@ -640,24 +678,23 @@ public class RunningRuleTest { put("key", "value"); }}); RunningRule runningRule = new RunningRule(alarmRule); - LocalDateTime startTime = TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301440"); - long timeInPeriod1 = 201808301434L; - long timeInPeriod2 = 201808301436L; - long timeInPeriod3 = 201808301438L; + DateTime startTime = DateTime.now(); + long timeInPeriod1 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(6).getMillis()); + long timeInPeriod2 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(4).getMillis()); + long timeInPeriod3 = TimeBucket.getMinuteTimeBucket(startTime.minusMinutes(2).getMillis()); runningRule.in(getMetaInAlarm(123, "endpoint_labeled"), getLabeledValueMetrics(timeInPeriod1, value1)); runningRule.in(getMetaInAlarm(123, "endpoint_labeled"), getLabeledValueMetrics(timeInPeriod2, value2)); + // check at startTime - 4 List<AlarmMessage> alarmMessages = runningRule.check(); Assertions.assertEquals(0, alarmMessages.size()); - runningRule.moveTo(TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301441")); + // check at startTime + runningRule.moveTo(startTime.toLocalDateTime()); runningRule.in(getMetaInAlarm(123, "endpoint_labeled"), getLabeledValueMetrics(timeInPeriod3, value3)); - - // check at 201808301440 alarmMessages = runningRule.check(); Assertions.assertEquals(alarmMsgSize, alarmMessages.size()); - runningRule.moveTo(TIME_BUCKET_FORMATTER.parseLocalDateTime("201808301441")); } }