This is an automated email from the ASF dual-hosted git repository.

wusheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/skywalking.git


The following commit(s) were added to refs/heads/master by this push:
     new 062b31f4ba Fix AlarmRule expression validation: add labeled metrics 
mock data for check. (#11437)
062b31f4ba is described below

commit 062b31f4ba5586c33f301dd4052f602894b784c4
Author: Wan Kai <wankai...@foxmail.com>
AuthorDate: Fri Oct 20 10:45:15 2023 +0800

    Fix AlarmRule expression validation: add labeled metrics mock data for 
check. (#11437)
---
 docs/en/changes/changes.md                         |  1 +
 .../oap/server/core/alarm/provider/AlarmRule.java  | 18 +-----
 .../provider/expr/rt/AlarmMQEVerifyVisitor.java    | 66 ++++++++++++++++++++--
 .../server/core/alarm/provider/AlarmRuleTest.java  | 18 ++++--
 test/e2e-v2/cases/alarm/alarm-settings.yml         | 11 ++--
 .../alarm/expected/silence-after-graphql-warn.yml  | 48 ++++++++++++++++
 .../alarm/expected/silence-before-graphql-warn.yml | 24 ++++++++
 7 files changed, 156 insertions(+), 30 deletions(-)

diff --git a/docs/en/changes/changes.md b/docs/en/changes/changes.md
index a027a04a80..cebc12bf8c 100644
--- a/docs/en/changes/changes.md
+++ b/docs/en/changes/changes.md
@@ -25,6 +25,7 @@
 * Fix `AlarmCore` doAlarm: catch exception for each callback to avoid 
interruption.
 * Optimize queryBasicTraces in TraceQueryEsDAO.
 * Fix `WebhookCallback` send incorrect messages, add catch exception for each 
callback HTTP Post.
+* Fix AlarmRule expression validation: add labeled metrics mock data for check.
 
 #### UI
 
diff --git 
a/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRule.java
 
b/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRule.java
index 5094957265..76c6e6cc1a 100644
--- 
a/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRule.java
+++ 
b/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRule.java
@@ -21,7 +21,6 @@ package org.apache.skywalking.oap.server.core.alarm.provider;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.Map;
-import java.util.Optional;
 import java.util.Set;
 import lombok.AccessLevel;
 import lombok.Data;
@@ -39,7 +38,6 @@ import org.apache.skywalking.mqe.rt.grammar.MQEParser;
 import org.apache.skywalking.mqe.rt.type.ExpressionResult;
 import org.apache.skywalking.mqe.rt.type.ExpressionResultType;
 import 
org.apache.skywalking.oap.server.core.alarm.provider.expr.rt.AlarmMQEVerifyVisitor;
-import org.apache.skywalking.oap.server.core.storage.annotation.Column;
 import 
org.apache.skywalking.oap.server.core.storage.annotation.ValueColumnMetadata;
 import org.apache.skywalking.oap.server.library.util.StringUtil;
 
@@ -95,25 +93,11 @@ public class AlarmRule {
     private void verifyIncludeMetrics(Set<String> includeMetrics, String 
expression) throws IllegalExpressionException {
         Set<String> scopeSet = new HashSet<>();
         for (String metricName : includeMetrics) {
-            Optional<ValueColumnMetadata.ValueColumn> valueColumn = 
ValueColumnMetadata.INSTANCE.readValueColumnDefinition(
-                metricName);
-            if (valueColumn.isEmpty()) {
-                throw new IllegalExpressionException("Metric: [" + metricName 
+ "] dose not exist.");
-            }
             
scopeSet.add(ValueColumnMetadata.INSTANCE.getScope(metricName).name());
-            Column.ValueDataType dataType = valueColumn.get().getDataType();
-            switch (dataType) {
-                case COMMON_VALUE:
-                case LABELED_VALUE:
-                    break;
-                default:
-                    throw new IllegalExpressionException(
-                        "Metric dose not supported in alarm, metric: [" + 
metricName + "] is not a common or labeled metric.");
-            }
         }
         if (scopeSet.size() != 1) {
             throw new IllegalExpressionException(
-                "The metrics in expression: " + expression + " must have the 
same scope level, but got: " + scopeSet);
+                "The metrics in expression: " + expression + " must have the 
same scope level, but got: " + scopeSet + ".");
         }
     }
 }
diff --git 
a/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/expr/rt/AlarmMQEVerifyVisitor.java
 
b/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/expr/rt/AlarmMQEVerifyVisitor.java
index 78c93afdd7..c6c556f9c8 100644
--- 
a/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/expr/rt/AlarmMQEVerifyVisitor.java
+++ 
b/oap-server/server-alarm-plugin/src/main/java/org/apache/skywalking/oap/server/core/alarm/provider/expr/rt/AlarmMQEVerifyVisitor.java
@@ -18,7 +18,12 @@
 
 package org.apache.skywalking.oap.server.core.alarm.provider.expr.rt;
 
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashSet;
+import java.util.List;
+import java.util.Optional;
 import java.util.Set;
 import lombok.Getter;
 import lombok.extern.slf4j.Slf4j;
@@ -28,6 +33,13 @@ import org.apache.skywalking.mqe.rt.type.ExpressionResult;
 import org.apache.skywalking.mqe.rt.type.ExpressionResultType;
 import org.apache.skywalking.mqe.rt.type.MQEValue;
 import org.apache.skywalking.mqe.rt.type.MQEValues;
+import org.apache.skywalking.mqe.rt.type.Metadata;
+import org.apache.skywalking.oap.server.core.Const;
+import org.apache.skywalking.oap.server.core.query.type.KeyValue;
+import org.apache.skywalking.oap.server.core.storage.annotation.Column;
+import 
org.apache.skywalking.oap.server.core.storage.annotation.ValueColumnMetadata;
+import org.apache.skywalking.oap.server.library.util.CollectionUtils;
+import org.apache.skywalking.oap.server.library.util.StringUtil;
 
 /**
  * Used for verify the alarm expression and get the metrics name when read the 
alarm rules.
@@ -41,6 +53,14 @@ public class AlarmMQEVerifyVisitor extends MQEVisitorBase {
     public ExpressionResult visitMetric(MQEParser.MetricContext ctx) {
         ExpressionResult result = new ExpressionResult();
         String metricName = ctx.metricName().getText();
+        Optional<ValueColumnMetadata.ValueColumn> valueColumn = 
ValueColumnMetadata.INSTANCE.readValueColumnDefinition(
+            metricName);
+        if (valueColumn.isEmpty()) {
+            result.setType(ExpressionResultType.UNKNOWN);
+            result.setError("Metric: [" + metricName + "] dose not exist.");
+            return result;
+        }
+
         this.includeMetrics.add(metricName);
 
         if (ctx.parent instanceof MQEParser.TopNOPContext) {
@@ -48,13 +68,49 @@ public class AlarmMQEVerifyVisitor extends MQEVisitorBase {
             result.setError("Unsupported operation: [top_n] in alarm 
expression.");
             return result;
         }
+        Column.ValueDataType dataType = valueColumn.get().getDataType();
 
-        MQEValues mqeValues = new MQEValues();
+        MQEValues mockMqeValues = new MQEValues();
         MQEValue mqeValue = new MQEValue();
         mqeValue.setEmptyValue(true);
-        mqeValues.getValues().add(mqeValue);
-        result.getResults().add(mqeValues);
-        result.setType(ExpressionResultType.TIME_SERIES_VALUES);
-        return result;
+        mockMqeValues.getValues().add(mqeValue);
+        if (dataType == Column.ValueDataType.COMMON_VALUE) {
+            result.getResults().add(mockMqeValues);
+            result.setType(ExpressionResultType.TIME_SERIES_VALUES);
+            return result;
+        } else if (dataType == Column.ValueDataType.LABELED_VALUE) {
+            List<String> labelValues = Collections.emptyList();
+            if (ctx.label() != null) {
+                String labelValue = ctx.label().labelValue().getText();
+                String labelValueTrim = labelValue.substring(1, 
labelValue.length() - 1);
+                if (StringUtil.isNotBlank(labelValueTrim)) {
+                    labelValues = 
Arrays.asList(labelValueTrim.split(Const.COMMA));
+                }
+            }
+            ArrayList<MQEValues> mqeValuesList = new ArrayList<>();
+            if (CollectionUtils.isEmpty(labelValues)) {
+                KeyValue label = new KeyValue(GENERAL_LABEL_NAME, 
GENERAL_LABEL_NAME);
+                Metadata metadata = new Metadata();
+                metadata.getLabels().add(label);
+                mockMqeValues.setMetric(metadata);
+                mqeValuesList.add(mockMqeValues);
+            } else {
+                for (String value : labelValues) {
+                    Metadata metadata = new Metadata();
+                    KeyValue label = new KeyValue(GENERAL_LABEL_NAME, value);
+                    metadata.getLabels().add(label);
+                    mockMqeValues.setMetric(metadata);
+                    mqeValuesList.add(mockMqeValues);
+                }
+            }
+            result.setType(ExpressionResultType.TIME_SERIES_VALUES);
+            result.setResults(mqeValuesList);
+            result.setLabeledResult(true);
+            return result;
+        } else {
+            result.setType(ExpressionResultType.UNKNOWN);
+            result.setError("Metric dose not supported in alarm, metric: [" + 
metricName + "] is not a common or labeled metric.");
+            return result;
+        }
     }
 }
diff --git 
a/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRuleTest.java
 
b/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRuleTest.java
index 098d1d3592..64e353587a 100644
--- 
a/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRuleTest.java
+++ 
b/oap-server/server-alarm-plugin/src/test/java/org/apache/skywalking/oap/server/core/alarm/provider/AlarmRuleTest.java
@@ -41,6 +41,10 @@ public class AlarmRuleTest {
             "endpoint_percent", "testColumn", 
Column.ValueDataType.COMMON_VALUE, Function.Avg, 0,
             Scope.Endpoint.getScopeId()
         );
+        ValueColumnMetadata.INSTANCE.putIfAbsent(
+            "meter_status_code", "testColumn", 
Column.ValueDataType.LABELED_VALUE, Function.Avg, 0,
+            Scope.Service.getScopeId()
+        );
         ValueColumnMetadata.INSTANCE.putIfAbsent(
             "record", "testColumn", Column.ValueDataType.SAMPLED_RECORD, 
Function.Avg, 0,
             Scope.Endpoint.getScopeId()
@@ -58,9 +62,15 @@ public class AlarmRuleTest {
     @Test
     public void testExpressionVerify() throws IllegalExpressionException {
         AlarmRule rule = new AlarmRule();
-        //normal
+        //normal common metric
         rule.setExpression("sum(service_percent < 85) >= 3");
-
+        //normal labeled metric
+        //4xx + 5xx > 10
+        
rule.setExpression("sum(aggregate_labels(meter_status_code{_='4xx,5xx'},sum) > 
10) > 3");
+        rule.setExpression("sum(aggregate_labels(meter_status_code,sum) > 10) 
> 3");
+        //4xx or 5xx > 10
+        rule.setExpression("sum(meter_status_code{_='4xx,5xx'} > 10) >= 3");
+        rule.setExpression("sum(meter_status_code > 10) >= 3");
         //illegal expression
         Assertions.assertThrows(IllegalExpressionException.class, () -> {
             rule.setExpression("what? sum(service_percent < 85) >= 3");
@@ -68,7 +78,7 @@ public class AlarmRuleTest {
 
         //not exist metric
         Assertions.assertEquals(
-            "Metric: [service_percent111] dose not exist.",
+            "Expression: sum(service_percent111 < 85) >= 3 error: Metric: 
[service_percent111] dose not exist.",
             Assertions.assertThrows(IllegalExpressionException.class, () -> {
                 rule.setExpression("sum(service_percent111 < 85) >= 3");
             }).getMessage()
@@ -92,7 +102,7 @@ public class AlarmRuleTest {
 
         //not a common or labeled metric
         Assertions.assertEquals(
-            "Metric dose not supported in alarm, metric: [record] is not a 
common or labeled metric.",
+            "Expression: sum(record < 85) > 1 error: Metric dose not supported 
in alarm, metric: [record] is not a common or labeled metric.",
             Assertions.assertThrows(IllegalExpressionException.class, () -> {
                 rule.setExpression("sum(record < 85) > 1");
             }).getMessage()
diff --git a/test/e2e-v2/cases/alarm/alarm-settings.yml 
b/test/e2e-v2/cases/alarm/alarm-settings.yml
index 4046d44740..40df6f0a75 100755
--- a/test/e2e-v2/cases/alarm/alarm-settings.yml
+++ b/test/e2e-v2/cases/alarm/alarm-settings.yml
@@ -25,12 +25,15 @@ rules:
       receivers: lisi
     hooks:
       - webhook.custom
-  # service sla > 1%
-  service_sla_rule:
-    expression: sum(service_sla > 100) >= 1
+  # service_percentile > 10ms
+  service_percentile_rule:
+    expression: sum(service_percentile{_='0,1,2,3,4'} > 10) >= 3
     period: 10
     silence-period: 1
-    message: Successful rate of service {name} is more than 1% in 1 minutes of 
last 10 minutes
+    message: Percentile response time of service {name} alarm in 3 minutes of 
last 10 minutes, due to more than one condition of p50 > 10, p75 > 10, p90 > 
10, p95 > 10, p99 > 10.
+    tags:
+      level: WARNING
+      receivers: lisi
     hooks:
       - webhook.none
   comp_rule:
diff --git a/test/e2e-v2/cases/alarm/expected/silence-after-graphql-warn.yml 
b/test/e2e-v2/cases/alarm/expected/silence-after-graphql-warn.yml
index 6fd21b956a..641150f9da 100644
--- a/test/e2e-v2/cases/alarm/expected/silence-after-graphql-warn.yml
+++ b/test/e2e-v2/cases/alarm/expected/silence-after-graphql-warn.yml
@@ -15,6 +15,30 @@
 
 msgs:
   {{- contains .msgs }}
+  - starttime: {{ gt .starttime 0 }}
+    scope: Service
+    id: ZTJlLXNlcnZpY2UtcHJvdmlkZXI=.1
+    message: Percentile response time of service e2e-service-provider alarm in 
3 minutes of last 10 minutes, due to more than one condition of p50 > 10, p75 > 
10, p90 > 10, p95 > 10, p99 > 10.
+    tags:
+      - key: level
+        value: WARNING
+      - key: receivers
+        value: lisi
+    events:
+      {{- contains .events }}
+      - uuid: {{ notEmpty .uuid }}
+        source:
+          service: e2e-service-provider
+          serviceinstance: ""
+          endpoint: ""
+        name: Alarm
+        type: ""
+        message: {{ notEmpty .message }}
+        parameters: []
+        starttime: {{ gt .starttime 0 }}
+        endtime: {{ gt .endtime 0 }}
+        layer: GENERAL
+      {{- end }}
   - starttime: {{ gt .starttime 0 }}
     scope: Service
     id: ZTJlLXNlcnZpY2UtcHJvdmlkZXI=.1
@@ -39,6 +63,30 @@ msgs:
         endtime: {{ gt .endtime 0 }}
         layer: GENERAL
       {{- end }}
+  - starttime: {{ gt .starttime 0 }}
+    scope: Service
+    id: ZTJlLXNlcnZpY2UtcHJvdmlkZXI=.1
+    message: Percentile response time of service e2e-service-provider alarm in 
3 minutes of last 10 minutes, due to more than one condition of p50 > 10, p75 > 
10, p90 > 10, p95 > 10, p99 > 10.
+    tags:
+      - key: level
+        value: WARNING
+      - key: receivers
+        value: lisi
+    events:
+      {{- contains .events }}
+      - uuid: {{ notEmpty .uuid }}
+        source:
+          service: e2e-service-provider
+          serviceinstance: ""
+          endpoint: ""
+        name: Alarm
+        type: ""
+        message: {{ notEmpty .message }}
+        parameters: []
+        starttime: {{ gt .starttime 0 }}
+        endtime: {{ gt .endtime 0 }}
+        layer: GENERAL
+      {{- end }}
   - starttime: {{ gt .starttime 0 }}
     scope: Service
     id: ZTJlLXNlcnZpY2UtcHJvdmlkZXI=.1
diff --git a/test/e2e-v2/cases/alarm/expected/silence-before-graphql-warn.yml 
b/test/e2e-v2/cases/alarm/expected/silence-before-graphql-warn.yml
index 4a6e55462e..e8d5c1ca88 100644
--- a/test/e2e-v2/cases/alarm/expected/silence-before-graphql-warn.yml
+++ b/test/e2e-v2/cases/alarm/expected/silence-before-graphql-warn.yml
@@ -15,6 +15,30 @@
 
 msgs:
   {{- contains .msgs }}
+  - starttime: {{ gt .starttime 0 }}
+    scope: Service
+    id: ZTJlLXNlcnZpY2UtcHJvdmlkZXI=.1
+    message: Percentile response time of service e2e-service-provider alarm in 
3 minutes of last 10 minutes, due to more than one condition of p50 > 10, p75 > 
10, p90 > 10, p95 > 10, p99 > 10.
+    tags:
+      - key: level
+        value: WARNING
+      - key: receivers
+        value: lisi
+    events:
+      {{- contains .events }}
+      - uuid: {{ notEmpty .uuid }}
+        source:
+          service: e2e-service-provider
+          serviceinstance: ""
+          endpoint: ""
+        name: Alarm
+        type: ""
+        message: {{ notEmpty .message }}
+        parameters: []
+        starttime: {{ gt .starttime 0 }}
+        endtime: {{ gt .endtime 0 }}
+        layer: GENERAL
+      {{- end }}
   - starttime: {{ gt .starttime 0 }}
     scope: Service
     id: ZTJlLXNlcnZpY2UtcHJvdmlkZXI=.1

Reply via email to