This is an automated email from the ASF dual-hosted git repository.

gongchao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hertzbeat.git


The following commit(s) were added to refs/heads/master by this push:
     new 32d784e57 fix: use UTF-8 to solve Chineses bug (#3792)
32d784e57 is described below

commit 32d784e57e9ef47c401381fa7f25ec03564f028d
Author: zhou yong kang <[email protected]>
AuthorDate: Sun Oct 5 09:17:33 2025 +0800

    fix: use UTF-8 to solve Chineses bug (#3792)
    
    Co-authored-by: Tomsun28 <[email protected]>
---
 .../collect/prometheus/parser/OnlineParser.java    | 68 +++++++++++++++++++++-
 .../prometheus/parser/OnlineParserSingleTest.java  | 21 +++++++
 2 files changed, 87 insertions(+), 2 deletions(-)

diff --git a/hertzbeat-collector/hertzbeat-collector-basic/src/main/java/org/apache/hertzbeat/collector/collect/prometheus/parser/OnlineParser.java b/hertzbeat-collector/hertzbeat-collector-basic/src/main/java/org/apache/hertzbeat/collector/collect/prometheus/parser/OnlineParser.java
index a68043816..bed4eb299 100644
--- a/hertzbeat-collector/hertzbeat-collector-basic/src/main/java/org/apache/hertzbeat/collector/collect/prometheus/parser/OnlineParser.java
+++ b/hertzbeat-collector/hertzbeat-collector-basic/src/main/java/org/apache/hertzbeat/collector/collect/prometheus/parser/OnlineParser.java
@@ -352,14 +352,49 @@ public class OnlineParser {
                         }
                     }
                 }
-            } else {
+            } else if (i <= 127) {
                 stringBuilder.append((char) i);
+            } else {
+                handleUtf8Character(i, inputStream, stringBuilder);
             }
             i = inputStream.read();
         }
         return new CharChecker(i);
     }
 
+    private static void handleUtf8Character(int firstByte, InputStream inputStream, StringBuilder stringBuilder) throws IOException {
+        List<Integer> bytes = new ArrayList<>();
+        bytes.add(firstByte);
+
+        int additionalBytes = getUtf8AdditionalByteCount(firstByte);
+
+        for (int j = 0; j < additionalBytes; j++) {
+            int nextByte = inputStream.read();
+            if (nextByte == -1) break;
+            bytes.add(nextByte);
+        }
+
+        byte[] byteArray = new byte[bytes.size()];
+        for (int j = 0; j < bytes.size(); j++) {
+            byteArray[j] = (byte) bytes.get(j).intValue();
+        }
+
+        try {
+            String utf8Chars = new String(byteArray, StandardCharsets.UTF_8);
+            stringBuilder.append(utf8Chars);
+        } catch (Exception e) {
+            stringBuilder.append((char) firstByte);
+        }
+    }
+
+    private static int getUtf8AdditionalByteCount(int firstByte) {
+        if ((firstByte & 0x80) == 0) return 0; // 0xxxxxxx - ASCII (shouldn't reach here)
+        if ((firstByte & 0xE0) == 0xC0) return 1; // 110xxxxx - 2 bytes total, 1 additional
+        if ((firstByte & 0xF0) == 0xE0) return 2; // 1110xxxx - 3 bytes total, 2 additional
+        if ((firstByte & 0xF8) == 0xF0) return 3; // 11110xxx - 4 bytes total, 3 additional
+        return 0;
+    }
+
     private static CharChecker skipSpaces(InputStream inputStream) throws IOException, FormatException {
         int i = getChar(inputStream);
         while (i == ' ') {
@@ -416,9 +451,10 @@ public class OnlineParser {
         skipSpaces(inputStream).maybeQuotationMark().noElse();
         parseLabelValue(inputStream, stringBuilder).maybeQuotationMark().noElse();
         String labelValue = stringBuilder.toString();
-        if (!labelValue.equals(new String(labelValue.getBytes(StandardCharsets.UTF_8)))) {
+        if (!isValidLabelValue(labelValue)) {
             throw new FormatException();
         }
+
         label.setValue(labelValue);
         stringBuilder.delete(0, stringBuilder.length());
         labelList.add(label);
@@ -491,4 +527,32 @@ public class OnlineParser {
         metricFamily.getMetricList().add(metric);
         return new CharChecker(i);
     }
+
+    private static boolean isValidLabelValue(String labelValue) {
+        if (labelValue == null) {
+            return false;
+        }
+
+        //Check if all characters are ASCII (0-127)
+        boolean isAscii = true;
+        for (int i = 0; i < labelValue.length(); i++) {
+            char c = labelValue.charAt(i);
+            if (c > 127) {
+                isAscii = false;
+                break;
+            }
+        }
+
+        if (isAscii) {
+            return true;
+        }
+
+        try {
+            byte[] bytes = labelValue.getBytes(StandardCharsets.UTF_8);
+            String reconstructed = new String(bytes, StandardCharsets.UTF_8);
+            return labelValue.equals(reconstructed);
+        } catch (Exception e) {
+            return false;
+        }
+    }
 }
diff --git a/hertzbeat-collector/hertzbeat-collector-basic/src/test/java/org/apache/hertzbeat/collector/collect/prometheus/parser/OnlineParserSingleTest.java b/hertzbeat-collector/hertzbeat-collector-basic/src/test/java/org/apache/hertzbeat/collector/collect/prometheus/parser/OnlineParserSingleTest.java
index 131df4eaa..3cc57bacc 100644
--- a/hertzbeat-collector/hertzbeat-collector-basic/src/test/java/org/apache/hertzbeat/collector/collect/prometheus/parser/OnlineParserSingleTest.java
+++ b/hertzbeat-collector/hertzbeat-collector-basic/src/test/java/org/apache/hertzbeat/collector/collect/prometheus/parser/OnlineParserSingleTest.java
@@ -444,6 +444,27 @@ public class OnlineParserSingleTest {
         assertEquals("NT AUTHORITY\nLocalService", metricFamily.getMetricList().get(0).getLabels().get(3).getValue());
     }
 
+    @Test
+    void testParseMetricsWithChineseLabels() throws Exception {
+        String str = "ST22{Dump_Name=\"Dump总数\",HostName=\"SAP_DEV\",instance_hostname=\"sapdev\"} 2\n";
+        InputStream inputStream = new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8));
+        Map<String, MetricFamily> metricFamilyMap = parseMetrics(inputStream, "ST22");
+
+        assertNotNull(metricFamilyMap);
+        MetricFamily metricFamily = metricFamilyMap.get("ST22");
+        assertNotNull(metricFamily);
+        assertEquals("ST22", metricFamily.getName());
+        assertEquals(1, metricFamily.getMetricList().size());
+        assertEquals(2.0, metricFamily.getMetricList().get(0).getValue());
+
+        // Verify Chinese label value is correctly parsed
+        MetricFamily.Label dumpNameLabel = metricFamily.getMetricList().get(0).getLabels().stream()
+            .filter(label -> "Dump_Name".equals(label.getName()))
+            .findFirst().orElse(null);
+        assertNotNull(dumpNameLabel);
+        assertEquals("Dump总数", dumpNameLabel.getValue());
+    }
+
     private Map<String, MetricFamily> parseMetrics(InputStream inputStream, String metric) throws IOException {
         return OnlineParser.parseMetrics(inputStream, metric);
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to