This is an automated email from the ASF dual-hosted git repository.
gongchao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hertzbeat.git
The following commit(s) were added to refs/heads/master by this push:
new 32d784e57 fix: use UTF-8 to solve Chineses bug (#3792)
32d784e57 is described below
commit 32d784e57e9ef47c401381fa7f25ec03564f028d
Author: zhou yong kang <[email protected]>
AuthorDate: Sun Oct 5 09:17:33 2025 +0800
fix: use UTF-8 to solve Chineses bug (#3792)
Co-authored-by: Tomsun28 <[email protected]>
---
.../collect/prometheus/parser/OnlineParser.java | 68 +++++++++++++++++++++-
.../prometheus/parser/OnlineParserSingleTest.java | 21 +++++++
2 files changed, 87 insertions(+), 2 deletions(-)
diff --git
a/hertzbeat-collector/hertzbeat-collector-basic/src/main/java/org/apache/hertzbeat/collector/collect/prometheus/parser/OnlineParser.java
b/hertzbeat-collector/hertzbeat-collector-basic/src/main/java/org/apache/hertzbeat/collector/collect/prometheus/parser/OnlineParser.java
index a68043816..bed4eb299 100644
---
a/hertzbeat-collector/hertzbeat-collector-basic/src/main/java/org/apache/hertzbeat/collector/collect/prometheus/parser/OnlineParser.java
+++
b/hertzbeat-collector/hertzbeat-collector-basic/src/main/java/org/apache/hertzbeat/collector/collect/prometheus/parser/OnlineParser.java
@@ -352,14 +352,49 @@ public class OnlineParser {
}
}
}
- } else {
+ } else if (i <= 127) {
stringBuilder.append((char) i);
+ } else {
+ handleUtf8Character(i, inputStream, stringBuilder);
}
i = inputStream.read();
}
return new CharChecker(i);
}
+ /**
+  * Decodes one multi-byte UTF-8 sequence from the stream and appends the result to the builder.
+  *
+  * <p>The caller invokes this only for a first byte greater than 127. The number of follow-up
+  * bytes is derived from that first byte via {@link #getUtf8AdditionalByteCount(int)}; they are
+  * read from {@code inputStream} and the whole sequence is decoded as UTF-8.
+  *
+  * <p>Decoding uses {@code new String(byte[], int, int, Charset)}, which never throws: malformed
+  * or truncated sequences are replaced with the charset's replacement character, so no
+  * exception fallback is needed here.
+  *
+  * <p>NOTE(review): follow-up bytes are consumed without checking that they are continuation
+  * bytes (10xxxxxx). After a malformed lead byte this can swallow the next delimiter of the
+  * exposition text; fixing that needs push-back support on the stream.
+  *
+  * @param firstByte     the already-read lead byte of the sequence (value above 127)
+  * @param inputStream   stream positioned directly after {@code firstByte}
+  * @param stringBuilder target that receives the decoded character(s)
+  * @throws IOException if reading from {@code inputStream} fails
+  */
+ private static void handleUtf8Character(int firstByte, InputStream inputStream,
+         StringBuilder stringBuilder) throws IOException {
+     int additionalBytes = getUtf8AdditionalByteCount(firstByte);
+
+     // Lead byte plus its follow-up bytes; a primitive array avoids boxing every byte.
+     byte[] sequence = new byte[1 + additionalBytes];
+     sequence[0] = (byte) firstByte;
+     int length = 1;
+
+     while (length < sequence.length) {
+         int nextByte = inputStream.read();
+         if (nextByte == -1) {
+             // Stream ended mid-sequence: decode what was read so far.
+             break;
+         }
+         sequence[length++] = (byte) nextByte;
+     }
+
+     stringBuilder.append(new String(sequence, 0, length, StandardCharsets.UTF_8));
+ }
+
+ /**
+  * Returns how many bytes follow the given lead byte in a UTF-8 sequence.
+  *
+  * @param firstByte the first byte of a sequence, as an int
+  * @return 1, 2 or 3 for the lead bytes of 2-, 3- and 4-byte sequences; 0 for ASCII and for
+  *         any byte that matches none of the lead-byte patterns below
+  */
+ private static int getUtf8AdditionalByteCount(int firstByte) {
+     if ((firstByte & 0x80) == 0) {
+         return 0; // 0xxxxxxx - ASCII (shouldn't reach here)
+     }
+     if ((firstByte & 0xE0) == 0xC0) {
+         return 1; // 110xxxxx - 2 bytes total, 1 additional
+     }
+     if ((firstByte & 0xF0) == 0xE0) {
+         return 2; // 1110xxxx - 3 bytes total, 2 additional
+     }
+     if ((firstByte & 0xF8) == 0xF0) {
+         return 3; // 11110xxx - 4 bytes total, 3 additional
+     }
+     // No lead-byte pattern matched (e.g. 10xxxxxx or 11111xxx): treat as a single byte.
+     return 0;
+ }
+
private static CharChecker skipSpaces(InputStream inputStream) throws
IOException, FormatException {
int i = getChar(inputStream);
while (i == ' ') {
@@ -416,9 +451,10 @@ public class OnlineParser {
skipSpaces(inputStream).maybeQuotationMark().noElse();
parseLabelValue(inputStream,
stringBuilder).maybeQuotationMark().noElse();
String labelValue = stringBuilder.toString();
- if (!labelValue.equals(new
String(labelValue.getBytes(StandardCharsets.UTF_8)))) {
+ if (!isValidLabelValue(labelValue)) {
throw new FormatException();
}
+
label.setValue(labelValue);
stringBuilder.delete(0, stringBuilder.length());
labelList.add(label);
@@ -491,4 +527,32 @@ public class OnlineParser {
metricFamily.getMetricList().add(metric);
return new CharChecker(i);
}
+
+ /**
+  * Checks whether a parsed label value can be represented losslessly in UTF-8.
+  *
+  * <p>Pure-ASCII values are accepted immediately. Any other value is encoded to UTF-8 and
+  * decoded again, and is accepted only if the result equals the input. Neither
+  * {@code String.getBytes(Charset)} nor {@code new String(byte[], Charset)} throws on bad
+  * data (both substitute a replacement), so no exception handling is required.
+  *
+  * <p>NOTE(review): a value that already contains the replacement character U+FFFD still
+  * round-trips, so this presumably rejects only strings the encoder cannot represent
+  * (such as an unpaired surrogate) - confirm that is the intended strictness.
+  *
+  * @param labelValue the label value to check; may be {@code null}
+  * @return {@code false} for {@code null} or a value that does not survive the UTF-8 round
+  *         trip, {@code true} otherwise
+  */
+ private static boolean isValidLabelValue(String labelValue) {
+     if (labelValue == null) {
+         return false;
+     }
+
+     // Fast path: ASCII-only text always survives the round trip, so skip the allocations.
+     if (labelValue.chars().allMatch(ch -> ch <= 127)) {
+         return true;
+     }
+
+     byte[] bytes = labelValue.getBytes(StandardCharsets.UTF_8);
+     return labelValue.equals(new String(bytes, StandardCharsets.UTF_8));
+ }
}
diff --git
a/hertzbeat-collector/hertzbeat-collector-basic/src/test/java/org/apache/hertzbeat/collector/collect/prometheus/parser/OnlineParserSingleTest.java
b/hertzbeat-collector/hertzbeat-collector-basic/src/test/java/org/apache/hertzbeat/collector/collect/prometheus/parser/OnlineParserSingleTest.java
index 131df4eaa..3cc57bacc 100644
---
a/hertzbeat-collector/hertzbeat-collector-basic/src/test/java/org/apache/hertzbeat/collector/collect/prometheus/parser/OnlineParserSingleTest.java
+++
b/hertzbeat-collector/hertzbeat-collector-basic/src/test/java/org/apache/hertzbeat/collector/collect/prometheus/parser/OnlineParserSingleTest.java
@@ -444,6 +444,27 @@ public class OnlineParserSingleTest {
assertEquals("NT AUTHORITY\nLocalService",
metricFamily.getMetricList().get(0).getLabels().get(3).getValue());
}
+ // Feeds one UTF-8 encoded sample of metric "ST22" whose Dump_Name label value contains
+ // Chinese characters, and checks that the family name, the single metric, its value and
+ // the Dump_Name label value are all returned as written.
+ @Test
+ void testParseMetricsWithChineseLabels() throws Exception {
+ String str =
"ST22{Dump_Name=\"Dump总数\",HostName=\"SAP_DEV\",instance_hostname=\"sapdev\"}
2\n";
+ // Encode explicitly as UTF-8 so the parser receives multi-byte sequences.
+ InputStream inputStream = new
ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8));
+ Map<String, MetricFamily> metricFamilyMap = parseMetrics(inputStream,
"ST22");
+
+ assertNotNull(metricFamilyMap);
+ MetricFamily metricFamily = metricFamilyMap.get("ST22");
+ assertNotNull(metricFamily);
+ assertEquals("ST22", metricFamily.getName());
+ assertEquals(1, metricFamily.getMetricList().size());
+ assertEquals(2.0, metricFamily.getMetricList().get(0).getValue());
+
+ // Verify Chinese label value is correctly parsed
+ // Look the label up by name rather than by position in the label list.
+ MetricFamily.Label dumpNameLabel =
metricFamily.getMetricList().get(0).getLabels().stream()
+ .filter(label -> "Dump_Name".equals(label.getName()))
+ .findFirst().orElse(null);
+ assertNotNull(dumpNameLabel);
+ assertEquals("Dump总数", dumpNameLabel.getValue());
+ }
+
// Test helper: forwards the stream and metric name unchanged to OnlineParser.parseMetrics
// and returns its result.
private Map<String, MetricFamily> parseMetrics(InputStream inputStream,
String metric) throws IOException {
return OnlineParser.parseMetrics(inputStream, metric);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]