This is an automated email from the ASF dual-hosted git repository.
dockerzhang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/inlong.git
The following commit(s) were added to refs/heads/master by this push:
new 899aeabf5b [INLONG-11369][Sort] Fix the KV split error when there is an
escape char that does not precede & or = in text (#11370)
899aeabf5b is described below
commit 899aeabf5b04648372ce1f14d22ce8b6c5845455
Author: Mingyu Bao <[email protected]>
AuthorDate: Mon Oct 21 14:11:35 2024 +0800
[INLONG-11369][Sort] Fix the KV split error when there is an escape char
that does not precede & or = in text (#11370)
---
.../inlong/sort/formats/util/StringUtils.java | 90 ++++++++++++----------
.../sort/formats/common/StringUtilsTest.java | 21 +++--
.../apache/inlong/sort/formats/kv/KvUtilsTest.java | 2 +-
3 files changed, 66 insertions(+), 47 deletions(-)
diff --git
a/inlong-sort/sort-formats/format-common/src/main/java/org/apache/inlong/sort/formats/util/StringUtils.java
b/inlong-sort/sort-formats/format-common/src/main/java/org/apache/inlong/sort/formats/util/StringUtils.java
index 3ea6678ca1..000d7a7175 100644
---
a/inlong-sort/sort-formats/format-common/src/main/java/org/apache/inlong/sort/formats/util/StringUtils.java
+++
b/inlong-sort/sort-formats/format-common/src/main/java/org/apache/inlong/sort/formats/util/StringUtils.java
@@ -100,11 +100,17 @@ public class StringUtils {
*/
int kvState = STATE_KEY;
- char lastCh = 0;
+ char nextCh = 0;
for (int i = 0; i < text.length(); ++i) {
char ch = text.charAt(i);
+ if ((i + 1) < text.length()) {
+ nextCh = text.charAt(i + 1);
+ } else {
+ nextCh = 0;
+ }
if (ch == kvDelimiter) {
switch (state) {
+ // match the previous kv delimiter first when there is more than one kvDelimiter
case STATE_KEY:
key = stringBuilder.toString();
stringBuilder.setLength(0);
@@ -124,24 +130,19 @@ public class StringUtils {
} else if (ch == entryDelimiter) {
switch (state) {
case STATE_KEY:
- key = lastKey;
- if (lastValue == null) {
- value = ch + stringBuilder.toString();
- } else {
- value = lastValue + ch + stringBuilder.toString();
- }
- fields.put(key, value);
- lastKey = key;
- lastValue = value;
- stringBuilder.setLength(0);
+ stringBuilder.append(ch);
break;
case STATE_VALUE:
- value = stringBuilder.toString();
- fields.put(key, value);
- lastKey = key;
- lastValue = value;
- stringBuilder.setLength(0);
- state = STATE_KEY;
+ if (nextCh == entryDelimiter) {
+ stringBuilder.append(ch);
+ } else {
+ value = stringBuilder.toString();
+ fields.put(key, value);
+ lastKey = key;
+ lastValue = value;
+ stringBuilder.setLength(0);
+ state = STATE_KEY;
+ }
break;
case STATE_ESCAPING:
stringBuilder.append(ch);
@@ -154,12 +155,6 @@ public class StringUtils {
} else if (escapeChar != null && ch == escapeChar) {
switch (state) {
case STATE_KEY:
- if (lastCh != 0) {
- stringBuilder.append(lastCh);
- }
- kvState = state;
- state = STATE_ESCAPING;
- break;
case STATE_VALUE:
kvState = state;
state = STATE_ESCAPING;
@@ -175,12 +170,6 @@ public class StringUtils {
} else if (quoteChar != null && ch == quoteChar) {
switch (state) {
case STATE_KEY:
- if (lastCh != 0) {
- stringBuilder.append(lastCh);
- }
- kvState = state;
- state = STATE_QUOTING;
- break;
case STATE_VALUE:
kvState = state;
state = STATE_QUOTING;
@@ -196,20 +185,26 @@ public class StringUtils {
} else if (lineDelimiter != null && ch == lineDelimiter) {
switch (state) {
case STATE_KEY:
+ String remainingKey = stringBuilder.toString();
key = lastKey;
- stringBuilder.append(lastValue).append(lastCh);
+ stringBuilder.setLength(0);
+
stringBuilder.append(lastValue).append(entryDelimiter).append(remainingKey);
value = stringBuilder.toString();
fields.put(key, value);
+ Map<String, String> copyFields = new HashMap<>();
+ copyFields.putAll(fields);
+ lines.add(copyFields);
+ stringBuilder.setLength(0);
+ fields.clear();
lastKey = null;
lastValue = null;
- stringBuilder.setLength(0);
break;
case STATE_VALUE:
lastKey = null;
lastValue = null;
value = stringBuilder.toString();
fields.put(key, value);
- Map<String, String> copyFields = new HashMap<>();
+ copyFields = new HashMap<>();
copyFields.putAll(fields);
lines.add(copyFields);
stringBuilder.setLength(0);
@@ -226,14 +221,22 @@ public class StringUtils {
}
} else {
stringBuilder.append(ch);
+ switch (state) {
+ case STATE_ESCAPING:
+ state = kvState;
+ }
}
- lastCh = ch;
}
switch (state) {
case STATE_KEY:
if (lastKey != null && lastValue != null && text != null) {
- fields.put(lastKey, lastValue + lastCh);
+ String remainingKey = stringBuilder.toString();
+ key = lastKey;
+ stringBuilder.setLength(0);
+
stringBuilder.append(lastValue).append(entryDelimiter).append(remainingKey);
+ value = stringBuilder.toString();
+ fields.put(key, value);
}
lines.add(fields);
return lines;
@@ -244,14 +247,19 @@ public class StringUtils {
return lines;
case STATE_ESCAPING:
case STATE_QUOTING:
- value = stringBuilder.toString();
- String oldValue = fields.get(key);
- if (value != null && !"".equals(value)
- && oldValue != null && !"".equals(oldValue)) {
- fields.put(key, oldValue + value);
- } else if (value != null && !"".equals(value)) {
- fields.put(key, value);
+ switch (kvState) {
+ case STATE_VALUE:
+ value = stringBuilder.toString();
+ fields.put(key, value);
+ break;
+ case STATE_KEY:
+ if (lastKey != null) {
+ value = stringBuilder.toString();
+ String oldValue = fields.get(key);
+ fields.put(key, oldValue + entryDelimiter + value);
+ }
}
+
lines.add(fields);
return lines;
default:
diff --git
a/inlong-sort/sort-formats/format-common/src/test/java/org/apache/inlong/sort/formats/common/StringUtilsTest.java
b/inlong-sort/sort-formats/format-common/src/test/java/org/apache/inlong/sort/formats/common/StringUtilsTest.java
index fc64811a97..b9c88ed788 100644
---
a/inlong-sort/sort-formats/format-common/src/test/java/org/apache/inlong/sort/formats/common/StringUtilsTest.java
+++
b/inlong-sort/sort-formats/format-common/src/test/java/org/apache/inlong/sort/formats/common/StringUtilsTest.java
@@ -19,11 +19,13 @@ package org.apache.inlong.sort.formats.common;
import org.apache.inlong.sort.formats.util.StringUtils;
+import org.junit.Assert;
import org.junit.Test;
import java.util.List;
import java.util.Map;
+import static org.apache.inlong.sort.formats.util.StringUtils.splitKv;
import static org.junit.Assert.assertEquals;
public class StringUtilsTest {
@@ -55,17 +57,17 @@ public class StringUtilsTest {
'=', '\\', '\'', '\n');
assertEquals("=", map4.get(0).get("name"));
assertEquals("20&&", map4.get(0).get("age"));
- assertEquals("=", map4.get(0).get("name1"));
- assertEquals("20&&", map4.get(0).get("age1"));
+ assertEquals("=", map4.get(1).get("name1"));
+ assertEquals("20&&", map4.get(1).get("age1"));
String kvString5 =
"name==&age=20&&\nname1==&age1=20&&&value=aaa&dddd&";
List<Map<String, String>> map5 = StringUtils.splitKv(kvString5, '&',
'=', '\\', '\'', '\n');
assertEquals("=", map5.get(0).get("name"));
assertEquals("20&&", map5.get(0).get("age"));
- assertEquals("=", map5.get(0).get("name1"));
- assertEquals("20&&", map5.get(0).get("age1"));
- assertEquals("aaa&dddd&", map5.get(0).get("value"));
+ assertEquals("=", map5.get(1).get("name1"));
+ assertEquals("20&&", map5.get(1).get("age1"));
+ assertEquals("aaa&dddd&", map5.get(1).get("value"));
String kvString6 = "name==&age=20&&\\";
List<Map<String, String>> map6 = StringUtils.splitKv(kvString6, '&',
@@ -153,4 +155,13 @@ public class StringUtilsTest {
assertEquals("home", csv1Array4[2][1]);
assertEquals("home", csv1Array4[2][2]);
}
+
+ @Test
+ public void testKvScapeCharSplit() {
+ String text = "k1=v1&\nk\\2=v2\\&&k3=v3";
+ Map<String, String> kvMap = splitKv(text, '&', '=', '\\', null);
+ Assert.assertTrue(kvMap != null && kvMap.size() == 3);
+ Assert.assertTrue(kvMap.get("k3") != null);
+ Assert.assertTrue(kvMap.get("\nk2") != null);
+ }
}
diff --git
a/inlong-sort/sort-formats/format-row/format-kv/src/test/java/org/apache/inlong/sort/formats/kv/KvUtilsTest.java
b/inlong-sort/sort-formats/format-row/format-kv/src/test/java/org/apache/inlong/sort/formats/kv/KvUtilsTest.java
index 37bbe758aa..953d607f9c 100644
---
a/inlong-sort/sort-formats/format-row/format-kv/src/test/java/org/apache/inlong/sort/formats/kv/KvUtilsTest.java
+++
b/inlong-sort/sort-formats/format-row/format-kv/src/test/java/org/apache/inlong/sort/formats/kv/KvUtilsTest.java
@@ -210,7 +210,7 @@ public class KvUtilsTest {
public void testSplitDanglingKey2() {
Map<String, String> kvMap = splitKv("f1&f2=3", '&',
'=', null, null);
- Assert.assertEquals("3", kvMap.get("f2"));
+ Assert.assertEquals("3", kvMap.get("f1&f2"));
}
@Test