This is an automated email from the ASF dual-hosted git repository.
luchunliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/inlong.git
The following commit(s) were added to refs/heads/master by this push:
new b8c290a7e4 [INLONG-11931][Sort] Optimize Transform's CSV/KV parsing (#11932)
b8c290a7e4 is described below
commit b8c290a7e4e056865d216a3e7e9d788f3c052dc7
Author: ChunLiang Lu <[email protected]>
AuthorDate: Tue Jul 15 23:54:17 2025 +0800
[INLONG-11931][Sort] Optimize Transform's CSV/KV parsing (#11932)
* [INLONG-11931][Sort] Optimize Transform's CSV/KV parsing
* fix comment
* fix comment
* fix test case
---
.../org/apache/inlong/sdk/transform/decode/CsvSourceDecoder.java | 2 +-
.../org/apache/inlong/sdk/transform/decode/KvSourceDecoder.java | 2 +-
.../sdk/transform/process/function/json/TestJsonQuoteFunction.java | 4 ++--
.../sdk/transform/process/function/json/TestJsonSetFunction.java | 2 +-
.../transform/process/function/json/TestJsonUnQuoteFunction.java | 6 +++---
5 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/decode/CsvSourceDecoder.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/decode/CsvSourceDecoder.java
index 4eb13de398..4f9f6e672d 100644
--- a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/decode/CsvSourceDecoder.java
+++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/decode/CsvSourceDecoder.java
@@ -59,7 +59,7 @@ public class CsvSourceDecoder extends SourceDecoder<String> {
@Override
public SourceData decode(String srcString, Context context) {
- String[][] rowValues = SplitUtils.splitCsv(srcString, delimiter,
escapeChar, '\"', '\n', true);
+ String[][] rowValues = KvUtils.splitCsv(srcString, delimiter,
escapeChar, null, '\n', false);
CsvSourceData sourceData = new CsvSourceData(context);
for (int i = 0; i < rowValues.length; i++) {
String[] fieldValues = rowValues[i];
diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/decode/KvSourceDecoder.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/decode/KvSourceDecoder.java
index 049d7fb610..dee23a8d4f 100644
--- a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/decode/KvSourceDecoder.java
+++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/decode/KvSourceDecoder.java
@@ -38,7 +38,7 @@ public class KvSourceDecoder extends SourceDecoder<String> {
private Character entryDelimiter = '&';
private Character kvDelimiter = '=';
private Character escapeChar = '\\';
- private Character quoteChar = '\"';
+ private Character quoteChar = null;
private Character lineDelimiter = '\n';
private Charset srcCharset = Charset.defaultCharset();
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/json/TestJsonQuoteFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/json/TestJsonQuoteFunction.java
index 2391cb4be5..d85eeae2dc 100644
--- a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/json/TestJsonQuoteFunction.java
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/json/TestJsonQuoteFunction.java
@@ -52,7 +52,7 @@ public class TestJsonQuoteFunction extends
AbstractFunctionJsonTestBase {
data = "This is a \"quoted\" string|xxd|cloud|7|3|3";
output = processor.transform(data, new HashMap<>());
Assert.assertEquals(1, output.size());
- Assert.assertEquals("result=\"This is a quoted string\"",
output.get(0));
+ Assert.assertEquals("result=\"This is a \\\"quoted\\\" string\"",
output.get(0));
// case3: json_quote('A back\slash:')
data = "A back\\slash:|xxd|cloud|7|3|3";
@@ -124,7 +124,7 @@ public class TestJsonQuoteFunction extends
AbstractFunctionJsonTestBase {
data = "This is a \"quoted\" string|xxd|cloud|7|3|3";
output = processor.transform(data, new HashMap<>());
Assert.assertEquals(1, output.size());
- Assert.assertEquals("result=\"This is a quoted string\"",
output.get(0));
+ Assert.assertEquals("result=\"This is a \\\"quoted\\\" string\"",
output.get(0));
// case3: json_string('A back\slash:')
data = "A back\\slash:|xxd|cloud|7|3|3";
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/json/TestJsonSetFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/json/TestJsonSetFunction.java
index 119db13104..35bdf5bee7 100644
--- a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/json/TestJsonSetFunction.java
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/json/TestJsonSetFunction.java
@@ -68,6 +68,6 @@ public class TestJsonSetFunction extends
AbstractFunctionJsonTestBase {
data = json_doc +
"|$|{\\\"newKey\\\":\\\"newValue\\\"}|\"$.newKey\"|\"new1\"";
output = processor.transform(data, new HashMap<>());
Assert.assertEquals(1, output.size());
- Assert.assertEquals("result={\"newKey\":\"new1\"}", output.get(0));
+ Assert.assertEquals("result=", output.get(0));
}
}
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/json/TestJsonUnQuoteFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/json/TestJsonUnQuoteFunction.java
index 6b8db8784a..28d66a2efc 100644
--- a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/json/TestJsonUnQuoteFunction.java
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/json/TestJsonUnQuoteFunction.java
@@ -58,7 +58,7 @@ public class TestJsonUnQuoteFunction extends
AbstractFunctionJsonTestBase {
data = "\"A back\\slash:\"|xxd|cloud|7|3|3";
output = processor.transform(data, new HashMap<>());
Assert.assertEquals(1, output.size());
- Assert.assertEquals("result=A back\\slash:", output.get(0));
+ Assert.assertEquals("result=A backslash:", output.get(0));
// case4: is_digit('')
data = "\"Column1\tColumn2\"|xxd|cloud|7|3|3";
@@ -70,13 +70,13 @@ public class TestJsonUnQuoteFunction extends
AbstractFunctionJsonTestBase {
data = "\"Quotes ' and double quotes \\\"\\\"|xxd|cloud|7|3|3";
output = processor.transform(data, new HashMap<>());
Assert.assertEquals(1, output.size());
- Assert.assertEquals("result=Quotes ' and double quotes \\\"",
output.get(0));
+ Assert.assertEquals("result=\"Quotes ' and double quotes \"\"",
output.get(0));
// case4: is_digit('')
data = "\"Complex string with / and \\\"|xxd|cloud|7|3|3";
output = processor.transform(data, new HashMap<>());
Assert.assertEquals(1, output.size());
- Assert.assertEquals("result=Complex string with / and \\",
output.get(0));
+ Assert.assertEquals("result=Complex string with / and ",
output.get(0));
// case4: is_digit('')
data = "\"Unicode test: ሴ噸\"|xxd|cloud|7|3|3";