This is an automated email from the ASF dual-hosted git repository.
luchunliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/inlong.git
The following commit(s) were added to refs/heads/master by this push:
new d4f5f4674b [INLONG-11943][Sort] TransformFunction: url_encode supports
specifying character sets (#11946)
d4f5f4674b is described below
commit d4f5f4674bddb17dd8e1597dd5fd6cc1e86ae32b
Author: ChunLiang Lu <[email protected]>
AuthorDate: Thu Jul 24 09:33:32 2025 +0800
[INLONG-11943][Sort] TransformFunction: url_encode supports specifying
character sets (#11946)
---
.../process/function/string/UrlEncodeFunction.java | 28 ++++++++++++++++++----
.../function/string/TestUrlEncodeFunction.java | 27 +++++++++++++++++++++
2 files changed, 50 insertions(+), 5 deletions(-)
diff --git
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlEncodeFunction.java
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlEncodeFunction.java
index e7bf57ebcc..e5513508b1 100644
---
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlEncodeFunction.java
+++
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlEncodeFunction.java
@@ -24,10 +24,12 @@ import
org.apache.inlong.sdk.transform.process.function.TransformFunction;
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.Function;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
+import java.util.List;
/**
* UrlEncodeFunction -> url_encode(str)
@@ -37,20 +39,24 @@ import java.nio.charset.StandardCharsets;
* - Return the result of translating 'str' into
‘application/x-www-form-urlencoded’ format using the UTF-8 encoding scheme.
*/
@TransformFunction(type = FunctionConstant.STRING_TYPE, names = {
- "url_encode"}, parameter = "(String str)", descriptions = {
+ "url_encode"}, parameter = "(String str[, String charset])",
descriptions = {
"- Return \"\" if 'str' is NULL, or there is an issue with the
decoding process(such as encountering an "
+
"illegal escape pattern), or the encoding scheme is
not supported;",
"- Return the result of translating 'str' into
'application/x-www-form-urlencoded' format using the " +
- "UTF-8 encoding scheme."
+ "charset(default:UTF-8) encoding scheme."
}, examples = {
- "url_encode('https://apache.inlong.com/search?q=java url
encode') = \"https%3A%2F%2Fapache.inlong.com%2Fsearch%3Fq%3Djava+url+encode\""})
+ "url_encode('https://apache.inlong.com/search?q=java url
encode') = \"https%3A%2F%2Fapache.inlong.com%2Fsearch%3Fq%3Djava+url+encode\"",
+ "url_encode('https://apache.inlong.com/search?q=java url
encode','UTF-8') =
\"https%3A%2F%2Fapache.inlong.com%2Fsearch%3Fq%3Djava+url+encode\""})
public class UrlEncodeFunction implements ValueParser {
private final ValueParser stringParser;
+ private final ValueParser charsetParser;
public UrlEncodeFunction(Function expr) {
- stringParser =
OperatorTools.buildParser(expr.getParameters().getExpressions().get(0));
+ List<Expression> params = expr.getParameters().getExpressions();
+ stringParser = OperatorTools.buildParser(params.get(0));
+ charsetParser = params.size() > 1 ?
OperatorTools.buildParser(params.get(1)) : null;
}
@Override
@@ -66,7 +72,19 @@ public class UrlEncodeFunction implements ValueParser {
}
try {
- return URLEncoder.encode(string,
StandardCharsets.UTF_8.toString());
+ if (charsetParser == null) {
+ return URLEncoder.encode(string,
StandardCharsets.UTF_8.toString());
+ } else {
+ Object charsetObj = charsetParser.parse(sourceData, rowIndex,
context);
+ if (charsetObj == null) {
+ return null;
+ }
+ String charset = OperatorTools.parseString(charsetObj);
+ if (charset == null) {
+ return null;
+ }
+ return URLEncoder.encode(string, charset);
+ }
} catch (Exception e) {
return null;
}
diff --git
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestUrlEncodeFunction.java
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestUrlEncodeFunction.java
index 8a12800a64..fc67650a71 100644
---
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestUrlEncodeFunction.java
+++
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestUrlEncodeFunction.java
@@ -54,4 +54,31 @@ public class TestUrlEncodeFunction extends
AbstractFunctionStringTestBase {
Assert.assertEquals(1, output2.size());
Assert.assertEquals(output2.get(0), "result=");
}
+
+ @Test
+ public void testUrlEncodeCharsetFunction() throws Exception {
+ String transformSql = "select url_encode(string1,'GBK') from source";
+ TransformConfig config = new TransformConfig(transformSql);
+ TransformProcessor<String, String> processor = TransformProcessor
+ .create(config,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+
+ // case1: url_encode('A160=汕头市&vuserid=&version_build=76','GBK')
+ List<String> output1 =
processor.transform("A160=汕头市&vuserid=&version_build=76|banana|cloud|1",
+ new HashMap<>());
+ Assert.assertEquals(1, output1.size());
+ Assert.assertEquals(output1.get(0),
"result=A160%3D%C9%C7%CD%B7%CA%D0%26vuserid%3D%26version_build%3D76");
+
+ String transformSql2 = "select url_encode(string1,'UTF-8') from
source";
+ TransformConfig config2 = new TransformConfig(transformSql2);
+ TransformProcessor<String, String> processor2 = TransformProcessor
+ .create(config2,
SourceDecoderFactory.createCsvDecoder(csvSource),
+ SinkEncoderFactory.createKvEncoder(kvSink));
+ // case2: url_encode('A160=汕头市&vuserid=&version_build=76','UTF-8')
+ List<String> output2 =
processor2.transform("A160=汕头市&vuserid=&version_build=76|banana|cloud|1",
+ new HashMap<>());
+ Assert.assertEquals(1, output2.size());
+ Assert.assertEquals(output2.get(0),
+
"result=A160%3D%E6%B1%95%E5%A4%B4%E5%B8%82%26vuserid%3D%26version_build%3D76");
+ }
}