This is an automated email from the ASF dual-hosted git repository.

luchunliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/inlong.git


The following commit(s) were added to refs/heads/master by this push:
     new d4f5f4674b [INLONG-11943][Sort] TransformFunction: url_encode supports 
specifying character sets (#11946)
d4f5f4674b is described below

commit d4f5f4674bddb17dd8e1597dd5fd6cc1e86ae32b
Author: ChunLiang Lu <[email protected]>
AuthorDate: Thu Jul 24 09:33:32 2025 +0800

    [INLONG-11943][Sort] TransformFunction: url_encode supports specifying 
character sets (#11946)
---
 .../process/function/string/UrlEncodeFunction.java | 28 ++++++++++++++++++----
 .../function/string/TestUrlEncodeFunction.java     | 27 +++++++++++++++++++++
 2 files changed, 50 insertions(+), 5 deletions(-)

diff --git 
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlEncodeFunction.java
 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlEncodeFunction.java
index e7bf57ebcc..e5513508b1 100644
--- 
a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlEncodeFunction.java
+++ 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/string/UrlEncodeFunction.java
@@ -24,10 +24,12 @@ import 
org.apache.inlong.sdk.transform.process.function.TransformFunction;
 import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
 import org.apache.inlong.sdk.transform.process.parser.ValueParser;
 
+import net.sf.jsqlparser.expression.Expression;
 import net.sf.jsqlparser.expression.Function;
 
 import java.net.URLEncoder;
 import java.nio.charset.StandardCharsets;
+import java.util.List;
 
 /**
  * UrlEncodeFunction  ->  url_encode(str)
@@ -37,20 +39,24 @@ import java.nio.charset.StandardCharsets;
  * - Return the result of translating 'str' into 
‘application/x-www-form-urlencoded’ format using the UTF-8 encoding scheme.
  */
 @TransformFunction(type = FunctionConstant.STRING_TYPE, names = {
-        "url_encode"}, parameter = "(String str)", descriptions = {
+        "url_encode"}, parameter = "(String str[, String charset])", 
descriptions = {
                 "- Return \"\" if 'str' is NULL, or there is an issue with the 
decoding process(such as encountering an "
                         +
                         "illegal escape pattern), or the encoding scheme is 
not supported;",
                 "- Return the result of translating 'str' into 
'application/x-www-form-urlencoded' format using the " +
-                        "UTF-8 encoding scheme."
+                        "charset(default:UTF-8) encoding scheme."
         }, examples = {
-                "url_encode('https://apache.inlong.com/search?q=java url 
encode') = \"https%3A%2F%2Fapache.inlong.com%2Fsearch%3Fq%3Djava+url+encode\""})
+                "url_encode('https://apache.inlong.com/search?q=java url 
encode') = \"https%3A%2F%2Fapache.inlong.com%2Fsearch%3Fq%3Djava+url+encode\"",
+                "url_encode('https://apache.inlong.com/search?q=java url 
encode','UTF-8') = 
\"https%3A%2F%2Fapache.inlong.com%2Fsearch%3Fq%3Djava+url+encode\""})
 public class UrlEncodeFunction implements ValueParser {
 
     private final ValueParser stringParser;
+    private final ValueParser charsetParser;
 
     public UrlEncodeFunction(Function expr) {
-        stringParser = 
OperatorTools.buildParser(expr.getParameters().getExpressions().get(0));
+        List<Expression> params = expr.getParameters().getExpressions();
+        stringParser = OperatorTools.buildParser(params.get(0));
+        charsetParser = params.size() > 1 ? 
OperatorTools.buildParser(params.get(1)) : null;
     }
 
     @Override
@@ -66,7 +72,19 @@ public class UrlEncodeFunction implements ValueParser {
         }
 
         try {
-            return URLEncoder.encode(string, 
StandardCharsets.UTF_8.toString());
+            if (charsetParser == null) {
+                return URLEncoder.encode(string, 
StandardCharsets.UTF_8.toString());
+            } else {
+                Object charsetObj = charsetParser.parse(sourceData, rowIndex, 
context);
+                if (charsetObj == null) {
+                    return null;
+                }
+                String charset = OperatorTools.parseString(charsetObj);
+                if (charset == null) {
+                    return null;
+                }
+                return URLEncoder.encode(string, charset);
+            }
         } catch (Exception e) {
             return null;
         }
diff --git 
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestUrlEncodeFunction.java
 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestUrlEncodeFunction.java
index 8a12800a64..fc67650a71 100644
--- 
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestUrlEncodeFunction.java
+++ 
b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestUrlEncodeFunction.java
@@ -54,4 +54,31 @@ public class TestUrlEncodeFunction extends 
AbstractFunctionStringTestBase {
         Assert.assertEquals(1, output2.size());
         Assert.assertEquals(output2.get(0), "result=");
     }
+
+    @Test
+    public void testUrlEncodeCharsetFunction() throws Exception {
+        String transformSql = "select url_encode(string1,'GBK') from source";
+        TransformConfig config = new TransformConfig(transformSql);
+        TransformProcessor<String, String> processor = TransformProcessor
+                .create(config, 
SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+
+        // case1: url_encode('A160=汕头市&vuserid=&version_build=76','GBK')
+        List<String> output1 = 
processor.transform("A160=汕头市&vuserid=&version_build=76|banana|cloud|1",
+                new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals(output1.get(0), 
"result=A160%3D%C9%C7%CD%B7%CA%D0%26vuserid%3D%26version_build%3D76");
+
+        String transformSql2 = "select url_encode(string1,'UTF-8') from 
source";
+        TransformConfig config2 = new TransformConfig(transformSql2);
+        TransformProcessor<String, String> processor2 = TransformProcessor
+                .create(config2, 
SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+        // case2: url_encode('A160=汕头市&vuserid=&version_build=76','UTF-8')
+        List<String> output2 = 
processor2.transform("A160=汕头市&vuserid=&version_build=76|banana|cloud|1",
+                new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals(output2.get(0),
+                
"result=A160%3D%E6%B1%95%E5%A4%B4%E5%B8%82%26vuserid%3D%26version_build%3D76");
+    }
 }

Reply via email to