This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 5e1fd2ff35f [fix](Nereids): support Chinese characters set (#28256) (#28357)
5e1fd2ff35f is described below
commit 5e1fd2ff35f541f59e5c51dcfa6d02a608a6ed7d
Author: jakevin <[email protected]>
AuthorDate: Sat Dec 16 19:36:07 2023 +0800
[fix](Nereids): support Chinese characters set (#28256) (#28357)
---
.../antlr4/org/apache/doris/nereids/DorisLexer.g4 | 5 ++---
.../nereids_syntax_p0/chinese_characters_set.out | 4 ++++
.../chinese_characters_set.groovy | 23 ++++++++++++++++++++++
3 files changed, 29 insertions(+), 3 deletions(-)
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
index b39fc69b44f..abf6e4bb27f 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
@@ -631,9 +631,8 @@ fragment DIGIT
fragment LETTER
: [a-zA-Z$_] // these are the "java letters" below 0x7F
- | // covers all characters above 0x7F which are not a surrogate
- ~[\u0000-\u007F\uD800-\uDBFF]
- {Character.isJavaIdentifierStart(_input.LA(-1))}?
+ | ~[\u0000-\u007F\uD800-\uDBFF] // covers all characters above 0x7F which are not a surrogate
+ | [\uD800-\uDBFF] [\uDC00-\uDFFF] // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
;
SIMPLE_COMMENT
diff --git a/regression-test/data/nereids_syntax_p0/chinese_characters_set.out
b/regression-test/data/nereids_syntax_p0/chinese_characters_set.out
new file mode 100644
index 00000000000..b66acea9495
--- /dev/null
+++ b/regression-test/data/nereids_syntax_p0/chinese_characters_set.out
@@ -0,0 +1,4 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !chinese_characters_set --
+1
+
diff --git a/regression-test/suites/nereids_syntax_p0/chinese_characters_set.groovy b/regression-test/suites/nereids_syntax_p0/chinese_characters_set.groovy
new file mode 100644
index 00000000000..8cfb1141e47
--- /dev/null
+++ b/regression-test/suites/nereids_syntax_p0/chinese_characters_set.groovy
@@ -0,0 +1,23 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("chinese_characters_set") {
+ sql "SET enable_nereids_planner=true"
+ sql "SET enable_fallback_to_original_planner=false"
+
+ qt_chinese_characters_set "SELECT 1 as 中文字符(!。,?;¥"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]