This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9ecc5aeedc8 [Fix](delete) Support delete when column name is Unicode
(#39381)
9ecc5aeedc8 is described below
commit 9ecc5aeedc8805ea94da6e6d3619b78e78f3012f
Author: zclllhhjj <[email protected]>
AuthorDate: Mon Aug 19 12:43:17 2024 +0800
[Fix](delete) Support delete when column name is Unicode (#39381)
## Proposed changes
Issue Number: close #xxx
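Before this change, a DELETE whose predicates reference Unicode column names failed because the BE could not parse the condition string: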
```sql
mysql> delete from table_7298276 where 中文列名1 > '2023-08-17' and 中文列名2 >
'-68' and 中文列名3 in ("77", "0", "-35", "-8", "93", "-87", "42", "24", "57",
"74");
ERROR 1105 (HY000): errCode = 2, detailMessage = delete job failed,
errmsg:10019: [(10.16.10.8)[INVALID_ARGUMENT]failed to parse condition_str,
condtion=TCondition {
01: column_name (string) =
"\xe4\xb8\xad\xe6\x96\x87\xe5\x88\x97\xe5\x90\x8d1",
02: condition_op (string) = ">",
03: condition_values (list) = list<string>[1] {
[0] = "2023-08-17",
},
04: column_unique_id (i32) = 0,
05: marked_by_runtime_filter (bool) = false,
1000: compound_type (i32) = 0,
}]
```
After this change, the same kind of DELETE succeeds:
```sql
mysql> delete from table_7298276 where 中文列名1 > '2012-08-17' and 中文列名2 > -68
and 中文列名3 in (1,2,3);
Query OK, 0 rows affected (0.14 sec)
```
---
be/src/olap/delete_handler.cpp | 49 ++++++++++------------
be/test/olap/delete_handler_test.cpp | 4 ++
.../data/delete_p0/test_delete_unicode.out | 6 +++
.../suites/delete_p0/test_delete_unicode.groovy | 39 +++++++++++++++++
4 files changed, 70 insertions(+), 28 deletions(-)
diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp
index 66859a069cc..10c6f50b300 100644
--- a/be/src/olap/delete_handler.cpp
+++ b/be/src/olap/delete_handler.cpp
@@ -21,8 +21,6 @@
#include <gen_cpp/olap_file.pb.h>
#include <thrift/protocol/TDebugProtocol.h>
-#include <boost/regex.hpp>
-#include <sstream>
#include <string>
#include <vector>
@@ -40,12 +38,10 @@
using apache::thrift::ThriftDebugString;
using std::vector;
using std::string;
-using std::stringstream;
using ::google::protobuf::RepeatedPtrField;
namespace doris {
-using namespace ErrorCode;
// construct sub condition from TCondition
std::string construct_sub_predicate(const TCondition& condition) {
@@ -314,38 +310,35 @@ Status DeleteHandler::parse_condition(const
DeleteSubPredicatePB& sub_cond, TCon
// value: matches "1597751948193618247 and length(source)<1;\n;\n"
//
// For more info, see DeleteHandler::construct_sub_predicates
-// FIXME(gavin): support unicode. And this is a tricky implementation, it
should
-// not be the final resolution, refactor it.
+// FIXME(gavin): This is a tricky implementation, it should not be the final
resolution, refactor it.
const char* const CONDITION_STR_PATTERN =
- // .----------------- column-name ----------------.
.----------------------- operator ------------------------. .------------
value ----------.
-
R"(([_a-zA-Z@0-9\s/][.a-zA-Z0-9_+-/?@#$%^&*"\s,:]*)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:
IS ))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
- // '----------------- group 1 --------------------'
'--------------------- group 2 ---------------------------' | '-- group 4--'
|
- // match any of: =
!= >> << >= <= *= " IS " '----------- group 3 ---------'
- //
match **ANY THING** without(4)
- //
or with(3) single quote
-boost::regex DELETE_HANDLER_REGEX(CONDITION_STR_PATTERN);
+ // .----------------- column-name --------------------------.
.----------------------- operator ------------------------. .------------
value ----------.
+
R"(([_a-zA-Z@0-9\s/\p{L}][.a-zA-Z0-9_+-/?@#$%^&*"\s,:\p{L}]*)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:
IS ))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
+ // '----------------- group 1 ------------------------------'
'--------------------- group 2 ---------------------------' | '-- group 4--'
|
+ // match
any of: = != >> << >= <= *= " IS " '----------- group 3
---------'
+ //
match **ANY THING**
without(4)
+ //
or with(3) single quote
// clang-format on
+RE2 DELETE_HANDLER_REGEX(CONDITION_STR_PATTERN);
Status DeleteHandler::parse_condition(const std::string& condition_str,
TCondition* condition) {
- bool matched = false;
- boost::smatch what;
- try {
- VLOG_NOTICE << "condition_str: " << condition_str;
- matched = boost::regex_match(condition_str, what,
DELETE_HANDLER_REGEX) &&
- condition_str.size() == what[0].str().size(); // exact match
- } catch (boost::regex_error& e) {
- VLOG_NOTICE << "fail to parse expr. [expr=" << condition_str << ";
error=" << e.what()
- << "]";
- }
+ std::string col_name, op, value, g4;
+
+ bool matched = RE2::FullMatch(condition_str, DELETE_HANDLER_REGEX,
&col_name, &op, &value,
+ &g4); // exact match
+
if (!matched) {
- return Status::Error<ErrorCode::INVALID_ARGUMENT>("fail to sub
condition. condition={}",
- condition_str);
+ return Status::InvalidArgument("fail to sub condition. condition={}",
condition_str);
}
- condition->column_name = what[1].str();
- condition->condition_op = what[2].str() == " IS " ? "IS" : what[2].str();
+ condition->column_name = col_name;
+ condition->condition_op = op == " IS " ? "IS" : op;
// match string with single quotes, a = b or a = 'b'
- condition->condition_values.push_back(what[3 + !!what[4].matched].str());
+ if (!g4.empty()) {
+ condition->condition_values.push_back(g4);
+ } else {
+ condition->condition_values.push_back(value);
+ }
VLOG_NOTICE << "parsed condition_str: col_name={" <<
condition->column_name << "} op={"
<< condition->condition_op << "} val={" <<
condition->condition_values.back()
<< "}";
diff --git a/be/test/olap/delete_handler_test.cpp
b/be/test/olap/delete_handler_test.cpp
index 335c163930d..0d45d28c284 100644
--- a/be/test/olap/delete_handler_test.cpp
+++ b/be/test/olap/delete_handler_test.cpp
@@ -1225,6 +1225,10 @@ TEST_F(TestDeleteHandler, TestParseDeleteCondition) {
{R"(a IS b IS NOT NULL)", true, gen_cond(R"(a IS b)", "IS", R"(NOT
NULL)" )}, // test " IS " in column name
{R"(_a-zA-Z@0-9 /.a-zA-Z0-9_+-/?@#$%^&*" ,:=hell)", true,
gen_cond(R"(_a-zA-Z@0-9 /.a-zA-Z0-9_+-/?@#$%^&*" ,:)", "=", R"(hell)")}, //
hellbound column name
{R"(this is a col very
loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooon
colum name=long)", true, gen_cond(R"(this is a col very
loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooon
colum name)", "=", R"(long)")}, // test " IS " in column name
+ {R"(中文列名1=b)" , true, gen_cond(R"(中文列名1)", "=" , R"(b)"
)}, // Chinese case
+ {R"(错!!误!=b)" , false, gen_cond(R"(abc)" , "!=", R"(b)"
)}, // illegal character
+ {R"(##错误<=b)" , false, gen_cond(R"(abc)" , "<=", R"(b)"
)}, // illegal prefix
+ {R"(κάνεις지내세요>>b)" , true, gen_cond(R"(κάνεις지내세요)", ">>", R"(b)"
)}, // other languages
};
for (auto& i : test_input) { test(i); }
}
diff --git a/regression-test/data/delete_p0/test_delete_unicode.out
b/regression-test/data/delete_p0/test_delete_unicode.out
new file mode 100644
index 00000000000..c0cb04a2a1d
--- /dev/null
+++ b/regression-test/data/delete_p0/test_delete_unicode.out
@@ -0,0 +1,6 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql1 --
+2020-12-12 1 1 1
+
+-- !sql2 --
+
diff --git a/regression-test/suites/delete_p0/test_delete_unicode.groovy
b/regression-test/suites/delete_p0/test_delete_unicode.groovy
new file mode 100644
index 00000000000..9dd5f589a07
--- /dev/null
+++ b/regression-test/suites/delete_p0/test_delete_unicode.groovy
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_delete_unicode") {
+ sql "set enable_unicode_name_support=true;"
+
+ sql """
+ CREATE TABLE `table_7298276` (
+ `中文列名1` date NOT NULL,
+ `中文列名2` int NOT NULL,
+ `中文列名3` bigint NOT NULL,
+ `中文列名4` largeint NOT NULL,
+ INDEX 中文列名2 (`中文列名2`) USING INVERTED,
+ INDEX 中文列名4 (`中文列名4`) USING INVERTED
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`中文列名1`, `中文列名2`, `中文列名3`)
+ DISTRIBUTED BY HASH(`中文列名1`, `中文列名2`, `中文列名3`) BUCKETS 4
+ properties("replication_num" = "1");
+ """
+
+ sql """ insert into table_7298276 values ('2020-12-12',1,1,1);"""
+ qt_sql1 "select * from table_7298276;"
+ sql "delete from table_7298276 where 中文列名1 > '2012-08-17' and 中文列名2 > -68
and 中文列名3 in (1,2,3);"
+ qt_sql2 "select * from table_7298276;"
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]