This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a36f090e505 [fix](scan) Incorrect scan keys lead to wrong query
results. (#40814)
a36f090e505 is described below
commit a36f090e505718e8413fe6c3ed2b6b189d8cac4e
Author: Jerry Hu <[email protected]>
AuthorDate: Thu Sep 19 13:57:14 2024 +0800
[fix](scan) Incorrect scan keys lead to wrong query results. (#40814)
## Proposed changes
```
mysql [doris_14555]>select * from table_9436528_3;
+------+------+------+------+------------------------+--------------------+------+
| col1 | col2 | col3 | col5 | col4 | col6 |
col7 |
+------+------+------+------+------------------------+--------------------+------+
| -100 | 1 | -82 | 1 | 2024-02-16 04:37:37.00 | -1299962421.904282 |
NULL |
| -100 | 1 | 92 | 1 | 2024-02-16 04:37:37.00 | 23423423.0324234 |
NULL |
| -100 | 0 | -82 | 0 | 2023-11-11 10:49:43.00 | 840968969.872149 |
NULL |
```
wrong result:
```
mysql [doris_14555]>select * from table_9436528_3 where col1 <= -100 and
col2 in (true, false) and col3 = -82;
+------+------+------+------+------------------------+--------------------+------+
| col1 | col2 | col3 | col5 | col4 | col6 |
col7 |
+------+------+------+------+------------------------+--------------------+------+
| -100 | 1 | -82 | 1 | 2024-02-16 04:37:37.00 | -1299962421.904282 |
NULL |
| -100 | 1 | 92 | 1 | 2024-02-16 04:37:37.00 | 23423423.0324234 |
NULL |
+------+------+------+------+------------------------+--------------------+------+
```
---
be/src/exec/olap_common.h | 6 +-
be/src/pipeline/exec/olap_scan_operator.cpp | 18 ++--
.../correctness/test_scan_keys_with_bool_type.out | 43 ++++++++
.../test_scan_keys_with_bool_type.groovy | 110 +++++++++++++++++++++
4 files changed, 169 insertions(+), 8 deletions(-)
diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h
index 9ba1b81d1b8..a4180938dfc 100644
--- a/be/src/exec/olap_common.h
+++ b/be/src/exec/olap_common.h
@@ -375,7 +375,7 @@ public:
template <PrimitiveType primitive_type>
Status extend_scan_key(ColumnValueRange<primitive_type>& range, int32_t
max_scan_key_num,
- bool* exact_value, bool* eos);
+ bool* exact_value, bool* eos, bool* should_break);
Status get_key_range(std::vector<std::unique_ptr<OlapScanRange>>*
key_range);
@@ -993,7 +993,8 @@ bool
ColumnValueRange<primitive_type>::has_intersection(ColumnValueRange<primiti
template <PrimitiveType primitive_type>
Status OlapScanKeys::extend_scan_key(ColumnValueRange<primitive_type>& range,
- int32_t max_scan_key_num, bool*
exact_value, bool* eos) {
+ int32_t max_scan_key_num, bool*
exact_value, bool* eos,
+ bool* should_break) {
using CppType = typename PrimitiveTypeTraits<primitive_type>::CppType;
using ConstIterator = typename std::set<CppType>::const_iterator;
@@ -1017,6 +1018,7 @@ Status
OlapScanKeys::extend_scan_key(ColumnValueRange<primitive_type>& range,
range.convert_to_range_value();
*exact_value = false;
} else {
+ *should_break = true;
return Status::OK();
}
}
diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp
b/be/src/pipeline/exec/olap_scan_operator.cpp
index b335e2309a3..09e999d4737 100644
--- a/be/src/pipeline/exec/olap_scan_operator.cpp
+++ b/be/src/pipeline/exec/olap_scan_operator.cpp
@@ -480,9 +480,13 @@ Status OlapScanLocalState::_build_key_ranges_and_filters()
{
// we use `exact_range` to identify a key range is an exact range or
not when we convert
// it to `_scan_keys`. If `exact_range` is true, we can just discard
it from `_olap_filters`.
bool exact_range = true;
+
+ // If the `_scan_keys` cannot extend by the range of column, should
stop.
+ bool should_break = false;
+
bool eos = false;
- for (int column_index = 0;
- column_index < column_names.size() &&
!_scan_keys.has_range_value() && !eos;
+ for (int column_index = 0; column_index < column_names.size() &&
+ !_scan_keys.has_range_value() && !eos &&
!should_break;
++column_index) {
auto iter =
_colname_to_value_range.find(column_names[column_index]);
if (_colname_to_value_range.end() == iter) {
@@ -496,8 +500,9 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() {
// but the original range may be converted to olap
filters, if it's not a exact_range.
auto temp_range = range;
if (range.get_fixed_value_size() <=
p._max_pushdown_conditions_per_column) {
- RETURN_IF_ERROR(_scan_keys.extend_scan_key(
- temp_range, p._max_scan_key_num,
&exact_range, &eos));
+ RETURN_IF_ERROR(
+ _scan_keys.extend_scan_key(temp_range,
p._max_scan_key_num,
+ &exact_range,
&eos, &should_break));
if (exact_range) {
_colname_to_value_range.erase(iter->first);
}
@@ -505,8 +510,9 @@ Status OlapScanLocalState::_build_key_ranges_and_filters() {
// if exceed max_pushdown_conditions_per_column,
use whole_value_rang instead
// and will not erase from
_colname_to_value_range, it must be not exact_range
temp_range.set_whole_value_range();
- RETURN_IF_ERROR(_scan_keys.extend_scan_key(
- temp_range, p._max_scan_key_num,
&exact_range, &eos));
+ RETURN_IF_ERROR(
+ _scan_keys.extend_scan_key(temp_range,
p._max_scan_key_num,
+ &exact_range,
&eos, &should_break));
}
return Status::OK();
},
diff --git a/regression-test/data/correctness/test_scan_keys_with_bool_type.out
b/regression-test/data/correctness/test_scan_keys_with_bool_type.out
new file mode 100644
index 00000000000..d0448a74ed7
--- /dev/null
+++ b/regression-test/data/correctness/test_scan_keys_with_bool_type.out
@@ -0,0 +1,43 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select1 --
+-100 false -82 false 2023-11-11T10:49:43 8.40968969872149E8
\N
+-100 true -82 true 2024-02-16T04:37:37 -1.299962421904282E9
\N
+-100 true 92 true 2024-02-16T04:37:37 2.34234230324234E7
\N
+
+-- !select2 --
+-100 false -82 false 2023-11-11T10:49:43 8.40968969872149E8
\N
+-100 true -82 true 2024-02-16T04:37:37 -1.299962421904282E9
\N
+
+-- !select3 --
+-100 false -82 false 2023-11-11T10:49:43 8.40968969872149E8
\N
+-100 true -82 true 2024-02-16T04:37:37 -1.299962421904282E9
\N
+
+-- !select3 --
+-100 0 -82 false 2023-11-11T10:49:43 8.40968969872149E8
\N
+-100 1 -82 true 2024-02-16T04:37:37 -1.299962421904282E9
\N
+-100 1 92 true 2024-02-16T04:37:37 2.34234230324234E7
\N
+-100 2 -82 true 2024-02-16T04:37:37 -1.299962421904282E9
\N
+
+-- !select4 --
+-100 1 -82 true 2024-02-16T04:37:37 -1.299962421904282E9
\N
+-100 2 -82 true 2024-02-16T04:37:37 -1.299962421904282E9
\N
+
+-- !select5 --
+-100 0 -82 false 2023-11-11T10:49:43 8.40968969872149E8
\N
+-100 1 -82 true 2024-02-16T04:37:37 -1.299962421904282E9
\N
+-100 2 -82 true 2024-02-16T04:37:37 -1.299962421904282E9
\N
+
+-- !select6 --
+-100 a -82 false 2023-11-11T10:49:43 8.40968969872149E8
\N
+-100 b -82 true 2024-02-16T04:37:37 -1.299962421904282E9
\N
+-100 b 92 true 2024-02-16T04:37:37 2.34234230324234E7
\N
+-100 c 92 true 2024-02-16T04:37:37 2.34234230324234E7
\N
+
+-- !select7 --
+-100 a -82 false 2023-11-11T10:49:43 8.40968969872149E8
\N
+-100 b -82 true 2024-02-16T04:37:37 -1.299962421904282E9
\N
+
+-- !select8 --
+-100 a -82 false 2023-11-11T10:49:43 8.40968969872149E8
\N
+-100 b -82 true 2024-02-16T04:37:37 -1.299962421904282E9
\N
+
diff --git
a/regression-test/suites/correctness/test_scan_keys_with_bool_type.groovy
b/regression-test/suites/correctness/test_scan_keys_with_bool_type.groovy
new file mode 100644
index 00000000000..1eaa6058236
--- /dev/null
+++ b/regression-test/suites/correctness/test_scan_keys_with_bool_type.groovy
@@ -0,0 +1,110 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_scan_keys_with_bool_type") {
+ sql """ DROP TABLE IF EXISTS test_scan_keys_with_bool_type """
+
+ sql """
+ CREATE TABLE `test_scan_keys_with_bool_type` (
+ `col1` tinyint NOT NULL,
+ `col2` boolean NOT NULL,
+ `col3` tinyint NOT NULL,
+ `col5` boolean REPLACE NOT NULL,
+ `col4` datetime(2) REPLACE NOT NULL,
+ `col6` double REPLACE_IF_NOT_NULL NULL,
+ `col7` datetime(3) REPLACE_IF_NOT_NULL NULL
+ ) ENGINE=OLAP
+ AGGREGATE KEY(`col1`, `col2`, `col3`)
+ DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "disable_auto_compaction" = "true"
+ );
+ """
+
+ sql """ insert into test_scan_keys_with_bool_type values
+ ( -100 , 0 , -82 , 0 , '2023-11-11 10:49:43.00' ,
840968969.872149 , NULL ),
+ ( -100 , 1 , -82 , 1 , '2024-02-16 04:37:37.00' ,
-1299962421.904282 , NULL ),
+ ( -100 , 1 , 92 , 1 , '2024-02-16 04:37:37.00' ,
23423423.0324234 , NULL );
+ """
+
+ qt_select1 " select * from test_scan_keys_with_bool_type order by 1, 2, 3,
4, 5, 6, 7; "
+ qt_select2 " select * from test_scan_keys_with_bool_type where col1 <=
-100 and col2 in (true, false) and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "
+ qt_select3 " select * from test_scan_keys_with_bool_type where col1 <=
-100 and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "
+ sql """ DROP TABLE IF EXISTS test_scan_keys_with_bool_type2 """
+
+ sql """
+ CREATE TABLE `test_scan_keys_with_bool_type2` (
+ `col1` tinyint NOT NULL,
+ `col2` int NOT NULL,
+ `col3` tinyint NOT NULL,
+ `col5` boolean REPLACE NOT NULL,
+ `col4` datetime(2) REPLACE NOT NULL,
+ `col6` double REPLACE_IF_NOT_NULL NULL,
+ `col7` datetime(3) REPLACE_IF_NOT_NULL NULL
+ ) ENGINE=OLAP
+ AGGREGATE KEY(`col1`, `col2`, `col3`)
+ DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "disable_auto_compaction" = "true"
+ );
+ """
+
+ sql """ insert into test_scan_keys_with_bool_type2 values
+ ( -100 , 0 , -82 , 0 , '2023-11-11 10:49:43.00' ,
840968969.872149 , NULL ),
+ ( -100 , 1 , -82 , 1 , '2024-02-16 04:37:37.00' ,
-1299962421.904282 , NULL ),
+ ( -100 , 2 , -82 , 1 , '2024-02-16 04:37:37.00' ,
-1299962421.904282 , NULL ),
+ ( -100 , 1 , 92 , 1 , '2024-02-16 04:37:37.00' ,
23423423.0324234 , NULL );
+ """
+
+ qt_select3 " select * from test_scan_keys_with_bool_type2 order by 1, 2,
3, 4, 5, 6, 7; "
+ qt_select4 " select * from test_scan_keys_with_bool_type2 where col1 <=
-100 and col2 in (1, 2) and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "
+ qt_select5 " select * from test_scan_keys_with_bool_type2 where col1 <=
-100 and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "
+
+
+ sql """ DROP TABLE IF EXISTS test_scan_keys_with_bool_type3 """
+
+ sql """
+ CREATE TABLE `test_scan_keys_with_bool_type3` (
+ `col1` tinyint NOT NULL,
+ `col2` char NOT NULL,
+ `col3` tinyint NOT NULL,
+ `col5` boolean REPLACE NOT NULL,
+ `col4` datetime(2) REPLACE NOT NULL,
+ `col6` double REPLACE_IF_NOT_NULL NULL,
+ `col7` datetime(3) REPLACE_IF_NOT_NULL NULL
+ ) ENGINE=OLAP
+ AGGREGATE KEY(`col1`, `col2`, `col3`)
+ DISTRIBUTED BY HASH(`col1`, `col2`, `col3`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "disable_auto_compaction" = "true"
+ );
+ """
+
+ sql """ insert into test_scan_keys_with_bool_type3 values
+ ( -100 , 'a' , -82 , 0 , '2023-11-11 10:49:43.00' ,
840968969.872149 , NULL ),
+ ( -100 , 'b', -82 , 1 , '2024-02-16 04:37:37.00' ,
-1299962421.904282 , NULL ),
+ ( -100 , 'b' , 92 , 1 , '2024-02-16 04:37:37.00' ,
23423423.0324234 , NULL ),
+ ( -100 , 'c' , 92 , 1 , '2024-02-16 04:37:37.00' ,
23423423.0324234 , NULL );
+ """
+
+ qt_select6 " select * from test_scan_keys_with_bool_type3 order by 1, 2,
3, 4, 5, 6, 7; "
+ qt_select7 " select * from test_scan_keys_with_bool_type3 where col1 <=
-100 and col2 in ('a', 'b') and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "
+ qt_select8 " select * from test_scan_keys_with_bool_type3 where col1 <=
-100 and col3 = -82 order by 1, 2, 3, 4, 5, 6, 7; "
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]