This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new c9a299e9147 [fix](columns) fix bug found by UT and add regression test 
(#48554) (#48690)
c9a299e9147 is described below

commit c9a299e9147b9c9753d28a1c945357231b86ad98
Author: TengJianPing <[email protected]>
AuthorDate: Thu Mar 6 09:33:33 2025 +0800

    [fix](columns) fix bug found by UT and add regression test (#48554) (#48690)
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR:  Pick #48554
    
    Problem Summary:
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [x] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
    - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [x] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [x] No.
    - [ ] Yes. <!-- Add document PR link here. eg:
    https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/vec/columns/column_string.cpp               |  33 ++++++++-------
 be/src/vec/columns/column_string.h                 |   1 +
 be/src/vec/columns/columns_number.h                |   1 -
 be/src/vec/core/field.cpp                          |   6 +--
 .../data_types/serde/data_type_decimal_serde.cpp   |   2 +-
 regression-test/data/query_p0/sort/heap_sort.csv   |   7 ++++
 regression-test/data/query_p0/sort/heap_sort.out   | Bin 0 -> 193 bytes
 .../suites/query_p0/sort/heap_sort.groovy          |  46 +++++++++++++++++++++
 8 files changed, 76 insertions(+), 20 deletions(-)

diff --git a/be/src/vec/columns/column_string.cpp 
b/be/src/vec/columns/column_string.cpp
index 8cdc44da02a..9dbc53e1246 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -124,9 +124,9 @@ void ColumnStr<T>::insert_range_from_ignore_overflow(const 
doris::vectorized::IC
 
     const auto& src_concrete = assert_cast<const ColumnStr<T>&>(src);
     if (start + length > src_concrete.offsets.size()) {
-        throw doris::Exception(
-                doris::ErrorCode::INTERNAL_ERROR,
-                "Parameter out of bound in IColumnStr<T>::insert_range_from 
method.");
+        throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+                               "Parameter out of bound in "
+                               
"IColumnStr<T>::insert_range_from_ignore_overflow method.");
     }
 
     size_t nested_offset = src_concrete.offset_at(start);
@@ -265,11 +265,11 @@ void ColumnStr<T>::update_crcs_with_value(uint32_t* 
__restrict hashes, doris::Pr
 
 template <typename T>
 ColumnPtr ColumnStr<T>::filter(const IColumn::Filter& filt, ssize_t 
result_size_hint) const {
-    if (offsets.size() == 0) {
-        return ColumnStr<T>::create();
-    }
-
     if constexpr (std::is_same_v<UInt32, T>) {
+        if (offsets.size() == 0) {
+            return ColumnStr<T>::create();
+        }
+
         auto res = ColumnStr<T>::create();
         Chars& res_chars = res->chars;
         IColumn::Offsets& res_offsets = res->offsets;
@@ -285,13 +285,13 @@ ColumnPtr ColumnStr<T>::filter(const IColumn::Filter& 
filt, ssize_t result_size_
 
 template <typename T>
 size_t ColumnStr<T>::filter(const IColumn::Filter& filter) {
-    CHECK_EQ(filter.size(), offsets.size());
-    if (offsets.size() == 0) {
-        resize(0);
-        return 0;
-    }
-
     if constexpr (std::is_same_v<UInt32, T>) {
+        CHECK_EQ(filter.size(), offsets.size());
+        if (offsets.size() == 0) {
+            resize(0);
+            return 0;
+        }
+
         auto res = filter_arrays_impl<UInt8, IColumn::Offset>(chars, offsets, 
filter);
         sanity_check_simple();
         return res;
@@ -631,8 +631,11 @@ void ColumnStr<T>::compare_internal(size_t rhs_row_id, 
const IColumn& rhs, int n
         size_t end = simd::find_one(cmp_res, begin + 1);
         for (size_t row_id = begin; row_id < end; row_id++) {
             auto value_a = get_data_at(row_id);
-            int res = memcmp_small_allow_overflow15(value_a.data, 
value_a.size, cmp_base.data,
-                                                    cmp_base.size);
+            // need to covert to unsigned char, orelse the compare semantic is 
not consistent
+            // with other member functions, e.g. get_permutation and 
compare_at,
+            // and will result wrong result.
+            int res = memcmp_small_allow_overflow15((Char*)value_a.data, 
value_a.size,
+                                                    (Char*)cmp_base.data, 
cmp_base.size);
             if (res * direction < 0) {
                 filter[row_id] = 1;
                 cmp_res[row_id] = 1;
diff --git a/be/src/vec/columns/column_string.h 
b/be/src/vec/columns/column_string.h
index 9106e67b2b5..81a495eabd1 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -222,6 +222,7 @@ public:
     void insert_many_strings_without_reserve(const StringRef* strings, size_t 
num) {
         Char* data = chars.data();
         size_t offset = chars.size();
+        data += offset;
         size_t length = 0;
 
         const char* ptr = strings[0].data;
diff --git a/be/src/vec/columns/columns_number.h 
b/be/src/vec/columns/columns_number.h
index 33dbe4c8c63..9c0005e9d5d 100644
--- a/be/src/vec/columns/columns_number.h
+++ b/be/src/vec/columns/columns_number.h
@@ -32,7 +32,6 @@ using ColumnUInt8 = ColumnVector<UInt8>;
 using ColumnUInt16 = ColumnVector<UInt16>;
 using ColumnUInt32 = ColumnVector<UInt32>;
 using ColumnUInt64 = ColumnVector<UInt64>;
-using ColumnUInt128 = ColumnVector<UInt128>;
 
 using ColumnInt8 = ColumnVector<Int8>;
 using ColumnInt16 = ColumnVector<Int16>;
diff --git a/be/src/vec/core/field.cpp b/be/src/vec/core/field.cpp
index e652fc2dc9e..ac92605007d 100644
--- a/be/src/vec/core/field.cpp
+++ b/be/src/vec/core/field.cpp
@@ -174,14 +174,14 @@ DECLARE_DECIMAL_COMPARISON(Decimal256)
 
 template <>
 bool decimal_equal(Decimal128V3 x, Decimal128V3 y, UInt32 xs, UInt32 ys) {
-    return dec_equal(Decimal128V2(x.value), Decimal128V2(y.value), xs, ys);
+    return dec_equal(x, y, xs, ys);
 }
 template <>
 bool decimal_less(Decimal128V3 x, Decimal128V3 y, UInt32 xs, UInt32 ys) {
-    return dec_less(Decimal128V2(x.value), Decimal128V2(y.value), xs, ys);
+    return dec_less(x, y, xs, ys);
 }
 template <>
 bool decimal_less_or_equal(Decimal128V3 x, Decimal128V3 y, UInt32 xs, UInt32 
ys) {
-    return dec_less_or_equal(Decimal128V2(x.value), Decimal128V2(y.value), xs, 
ys);
+    return dec_less_or_equal(x, y, xs, ys);
 }
 } // namespace doris::vectorized
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp 
b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
index acb09ee773e..d98f6cae2b0 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
@@ -79,7 +79,7 @@ Status 
DataTypeDecimalSerDe<T>::deserialize_one_cell_from_json(IColumn& column,
         return Status::OK();
     }
     return Status::InvalidArgument("parse decimal fail, string: '{}', 
primitive type: '{}'",
-                                   std::string(rb.position(), 
rb.count()).c_str(),
+                                   std::string(slice.data, slice.size).c_str(),
                                    get_primitive_type());
 }
 
diff --git a/regression-test/data/query_p0/sort/heap_sort.csv 
b/regression-test/data/query_p0/sort/heap_sort.csv
new file mode 100644
index 00000000000..8b7670cf735
--- /dev/null
+++ b/regression-test/data/query_p0/sort/heap_sort.csv
@@ -0,0 +1,7 @@
+1;中文测试a
+9;a
+3;中文测试b
+2;b
+5;c
+6;d
+1;e
diff --git a/regression-test/data/query_p0/sort/heap_sort.out 
b/regression-test/data/query_p0/sort/heap_sort.out
new file mode 100644
index 00000000000..6d8b24cc800
Binary files /dev/null and b/regression-test/data/query_p0/sort/heap_sort.out 
differ
diff --git a/regression-test/suites/query_p0/sort/heap_sort.groovy 
b/regression-test/suites/query_p0/sort/heap_sort.groovy
new file mode 100644
index 00000000000..f470d4a4f8d
--- /dev/null
+++ b/regression-test/suites/query_p0/sort/heap_sort.groovy
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// The cases is copied from https://github.com/trinodb/trino/tree/master
+// 
/testing/trino-product-tests/src/main/resources/sql-tests/testcases/aggregate
+// and modified by Doris.
+
+suite("heap_sort") {
+    sql """
+        drop table if exists test_heap_sort;
+    """
+    sql """
+        create table if not exists test_heap_sort (
+            f1 int,
+            f2 varchar(65533)
+        ) properties("replication_num"="1", "disable_auto_compaction"="true");
+    """
+    streamLoad {
+        table "test_heap_sort"
+
+        set 'column_separator', ';'
+
+        file 'heap_sort.csv'
+
+        time 10000 // limit inflight 10s
+    }
+    sql """ set parallel_pipeline_task_num = 1; """
+    sql """ set batch_size = 3; """
+    // sql """ set force_sort_algorithm = "heap"; """
+    qt_select_1 "select * from test_heap_sort order by f1, f2;"
+    qt_select_2 "select * from test_heap_sort order by f2 limit 3;"
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to