(doris) branch master updated: [chore](explain) Add algorithm item to VSORT explainition and modify dump_data of Block (#38543)

panxiaolei Wed, 31 Jul 2024 19:08:11 -0700

This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/master by this push:
     new 8ee24cb379d [chore](explain) Add algorithm item to VSORT explainition 
and modify dump_data of Block (#38543)
8ee24cb379d is described below

commit 8ee24cb379d36ecf08fbfb928ffc53873beb908e
Author: zclllhhjj <[email protected]>
AuthorDate: Thu Aug 1 10:07:59 2024 +0800

    [chore](explain) Add algorithm item to VSORT explainition and modify 
dump_data of Block (#38543)
    
    ## Proposed changes
    
    Issue Number: close #xxx
    
    1.  add algorithm record in `VSORT`'s explain string:
    
    before:
    ```sql
    |   1:VSORT(101)                 |
    |   |  order by: xxx             |
    |   |  offset: 0                 |
    |   |  distribute expr lists:    |
    |   |  tuple ids: 2              |
    ```
    after:
    ```sql
    |   1:VSORT(101)                 |
    |   |  order by: xxx             |
    |   |  algorithm: full sort      |
    |   |  offset: 0                 |
    |   |  distribute expr lists:    |
    |   |  tuple ids: 2              |
    ```
    2. add a new parameter so that `Block::dump_data()` can work when the
    nullability of the data type differs from that of the column. This is
    useful inside function calls made under
    `default_implementation_for_nulls()`.
---
 be/src/vec/core/block.cpp                                   | 12 ++++++++++--
 be/src/vec/core/block.h                                     | 12 +++++++++---
 be/src/vec/functions/function.cpp                           |  3 ++-
 .../src/main/java/org/apache/doris/planner/SortNode.java    | 13 +++++++++++++
 4 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index 35b8c0eb69a..fabd468ca4c 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -53,6 +53,7 @@
 #include "vec/columns/columns_number.h"
 #include "vec/common/assert_cast.h"
 #include "vec/data_types/data_type_factory.hpp"
+#include "vec/data_types/data_type_nullable.h"
 
 class SipHash;
 
@@ -476,7 +477,7 @@ std::string Block::dump_types() const {
     return out;
 }
 
-std::string Block::dump_data(size_t begin, size_t row_limit) const {
+std::string Block::dump_data(size_t begin, size_t row_limit, bool 
allow_null_mismatch) const {
     std::vector<std::string> headers;
     std::vector<size_t> headers_size;
     for (const auto& it : data) {
@@ -515,7 +516,14 @@ std::string Block::dump_data(size_t begin, size_t 
row_limit) const {
             }
             std::string s;
             if (data[i].column) {
-                s = data[i].to_string(row_num);
+                if (data[i].type->is_nullable() && 
!data[i].column->is_nullable()) {
+                    assert(allow_null_mismatch);
+                    s = assert_cast<const 
DataTypeNullable*>(data[i].type.get())
+                                ->get_nested_type()
+                                ->to_string(*data[i].column, row_num);
+                } else {
+                    s = data[i].to_string(row_num);
+                }
             }
             if (s.length() > headers_size[i]) {
                 s = s.substr(0, headers_size[i] - 3) + "...";
diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h
index 6f50ff0035a..741039e1465 100644
--- a/be/src/vec/core/block.h
+++ b/be/src/vec/core/block.h
@@ -255,14 +255,20 @@ public:
 
     bool empty() const { return rows() == 0; }
 
-    /** Updates SipHash of the Block, using update method of columns.
+    /** 
+      * Updates SipHash of the Block, using update method of columns.
       * Returns hash for block, that could be used to differentiate blocks
       *  with same structure, but different data.
       */
     void update_hash(SipHash& hash) const;
 
-    /** Get block data in string. */
-    std::string dump_data(size_t begin = 0, size_t row_limit = 100) const;
+    /** 
+     *  Get block data in string. 
+     *  Inside default_implementation_for_nulls or similar code paths, the 
nullability of the type and of the column
+     *   can temporarily differ. Set allow_null_mismatch to true to dump it 
correctly.
+    */
+    std::string dump_data(size_t begin = 0, size_t row_limit = 100,
+                          bool allow_null_mismatch = false) const;
 
     static std::string dump_column(ColumnPtr col, DataTypePtr type) {
         ColumnWithTypeAndName type_name {col, type, ""};
diff --git a/be/src/vec/functions/function.cpp 
b/be/src/vec/functions/function.cpp
index cfc6a39f397..1fea4c70fc1 100644
--- a/be/src/vec/functions/function.cpp
+++ b/be/src/vec/functions/function.cpp
@@ -216,7 +216,8 @@ Status 
PreparedFunctionImpl::default_implementation_for_nulls(
         }
         RETURN_IF_ERROR(execute_without_low_cardinality_columns(context, 
block, new_args, result,
                                                                 block.rows(), 
dry_run));
-        // after run with nested, wrap them in null.
+        // After running with the nested columns, wrap the result in nullable. Before this, 
block.get_by_position(result).type
+        // is not compatible with block.get_by_position(result).column.
         block.get_by_position(result).column = wrap_in_nullable(
                 block.get_by_position(result).column, block, args, result, 
input_rows_count);
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
index 4cdc04d1f1b..5a8f9f628f8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
@@ -198,6 +198,19 @@ public class SortNode extends PlanNode {
         if (useTwoPhaseReadOpt) {
             output.append(detailPrefix + "OPT TWO PHASE\n");
         }
+
+        output.append(detailPrefix + "algorithm: ");
+        boolean isFixedLength = info.getOrderingExprs().stream().allMatch(e -> 
!e.getType().isStringType()
+                && !e.getType().isCollectionType());
+        if (limit > 0 && limit + offset < 1024 && (useTwoPhaseReadOpt || 
hasRuntimePredicate
+                || isFixedLength)) {
+            output.append("heap sort\n");
+        } else if (limit > 0 && !isFixedLength && limit + offset < 256) {
+            output.append("topn sort\n");
+        } else {
+            output.append("full sort\n");
+        }
+
         output.append(detailPrefix).append("offset: 
").append(offset).append("\n");
         return output.toString();
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris) branch master updated: [chore](explain) Add algorithm item to VSORT explainition and modify dump_data of Block (#38543)

Reply via email to