This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8ee24cb379d [chore](explain) Add algorithm item to VSORT explanation
and modify dump_data of Block (#38543)
8ee24cb379d is described below
commit 8ee24cb379d36ecf08fbfb928ffc53873beb908e
Author: zclllhhjj <[email protected]>
AuthorDate: Thu Aug 1 10:07:59 2024 +0800
[chore](explain) Add algorithm item to VSORT explanation and modify
dump_data of Block (#38543)
## Proposed changes
Issue Number: close #xxx
1. add algorithm record in `VSORT`'s explain string:
before:
```sql
| 1:VSORT(101) |
| | order by: xxx |
| | offset: 0 |
| | distribute expr lists: |
| | tuple ids: 2 |
```
after:
```sql
| 1:VSORT(101) |
| | order by: xxx |
| | algorithm: full sort |
| | offset: 0 |
| | distribute expr lists: |
| | tuple ids: 2 |
```
2. add a new parameter, `allow_null_mismatch`, so that `Block::dump_data()`
still works when the nullability of a column's data type differs from
that of the column itself. This is useful for calls made inside
`default_implementation_for_nulls()`, where the two can temporarily disagree.
---
be/src/vec/core/block.cpp | 12 ++++++++++--
be/src/vec/core/block.h | 12 +++++++++---
be/src/vec/functions/function.cpp | 3 ++-
.../src/main/java/org/apache/doris/planner/SortNode.java | 13 +++++++++++++
4 files changed, 34 insertions(+), 6 deletions(-)
diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index 35b8c0eb69a..fabd468ca4c 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -53,6 +53,7 @@
#include "vec/columns/columns_number.h"
#include "vec/common/assert_cast.h"
#include "vec/data_types/data_type_factory.hpp"
+#include "vec/data_types/data_type_nullable.h"
class SipHash;
@@ -476,7 +477,7 @@ std::string Block::dump_types() const {
return out;
}
-std::string Block::dump_data(size_t begin, size_t row_limit) const {
+std::string Block::dump_data(size_t begin, size_t row_limit, bool
allow_null_mismatch) const {
std::vector<std::string> headers;
std::vector<size_t> headers_size;
for (const auto& it : data) {
@@ -515,7 +516,14 @@ std::string Block::dump_data(size_t begin, size_t
row_limit) const {
}
std::string s;
if (data[i].column) {
- s = data[i].to_string(row_num);
+ if (data[i].type->is_nullable() &&
!data[i].column->is_nullable()) {
+ assert(allow_null_mismatch);
+ s = assert_cast<const
DataTypeNullable*>(data[i].type.get())
+ ->get_nested_type()
+ ->to_string(*data[i].column, row_num);
+ } else {
+ s = data[i].to_string(row_num);
+ }
}
if (s.length() > headers_size[i]) {
s = s.substr(0, headers_size[i] - 3) + "...";
diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h
index 6f50ff0035a..741039e1465 100644
--- a/be/src/vec/core/block.h
+++ b/be/src/vec/core/block.h
@@ -255,14 +255,20 @@ public:
bool empty() const { return rows() == 0; }
- /** Updates SipHash of the Block, using update method of columns.
+ /**
+ * Updates SipHash of the Block, using update method of columns.
* Returns hash for block, that could be used to differentiate blocks
* with same structure, but different data.
*/
void update_hash(SipHash& hash) const;
- /** Get block data in string. */
- std::string dump_data(size_t begin = 0, size_t row_limit = 100) const;
+ /**
+ * Get block data in string.
+ * If code is in default_implementation_for_nulls or something likely,
type and column's nullity could
+ * temporarily be not same. set allow_null_mismatch to true to dump it
correctly.
+ */
+ std::string dump_data(size_t begin = 0, size_t row_limit = 100,
+ bool allow_null_mismatch = false) const;
static std::string dump_column(ColumnPtr col, DataTypePtr type) {
ColumnWithTypeAndName type_name {col, type, ""};
diff --git a/be/src/vec/functions/function.cpp
b/be/src/vec/functions/function.cpp
index cfc6a39f397..1fea4c70fc1 100644
--- a/be/src/vec/functions/function.cpp
+++ b/be/src/vec/functions/function.cpp
@@ -216,7 +216,8 @@ Status
PreparedFunctionImpl::default_implementation_for_nulls(
}
RETURN_IF_ERROR(execute_without_low_cardinality_columns(context,
block, new_args, result,
block.rows(),
dry_run));
- // after run with nested, wrap them in null.
+ // After run with nested, wrap them in null. Before this,
block.get_by_position(result).type
+ // is not compatible with get_by_position(result).column
block.get_by_position(result).column = wrap_in_nullable(
block.get_by_position(result).column, block, args, result,
input_rows_count);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
index 4cdc04d1f1b..5a8f9f628f8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java
@@ -198,6 +198,19 @@ public class SortNode extends PlanNode {
if (useTwoPhaseReadOpt) {
output.append(detailPrefix + "OPT TWO PHASE\n");
}
+
+ output.append(detailPrefix + "algorithm: ");
+ boolean isFixedLength = info.getOrderingExprs().stream().allMatch(e ->
!e.getType().isStringType()
+ && !e.getType().isCollectionType());
+ if (limit > 0 && limit + offset < 1024 && (useTwoPhaseReadOpt ||
hasRuntimePredicate
+ || isFixedLength)) {
+ output.append("heap sort\n");
+ } else if (limit > 0 && !isFixedLength && limit + offset < 256) {
+ output.append("topn sort\n");
+ } else {
+ output.append("full sort\n");
+ }
+
output.append(detailPrefix).append("offset:
").append(offset).append("\n");
return output.toString();
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]