Gabriel39 commented on code in PR #15642:
URL: https://github.com/apache/doris/pull/15642#discussion_r1071057702
##########
be/src/vec/exprs/vslot_ref.cpp:
##########
@@ -50,6 +50,12 @@ Status VSlotRef::prepare(doris::RuntimeState* state, const
doris::RowDescriptor&
if (slot_desc == nullptr) {
return Status::InternalError("couldn't resolve slot descriptor {}",
_slot_id);
}
+ if (slot_desc->invalid()) {
+ // invalid slot should be ignored manually
+ _column_id = -1;
+ _column_name = &slot_desc->col_name();
Review Comment:
move this line to line 53 and remove line 56 and 64
##########
be/src/vec/exec/vexchange_node.cpp:
##########
@@ -45,10 +48,14 @@ Status VExchangeNode::init(const TPlanNode& tnode,
RuntimeState* state) {
if (!_is_merging) {
return Status::OK();
}
-
RETURN_IF_ERROR(_vsort_exec_exprs.init(tnode.exchange_node.sort_info,
_pool));
_is_asc_order = tnode.exchange_node.sort_info.is_asc_order;
_nulls_first = tnode.exchange_node.sort_info.nulls_first;
+
+ if (tnode.exchange_node.__isset.nodes_info) {
+ _nodes_info = _pool->add(new
DorisNodesInfo(tnode.exchange_node.nodes_info));
+ }
+ _use_two_phase_read = tnode.exchange_node.sort_info.use_two_phase_read;
Review Comment:
```suggestion
_use_two_phase_read =
tnode.exchange_node.sort_info.__isset.use_two_phase_read &&
tnode.exchange_node.sort_info.use_two_phase_read;
```
##########
be/src/vec/exec/vexchange_node.cpp:
##########
@@ -122,6 +157,7 @@ Status VExchangeNode::get_next(RuntimeState* state, Block*
block, bool* eos) {
}
COUNTER_SET(_rows_returned_counter, _num_rows_returned);
}
+ RETURN_IF_ERROR(second_phase_fetch_data(state, block));
Review Comment:
I think `status` should be check before calling `second_phase_fetch_data`.
##########
be/src/vec/exec/vexchange_node.cpp:
##########
@@ -89,6 +96,28 @@ Status VExchangeNode::open(RuntimeState* state) {
return Status::OK();
}
+Status VExchangeNode::second_phase_fetch_data(RuntimeState* state, Block*
final_block) {
+ if (!_use_two_phase_read) {
+ return Status::OK();
+ }
+ if (final_block->rows() == 0) {
+ return Status::OK();
+ }
+ auto row_id_col = final_block->get_by_position(final_block->columns() - 1);
+ MonotonicStopWatch watch;
+ watch.start();
+ auto tuple_desc = _row_descriptor.tuple_descriptors()[0];
+ RowIDFetcher id_fetcher(tuple_desc, state);
+ RETURN_IF_ERROR(id_fetcher.init(_nodes_info));
+ MutableBlock materialized_block(_row_descriptor.tuple_descriptors(),
final_block->rows());
+ // fetch will sort block by sequence of ROWID_COL
+ RETURN_IF_ERROR(id_fetcher.fetch(row_id_col.column, &materialized_block));
+ // Notice swap may change the structure of final_block
+ final_block->swap(materialized_block.to_block());
+ LOG(INFO) << "fetch_id finished, cost(ms):" << watch.elapsed_time() / 1000
/ 1000;
Review Comment:
Use a profiler instead
##########
gensrc/thrift/Descriptors.thrift:
##########
@@ -51,6 +51,8 @@ struct TSlotDescriptor {
9: required i32 slotIdx
10: required bool isMaterialized
11: optional i32 col_unique_id = -1
+ 12: optional bool is_key = false
+ 13: optional bool is_invalid = false
Review Comment:
add some comments for `is_invalid`. To be honest, I think this is a bit
confusing. How about to rename it to `should_ignore` or `need_materialize`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]