Copilot commented on code in PR #58679:
URL: https://github.com/apache/doris/pull/58679#discussion_r2584146205
##########
be/src/vec/exec/jni_connector.cpp:
##########
@@ -313,10 +313,9 @@ Status JniConnector::_fill_block(Block* block, size_t
num_rows) {
SCOPED_RAW_TIMER(&_fill_block_watcher);
JNIEnv* env = nullptr;
RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env));
- // todo: maybe do not need to build name to index map every time
- auto name_to_pos_map = block->get_name_to_pos_map();
for (int i = 0; i < _column_names.size(); ++i) {
- auto& column_with_type_and_name =
block->get_by_position(name_to_pos_map[_column_names[i]]);
+ auto& column_with_type_and_name =
+
block->get_by_position(_col_name_to_block_idx->at(_column_names[i]));
Review Comment:
Potential null pointer dereference. `_col_name_to_block_idx` is used without
checking if it's nullptr. If `set_col_name_to_block_idx()` is not called before
`_fill_block()`, this will crash. Add a null check or ensure initialization in
the constructor.
##########
be/src/vec/exec/scan/scanner_context.cpp:
##########
@@ -487,43 +512,45 @@ int32_t
ScannerContext::_get_margin(std::unique_lock<std::mutex>& transfer_lock,
Status ScannerContext::schedule_scan_task(std::shared_ptr<ScanTask>
current_scan_task,
std::unique_lock<std::mutex>&
transfer_lock,
std::unique_lock<std::shared_mutex>&
scheduler_lock) {
+ std::cout << "ScannerContext::schedule_scan_task()" << std::endl;
if (current_scan_task &&
(!current_scan_task->cached_blocks.empty() ||
current_scan_task->is_eos())) {
throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Scanner scheduler
logical error.");
}
std::list<std::shared_ptr<ScanTask>> tasks_to_submit;
- int32_t margin = _get_margin(transfer_lock, scheduler_lock);
-
- // margin is less than zero. Means this scan operator could not submit any
scan task for now.
- if (margin <= 0) {
- // Be careful with current scan task.
- // We need to add it back to task queue to make sure it could be
resubmitted.
- if (current_scan_task) {
- // This usually happens when we should downgrade the concurrency.
- _pending_scanners.push(current_scan_task);
- VLOG_DEBUG << fmt::format(
- "{} push back scanner to task queue, because diff <= 0,
task_queue size "
- "{}, _num_scheduled_scanners {}",
- ctx_id, _tasks_queue.size(), _num_scheduled_scanners);
- }
-
-#ifndef NDEBUG
- // This DCHECK is necessary.
- // We need to make sure each scan operator could have at least 1 scan
tasks.
- // Or this scan operator will not be re-scheduled.
- if (!_pending_scanners.empty() && _num_scheduled_scanners == 0 &&
_tasks_queue.empty()) {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Scanner
scheduler logical error.");
- }
-#endif
-
- return Status::OK();
- }
+ // int32_t margin = _get_margin(transfer_lock, scheduler_lock);
+
+ // // margin is less than zero. Means this scan operator could not
submit any scan task for now.
+ // if (margin <= 0) {
+ // // Be careful with current scan task.
+ // // We need to add it back to task queue to make sure it could
be resubmitted.
+ // if (current_scan_task) {
+ // // This usually happens when we should downgrade the
concurrency.
+ // _pending_scanners.push(current_scan_task);
+ // VLOG_DEBUG << fmt::format(
+ // "{} push back scanner to task queue, because diff
<= 0, task_queue size "
+ // "{}, _num_scheduled_scanners {}",
+ // ctx_id, _tasks_queue.size(),
_num_scheduled_scanners);
+ // }
+
+ // #ifndef NDEBUG
+ // // This DCHECK is necessary.
+ // // We need to make sure each scan operator could have at least
1 scan tasks.
+ // // Or this scan operator will not be re-scheduled.
+ // if (!_pending_scanners.empty() && _num_scheduled_scanners == 0
&& _tasks_queue.empty()) {
+ // throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Scanner
scheduler logical error.");
+ // }
+ // #endif
+
+ // return Status::OK();
+ // }
Review Comment:
Large blocks of commented-out code (lines 523-548) should be removed. This
includes critical logic for checking margins and concurrency limits that
appears to have been disabled. If this is intentional, it needs proper
documentation explaining why this critical logic was disabled.
```suggestion
// [Removed: previously disabled margin and concurrency check logic. If
needed, see git history for details.]
```
##########
be/src/vec/exec/format/orc/vorc_reader.cpp:
##########
@@ -1334,10 +1336,9 @@ Status OrcReader::_fill_partition_columns(
const std::unordered_map<std::string, std::tuple<std::string, const
SlotDescriptor*>>&
partition_columns) {
DataTypeSerDe::FormatOptions _text_formatOptions;
- // todo: maybe do not need to build name to index map every time
- auto name_to_pos_map = block->get_name_to_pos_map();
for (const auto& kv : partition_columns) {
- auto col_ptr =
block->get_by_position(name_to_pos_map[kv.first]).column->assume_mutable();
+ auto col_ptr =
block->get_by_position((*_col_name_to_block_idx)[kv.first])
+ .column->assume_mutable();
Review Comment:
Potential null pointer dereference. `_col_name_to_block_idx` is dereferenced
without null checking. If not properly initialized, this will crash. Add a null
check or ensure it's always set before use.
##########
be/src/vec/exec/scan/scanner_context.cpp:
##########
@@ -487,43 +512,45 @@ int32_t
ScannerContext::_get_margin(std::unique_lock<std::mutex>& transfer_lock,
Status ScannerContext::schedule_scan_task(std::shared_ptr<ScanTask>
current_scan_task,
std::unique_lock<std::mutex>&
transfer_lock,
std::unique_lock<std::shared_mutex>&
scheduler_lock) {
+ std::cout << "ScannerContext::schedule_scan_task()" << std::endl;
if (current_scan_task &&
(!current_scan_task->cached_blocks.empty() ||
current_scan_task->is_eos())) {
throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Scanner scheduler
logical error.");
}
std::list<std::shared_ptr<ScanTask>> tasks_to_submit;
- int32_t margin = _get_margin(transfer_lock, scheduler_lock);
-
- // margin is less than zero. Means this scan operator could not submit any
scan task for now.
- if (margin <= 0) {
- // Be careful with current scan task.
- // We need to add it back to task queue to make sure it could be
resubmitted.
- if (current_scan_task) {
- // This usually happens when we should downgrade the concurrency.
- _pending_scanners.push(current_scan_task);
- VLOG_DEBUG << fmt::format(
- "{} push back scanner to task queue, because diff <= 0,
task_queue size "
- "{}, _num_scheduled_scanners {}",
- ctx_id, _tasks_queue.size(), _num_scheduled_scanners);
- }
-
-#ifndef NDEBUG
- // This DCHECK is necessary.
- // We need to make sure each scan operator could have at least 1 scan
tasks.
- // Or this scan operator will not be re-scheduled.
- if (!_pending_scanners.empty() && _num_scheduled_scanners == 0 &&
_tasks_queue.empty()) {
- throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Scanner
scheduler logical error.");
- }
-#endif
-
- return Status::OK();
- }
+ // int32_t margin = _get_margin(transfer_lock, scheduler_lock);
+
+ // // margin is less than zero. Means this scan operator could not
submit any scan task for now.
+ // if (margin <= 0) {
+ // // Be careful with current scan task.
+ // // We need to add it back to task queue to make sure it could
be resubmitted.
+ // if (current_scan_task) {
+ // // This usually happens when we should downgrade the
concurrency.
+ // _pending_scanners.push(current_scan_task);
+ // VLOG_DEBUG << fmt::format(
+ // "{} push back scanner to task queue, because diff
<= 0, task_queue size "
+ // "{}, _num_scheduled_scanners {}",
+ // ctx_id, _tasks_queue.size(),
_num_scheduled_scanners);
+ // }
+
+ // #ifndef NDEBUG
+ // // This DCHECK is necessary.
+ // // We need to make sure each scan operator could have at least
1 scan tasks.
+ // // Or this scan operator will not be re-scheduled.
+ // if (!_pending_scanners.empty() && _num_scheduled_scanners == 0
&& _tasks_queue.empty()) {
+ // throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Scanner
scheduler logical error.");
+ // }
+ // #endif
+
+ // return Status::OK();
+ // }
bool first_pull = true;
- while (margin-- > 0) {
+ // while (margin-- > 0) {
+ while (true) {
Review Comment:
Changing `while (margin-- > 0)` to `while (true)` creates an infinite loop.
This is a critical bug that will cause the scanner to run indefinitely without
respecting concurrency limits. The margin check was there to limit how many
scan tasks could be submitted.
```suggestion
int32_t margin = _max_scan_concurrency - cast_set<int32_t>(
_tasks_queue.size() + _num_scheduled_scanners +
tasks_to_submit.size());
while (margin-- > 0) {
```
##########
be/src/vec/exec/format/parquet/vparquet_group_reader.cpp:
##########
@@ -391,32 +391,32 @@ Status RowGroupReader::_read_column_data(Block* block,
FilterMap& filter_map) {
size_t batch_read_rows = 0;
bool has_eof = false;
- // todo: maybe do not need to build name to index map every time
- auto name_to_idx = block->get_name_to_pos_map();
for (auto& read_col_name : table_columns) {
- auto& column_with_type_and_name =
block->safe_get_by_position(name_to_idx[read_col_name]);
+ auto& column_with_type_and_name =
+
block->safe_get_by_position((*_col_name_to_block_idx)[read_col_name]);
Review Comment:
Potential null pointer dereference. `_col_name_to_block_idx` is dereferenced
without null checking. If not properly initialized, this will crash. Add a null
check or ensure it's always set before use.
##########
be/src/vec/exec/scan/scanner_context.cpp:
##########
@@ -583,13 +610,13 @@ Status
ScannerContext::schedule_scan_task(std::shared_ptr<ScanTask> current_scan
std::shared_ptr<ScanTask> ScannerContext::_pull_next_scan_task(
std::shared_ptr<ScanTask> current_scan_task, int32_t
current_concurrency) {
- if (current_concurrency >= _max_scan_concurrency) {
- VLOG_DEBUG << fmt::format(
- "ScannerContext {} current concurrency {} >=
_max_scan_concurrency {}, skip "
- "pull",
- ctx_id, current_concurrency, _max_scan_concurrency);
- return nullptr;
- }
+ // if (current_concurrency >= _max_scan_concurrency) {
+ // VLOG_DEBUG << fmt::format(
+ // "ScannerContext {} current concurrency {} >=
_max_scan_concurrency {}, skip "
+ // "pull",
+ // ctx_id, current_concurrency, _max_scan_concurrency);
+ // return nullptr;
+ // }
Review Comment:
The concurrency limit check has been commented out. This appears to be
debugging/experimental code that should not be merged. The commented code
controlled important resource management.
```suggestion
if (current_concurrency >= _max_scan_concurrency) {
VLOG_DEBUG << fmt::format(
"ScannerContext {} current concurrency {} >=
_max_scan_concurrency {}, skip "
"pull",
ctx_id, current_concurrency, _max_scan_concurrency);
return nullptr;
}
```
##########
be/src/vec/exec/format/table/iceberg_reader.h:
##########
@@ -211,8 +217,10 @@ class IcebergOrcReader final : public IcebergTableReader {
}
Status init_reader(
- const std::vector<std::string>& file_col_names, const
VExprContextSPtrs& conjuncts,
- const TupleDescriptor* tuple_descriptor, const RowDescriptor*
row_descriptor,
+ const std::vector<std::string>& file_col_names,
+ std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
+ const VExprContextSPtrs& conjuncts, const TupleDescriptor*
tuple_descriptor,
+ const RowDescriptor* row_descriptor,
const std::unordered_map<std::string, int>* colname_to_slot_id,
const VExprContextSPtrs* not_single_slot_filter_conjuncts,
const std::unordered_map<int, VExprContextSPtrs>*
slot_id_to_filter_conjuncts);
Review Comment:
The signature of `init_reader` has been changed to include
`col_name_to_block_idx` parameter, but the implementation needs to store this
pointer in the `_col_name_to_block_idx` member variable. Add
`_col_name_to_block_idx = col_name_to_block_idx;` at the start of the
init_reader implementation to avoid null pointer dereferences.
##########
be/src/vec/exec/format/table/iceberg_reader.cpp:
##########
@@ -236,23 +236,23 @@ Status IcebergTableReader::_expand_block_if_need(Block*
block) {
return Status::InternalError("Wrong expand column '{}'", col.name);
}
names.insert(col.name);
+ (*_col_name_to_block_idx)[col.name] =
static_cast<uint32_t>(block->columns());
block->insert(col);
}
return Status::OK();
}
Status IcebergTableReader::_shrink_block_if_need(Block* block) {
- // todo: maybe do not need to build name to index map every time
- auto name_to_pos_map = block->get_name_to_pos_map();
std::set<size_t> positions_to_erase;
for (const std::string& expand_col : _expand_col_names) {
- if (!name_to_pos_map.contains(expand_col)) {
+ if (!_col_name_to_block_idx->contains(expand_col)) {
return Status::InternalError("Wrong erase column '{}', block: {}",
expand_col,
block->dump_names());
}
- positions_to_erase.emplace(name_to_pos_map[expand_col]);
+ positions_to_erase.emplace((*_col_name_to_block_idx)[expand_col]);
}
block->erase(positions_to_erase);
+ _col_name_to_block_idx->erase(_expand_col_names.begin(),
_expand_col_names.end());
Review Comment:
The `erase` method signature is incorrect. `std::unordered_map::erase()`
doesn't accept iterator pairs like this. It should be a loop erasing individual
keys. Example: `for (const auto& name : _expand_col_names) {
_col_name_to_block_idx->erase(name); }`
```suggestion
for (const auto& name : _expand_col_names) {
_col_name_to_block_idx->erase(name);
}
```
##########
be/src/vec/exec/scan/file_scanner.cpp:
##########
@@ -537,8 +537,10 @@ Status FileScanner::_init_src_block(Block* block) {
if (!_is_load) {
_src_block_ptr = block;
- // todo: maybe do not need to build name to index map every time
- _src_block_name_to_idx = block->get_name_to_pos_map();
+ // Build name to index map only once on first call
+ if (_src_block_name_to_idx.empty()) {
+ _src_block_name_to_idx = block->get_name_to_pos_map();
+ }
Review Comment:
The empty check for `_src_block_name_to_idx` is insufficient. The map can be
non-empty from a previous file but may have different columns in a new file.
This could lead to incorrect column mappings when reading multiple files with
different schemas. Consider clearing and rebuilding the map when
`_src_block_ptr` changes or validating that the map matches the current block's
columns.
```suggestion
// Always rebuild name to index map to match the current block's
columns
_src_block_name_to_idx.clear();
_src_block_name_to_idx = block->get_name_to_pos_map();
```
##########
be/src/vec/exec/format/table/equality_delete.cpp:
##########
@@ -43,14 +43,11 @@ Status SimpleEqualityDelete::_build_set() {
return Status::OK();
}
-Status SimpleEqualityDelete::filter_data_block(Block* data_block) {
+Status SimpleEqualityDelete::filter_data_block(
+ Block* data_block, const std::unordered_map<std::string, uint32_t>*
col_name_to_block_idx) {
SCOPED_TIMER(equality_delete_time);
Review Comment:
Potential null pointer dereference. `col_name_to_block_idx` is used with
`->at()` without checking if it's nullptr. Add a null check before
dereferencing.
```suggestion
SCOPED_TIMER(equality_delete_time);
if (col_name_to_block_idx == nullptr) {
return Status::InternalError("col_name_to_block_idx is nullptr in
filter_data_block");
}
```
##########
be/src/vec/exec/format/table/iceberg_reader.h:
##########
@@ -165,8 +169,10 @@ class IcebergParquetReader final : public
IcebergTableReader {
: IcebergTableReader(std::move(file_format_reader), profile,
state, params, range,
kv_cache, io_ctx, meta_cache) {}
Status init_reader(
- const std::vector<std::string>& file_col_names, const
VExprContextSPtrs& conjuncts,
- const TupleDescriptor* tuple_descriptor, const RowDescriptor*
row_descriptor,
+ const std::vector<std::string>& file_col_names,
+ std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
+ const VExprContextSPtrs& conjuncts, const TupleDescriptor*
tuple_descriptor,
+ const RowDescriptor* row_descriptor,
const std::unordered_map<std::string, int>* colname_to_slot_id,
const VExprContextSPtrs* not_single_slot_filter_conjuncts,
const std::unordered_map<int, VExprContextSPtrs>*
slot_id_to_filter_conjuncts);
Review Comment:
The signature of `init_reader` has been changed to include
`col_name_to_block_idx` parameter, but the implementation in iceberg_reader.cpp
may not have been updated accordingly. Additionally, the implementation needs
to store this pointer in the `_col_name_to_block_idx` member variable to avoid
null pointer dereferences in `_expand_block_if_need` and
`_shrink_block_if_need`. Add `_col_name_to_block_idx = col_name_to_block_idx;`
at the start of the init_reader implementation.
##########
be/src/vec/exec/scan/scanner_context.cpp:
##########
@@ -300,28 +303,50 @@ Status ScannerContext::get_block_from_queue(RuntimeState*
state, vectorized::Blo
return_free_block(std::move(current_block));
}
- VLOG_DEBUG << fmt::format(
- "ScannerContext {} get block from queue, task_queue size {},
current scan "
- "task remaing cached_block size {}, eos {}, scheduled tasks
{}",
- ctx_id, _tasks_queue.size(), scan_task->cached_blocks.size(),
scan_task->is_eos(),
- _num_scheduled_scanners);
-
- if (scan_task->cached_blocks.empty()) {
- // All Cached blocks are consumed, pop this task from task_queue.
+ // VLOG_DEBUG << fmt::format(
+ // "ScannerContext {} get block from queue, task_queue size
{}, current scan "
+ // "task remaing cached_block size {}, eos {}, scheduled tasks
{}",
+ // ctx_id, _tasks_queue.size(),
scan_task->cached_blocks.size(), scan_task->is_eos(),
+ // _num_scheduled_scanners);
+ else {
if (!_tasks_queue.empty()) {
_tasks_queue.pop_front();
}
if (scan_task->is_eos()) {
// 1. if eos, record a finished scanner.
_num_finished_scanners++;
+ std::cout << "ScannerContext::get_block_from_queue() eos
_num_finished_scanners: "
+ << _num_finished_scanners << std::endl;
RETURN_IF_ERROR(
_scanner_scheduler->schedule_scan_task(shared_from_this(), nullptr, l));
} else {
+ std::cout << "ScannerContext::get_block_from_queue() not eos
resubmit scan task"
+ << std::endl;
RETURN_IF_ERROR(
_scanner_scheduler->schedule_scan_task(shared_from_this(), scan_task, l));
}
}
+ // if (scan_task->cached_blocks.empty()) {
+ // // All Cached blocks are consumed, pop this task from
task_queue.
+ // if (!_tasks_queue.empty()) {
+ // _tasks_queue.pop_front();
+ // }
+
+ // if (scan_task->is_eos()) {
+ // // 1. if eos, record a finished scanner.
+ // _num_finished_scanners++;
+ // std::cout << "ScannerContext::get_block_from_queue() eos
_num_finished_scanners: "
+ // << _num_finished_scanners << std::endl;
+ // RETURN_IF_ERROR(
+ //
_scanner_scheduler->schedule_scan_task(shared_from_this(), nullptr, l));
+ // } else {
+ // std::cout << "ScannerContext::get_block_from_queue() not
eos resubmit scan task"
+ // << std::endl;
+ // RETURN_IF_ERROR(
+ //
_scanner_scheduler->schedule_scan_task(shared_from_this(), scan_task, l));
+ // }
+ // }
Review Comment:
Large blocks of commented-out code (lines 330-349) should be removed rather
than left in the codebase. If this code needs to be preserved for reference, it
should be documented in commit history instead.
```suggestion
```
##########
be/src/olap/push_handler.cpp:
##########
@@ -646,9 +646,9 @@ Status PushBrokerReader::_get_next_reader() {
_io_ctx.get(),
_runtime_state.get());
init_status = parquet_reader->init_reader(
- _all_col_names, _push_down_exprs, _real_tuple_desc,
_default_val_row_desc.get(),
- _col_name_to_slot_id, &_not_single_slot_filter_conjuncts,
- &_slot_id_to_filter_conjuncts,
+ _all_col_names, nullptr, _push_down_exprs, _real_tuple_desc,
Review Comment:
Passing `nullptr` as the `col_name_to_block_idx` parameter will cause null
pointer dereferences in ParquetReader and its group readers when they try to
use `_col_name_to_block_idx`. Either create and pass a valid map, or update all
the reader implementations to handle nullptr gracefully.
##########
be/src/vec/exec/format/table/transactional_hive_reader.cpp:
##########
@@ -119,11 +122,14 @@ Status
TransactionalHiveReader::get_next_block_inner(Block* block, size_t* read_
DataTypePtr data_type = get_data_type_with_default_argument(
DataTypeFactory::instance().create_data_type(i.type, false));
MutableColumnPtr data_column = data_type->create_column();
+ (*_col_name_to_block_idx)[i.column_lower_case] =
static_cast<uint32_t>(block->columns());
block->insert(
ColumnWithTypeAndName(std::move(data_column), data_type,
i.column_lower_case));
}
auto res = _file_format_reader->get_next_block(block, read_rows, eof);
Block::erase_useless_column(block, block->columns() -
TransactionalHive::READ_PARAMS.size());
+ _col_name_to_block_idx->erase(READ_ROW_COLUMN_NAMES_LOWER_CASE.begin(),
+ READ_ROW_COLUMN_NAMES_LOWER_CASE.end());
Review Comment:
The `erase` method signature is incorrect. `std::unordered_map::erase()`
doesn't accept iterator pairs like this. It should be a loop erasing individual
keys or use a helper function. This code will fail to compile. Example: `for
(const auto& name : READ_ROW_COLUMN_NAMES_LOWER_CASE) {
_col_name_to_block_idx->erase(name); }`
```suggestion
for (const auto& name : READ_ROW_COLUMN_NAMES_LOWER_CASE) {
_col_name_to_block_idx->erase(name);
}
```
##########
be/test/vec/exec/format/table/iceberg/iceberg_reader_test.cpp:
##########
@@ -695,13 +699,16 @@ TEST_F(IcebergReaderTest, read_iceberg_orc_file) {
VExprContextSPtrs conjuncts; // Empty conjuncts for this test
std::vector<std::string> table_col_names = {"name", "profile"};
const RowDescriptor* row_descriptor = nullptr;
- const std::unordered_map<std::string, int>* colname_to_slot_id = nullptr;
+ std::unordered_map<std::string, uint32_t> col_name_to_block_idx = {
+ {"name", 0},
+ {"profile", 1},
+ };
const VExprContextSPtrs* not_single_slot_filter_conjuncts = nullptr;
const std::unordered_map<int, VExprContextSPtrs>*
slot_id_to_filter_conjuncts = nullptr;
Review Comment:
The variable `colname_to_slot_id` is used but not declared in this test
function. This will cause a compilation error. Add `const
std::unordered_map<std::string, int>* colname_to_slot_id = nullptr;` before
line 709.
```suggestion
const std::unordered_map<int, VExprContextSPtrs>*
slot_id_to_filter_conjuncts = nullptr;
const std::unordered_map<std::string, int>* colname_to_slot_id = nullptr;
```
##########
be/src/vec/exec/format/table/equality_delete.cpp:
##########
@@ -106,25 +103,25 @@ Status MultiEqualityDelete::_build_set() {
return Status::OK();
}
-Status MultiEqualityDelete::filter_data_block(Block* data_block) {
+Status MultiEqualityDelete::filter_data_block(
+ Block* data_block, const std::unordered_map<std::string, uint32_t>*
col_name_to_block_idx) {
SCOPED_TIMER(equality_delete_time);
size_t column_index = 0;
- // todo: maybe do not need to build name to index map every time
- auto name_to_pos_map = data_block->get_name_to_pos_map();
for (auto delete_col : _delete_block->get_columns_with_type_and_name()) {
const std::string& column_name = delete_col.name;
- auto column_and_type =
data_block->safe_get_by_position(name_to_pos_map[column_name]);
- if (name_to_pos_map.contains(column_name) == false) {
+ if (!col_name_to_block_idx->contains(column_name)) {
return Status::InternalError("Column '{}' not found in data block:
{}", column_name,
data_block->dump_structure());
}
+ auto column_and_type =
+
data_block->safe_get_by_position(col_name_to_block_idx->at(column_name));
if (!delete_col.type->equals(*column_and_type.type)) {
return Status::InternalError(
"Not support type change in column '{}', src type: {},
target type: {}",
column_name, delete_col.type->get_name(),
column_and_type.type->get_name());
}
- _data_column_index[column_index++] = name_to_pos_map[column_name];
+ _data_column_index[column_index++] =
col_name_to_block_idx->at(column_name);
Review Comment:
Potential null pointer dereference. `col_name_to_block_idx` is used with
`->contains()` and `->at()` without checking if it's nullptr. Add a null check
before dereferencing.
##########
be/src/vec/exec/scan/scanner_context.cpp:
##########
@@ -300,28 +303,50 @@ Status ScannerContext::get_block_from_queue(RuntimeState*
state, vectorized::Blo
return_free_block(std::move(current_block));
}
- VLOG_DEBUG << fmt::format(
- "ScannerContext {} get block from queue, task_queue size {},
current scan "
- "task remaing cached_block size {}, eos {}, scheduled tasks
{}",
- ctx_id, _tasks_queue.size(), scan_task->cached_blocks.size(),
scan_task->is_eos(),
- _num_scheduled_scanners);
-
- if (scan_task->cached_blocks.empty()) {
- // All Cached blocks are consumed, pop this task from task_queue.
+ // VLOG_DEBUG << fmt::format(
+ // "ScannerContext {} get block from queue, task_queue size
{}, current scan "
+ // "task remaing cached_block size {}, eos {}, scheduled tasks
{}",
+ // ctx_id, _tasks_queue.size(),
scan_task->cached_blocks.size(), scan_task->is_eos(),
+ // _num_scheduled_scanners);
+ else {
Review Comment:
The VLOG_DEBUG statement has been commented out but the logic that follows
has been changed from `if (scan_task->cached_blocks.empty())` to `else`. This
creates an `else` without a corresponding `if` block, which is a syntax error.
The commented-out VLOG_DEBUG should either be removed entirely or the control
flow should be fixed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]