This is an automated email from the ASF dual-hosted git repository.

Gabriel39 pushed a commit to branch refact_reader_branch
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/refact_reader_branch by this 
push:
     new 0fb11e4e0c3 cast for schema change (#63477)
0fb11e4e0c3 is described below

commit 0fb11e4e0c3751baeec63421d37cbec6bd7dd479
Author: Gabriel <[email protected]>
AuthorDate: Thu May 21 17:18:54 2026 +0800

    cast for schema change (#63477)
---
 be/src/exprs/vslot_ref.h                           |   2 +-
 be/src/format/reader/column_mapper.cpp             |  41 ++--
 be/src/format/reader/column_mapper.h               |   8 +-
 be/src/format/reader/expr/cast.cpp                 | 131 +++++++++++++
 .../vslot_ref.h => format/reader/expr/cast.h}      |  60 ++----
 be/src/format/reader/expr/slot_ref.h               |  16 +-
 be/src/format/reader/file_reader.h                 |   7 +
 be/src/format/reader/table_reader.h                |  10 +-
 be/test/format/reader/expr/cast_test.cpp           | 210 +++++++++++++++++++++
 be/test/format/reader/expr/table_expr_test.cpp     |  11 +-
 10 files changed, 422 insertions(+), 74 deletions(-)

diff --git a/be/src/exprs/vslot_ref.h b/be/src/exprs/vslot_ref.h
index 3ac9f641c19..ceb702728eb 100644
--- a/be/src/exprs/vslot_ref.h
+++ b/be/src/exprs/vslot_ref.h
@@ -31,7 +31,7 @@ class TExprNode;
 class Block;
 class VExprContext;
 
-class VSlotRef MOCK_REMOVE(final) : public VExpr {
+class VSlotRef : public VExpr {
     ENABLE_FACTORY_CREATOR(VSlotRef);
 
 public:
diff --git a/be/src/format/reader/column_mapper.cpp 
b/be/src/format/reader/column_mapper.cpp
index 7006365b054..7510413d07f 100644
--- a/be/src/format/reader/column_mapper.cpp
+++ b/be/src/format/reader/column_mapper.cpp
@@ -20,20 +20,20 @@
 #include <vector>
 
 #include "common/status.h"
-#include "expr/slot_ref.h"
+#include "format/reader/expr/cast.h"
+#include "format/reader/expr/slot_ref.h"
 #include "format/reader/file_reader.h"
 #include "format/reader/table_reader.h"
 
 namespace doris::reader {
 
+static constexpr const char* ROW_LINEAGE_ROW_ID = "_row_id";
+static constexpr const char* ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER = 
"_last_updated_sequence_number";
+
 Status TableColumnMapper::create_mapping(const std::vector<TableColumn>& 
projected_columns,
-                                         std::vector<SchemaField> block_schema,
                                          const std::map<std::string, Field>& 
partition_values,
                                          const std::vector<SchemaField>& 
file_schema) {
-    // 真实实现会做 field id/name matching、类型转换、复杂列 child mapping、缺失列
-    // default/partition/generated 表达式构造。
     _mappings.clear();
-    block_schema.clear();
     for (const auto& table_column : projected_columns) {
         ColumnMapping mapping;
         mapping.table_column_id = table_column.id;
@@ -43,24 +43,31 @@ Status TableColumnMapper::create_mapping(const 
std::vector<TableColumn>& project
             mapping.file_type = file_field->type;
             mapping.is_trivial = _is_same_type(mapping.table_type, 
mapping.file_type);
             if (!mapping.is_trivial) {
-                // TODO:
-                return Status::NotSupported(
-                        "column mapping with type conversion is not supported 
yet: table column "
-                        "'{}' (id={}, type={}) vs file column (id={}, 
type={})",
-                        table_column.name, mapping.table_column_id, 
mapping.table_type->get_name(),
-                        mapping.file_column_id.value(), 
mapping.file_type->get_name());
+                // 1. Data type mismatch (caused by schema evolution) and 
casting is needed.
+                auto expr = Cast::create_shared(mapping.table_type);
+                
expr->add_child(TableSlotRef::create_shared(mapping.file_column_id.value(),
+                                                            
mapping.file_column_id.value(), -1,
+                                                            mapping.file_type, 
file_field->name));
+                mapping.projection = VExprContext::create_shared(expr);
             } else {
+                // 2. Data type matches, trivial mapping.
                 mapping.projection = 
VExprContext::create_shared(TableSlotRef::create_shared(
-                        *mapping.file_column_id, block_schema.size(), -1, 
mapping.table_type));
+                        mapping.file_column_id.value(), 
mapping.file_column_id.value(), -1,
+                        mapping.file_type, file_field->name));
             }
-            block_schema.push_back(SchemaField {
-                    mapping.file_column_id.value(), table_column.name, 
mapping.table_type, {}});
-        } else if (table_column.default_expr != nullptr) {
-            mapping.is_constant = true;
-            mapping.default_expr = table_column.default_expr;
         } else if (table_column.is_partition_key && 
partition_values.count(table_column.name) > 0) {
+            // 3. Partition column, use partition value as a constant mapping. 
Note that partition column may also have default expression, but partition 
value should take precedence if it exists.
             mapping.default_expr = 
VExprContext::create_shared(TableLiteral::create_shared(
                     mapping.table_type, 
partition_values.at(table_column.name)));
+        } else if (table_column.default_expr != nullptr) {
+            // 4. Table column does not exist in file (column added by schema 
evolution) but has a default expression; use it as a constant mapping.
+            mapping.is_constant = true;
+            mapping.default_expr = table_column.default_expr;
+        } else if (table_column.name == ROW_LINEAGE_ROW_ID) {
+            // 5. Virtual column, use special mapping to indicate it should be 
materialized by table reader instead of read from file or evaluated from 
expression.
+            mapping.virtual_column_type = TableVirtualColumnType::ROW_ID;
+        } else if (table_column.name == ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER) {
+            mapping.virtual_column_type = 
TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER;
         } else {
             if (table_column.is_partition_key) {
                 return Status::InvalidArgument(
diff --git a/be/src/format/reader/column_mapper.h 
b/be/src/format/reader/column_mapper.h
index 4c6b510ff0e..d0d8076798b 100644
--- a/be/src/format/reader/column_mapper.h
+++ b/be/src/format/reader/column_mapper.h
@@ -40,6 +40,12 @@ enum class TableColumnMappingMode {
     BY_NAME,
 };
 
+enum TableVirtualColumnType {
+    INVALID = 0, // not a virtual column
+    ROW_ID = 1,
+    LAST_UPDATED_SEQUENCE_NUMBER = 2,
+};
+
 // 单个 table column 到 file column 的映射结果。
 // 这是 table 层和 file 层的核心边界对象。
 struct ColumnMapping {
@@ -59,6 +65,7 @@ struct ColumnMapping {
     std::vector<ColumnMapping> child_mappings;
     bool is_trivial = false;
     bool is_constant = false;
+    TableVirtualColumnType virtual_column_type = 
TableVirtualColumnType::INVALID;
     VExprContextSPtr default_expr;
 };
 
@@ -81,7 +88,6 @@ public:
     // 输出的 ColumnMapping 描述 table column 如何从 file column、常量列或表达式得到;
     // 后续 projection、filter localization 和 table block finalize 都应复用这份映射。
     virtual Status create_mapping(const std::vector<TableColumn>& 
projected_columns,
-                                  std::vector<SchemaField> block_schema,
                                   const std::map<std::string, Field>& 
partition_values,
                                   const std::vector<SchemaField>& file_schema);
 
diff --git a/be/src/format/reader/expr/cast.cpp 
b/be/src/format/reader/expr/cast.cpp
new file mode 100644
index 00000000000..69af83c9e77
--- /dev/null
+++ b/be/src/format/reader/expr/cast.cpp
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format/reader/expr/cast.h"
+
+#include <fmt/format.h>
+#include <gen_cpp/Exprs_types.h>
+#include <glog/logging.h>
+
+#include <ostream>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "core/block/column_with_type_and_name.h"
+#include "core/block/columns_with_type_and_name.h"
+#include "exprs/function/simple_function_factory.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vliteral.h"
+
+namespace doris {
+
+Status Cast::prepare(RuntimeState* state, const RowDescriptor& desc, 
VExprContext* context) {
+    RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
+    if (_children.size() != 1) {
+        return Status::InternalError(
+                fmt::format("Cast should have exactly 1 child expr, but got 
{}", _children.size()));
+    }
+    ColumnsWithTypeAndName argument_template;
+    argument_template.reserve(_children.size());
+    if (_children[0]->is_literal()) {
+        // Some functions need the literal column itself to derive the 
return type.
+        auto literal_node = std::dynamic_pointer_cast<VLiteral>(_children[0]);
+        argument_template.emplace_back(literal_node->get_column_ptr(), 
_children[0]->data_type(),
+                                       _children[0]->expr_name());
+    } else {
+        argument_template.emplace_back(nullptr, _children[0]->data_type(),
+                                       _children[0]->expr_name());
+    }
+
+    _expr_name = fmt::format("CAST(arguments={},return={})", 
_children[0]->data_type()->get_name(),
+                             _data_type->get_name());
+    // get the function. won't prepare function.
+    _function = SimpleFunctionFactory::instance().get_function(
+            "CAST", argument_template, _data_type,
+            {.new_version_unix_timestamp = 
state->query_options().new_version_unix_timestamp},
+            state->be_exec_version());
+    if (_function == nullptr) {
+        return Status::InternalError("Could not find function {} ", 
_expr_name);
+    }
+    VExpr::register_function_context(state, context);
+    _prepare_finished = true;
+    return Status::OK();
+}
+
+Status Cast::open(RuntimeState* state, VExprContext* context,
+                  FunctionContext::FunctionStateScope scope) {
+    DCHECK(_prepare_finished);
+    for (auto& i : _children) {
+        RETURN_IF_ERROR(i->open(state, context, scope));
+    }
+    RETURN_IF_ERROR(VExpr::init_function_context(state, context, scope, 
_function));
+    if (scope == FunctionContext::FRAGMENT_LOCAL) {
+        RETURN_IF_ERROR(VExpr::get_const_col(context, nullptr));
+    }
+    _open_finished = true;
+    return Status::OK();
+}
+
+void Cast::close(VExprContext* context, FunctionContext::FunctionStateScope 
scope) {
+    VExpr::close_function_context(context, scope, _function);
+    VExpr::close(context, scope);
+}
+
+Status Cast::execute_column_impl(VExprContext* context, const Block* block,
+                                 const Selector* selector, size_t count,
+                                 ColumnPtr& result_column) const {
+    return _do_execute(context, block, selector, count, result_column);
+}
+
+std::string Cast::debug_string() const {
+    return _expr_name;
+}
+
+Status Cast::_do_execute(VExprContext* context, const Block* block, const 
Selector* selector,
+                         size_t count, ColumnPtr& result_column) const {
+    DCHECK(_open_finished || block == nullptr) << debug_string();
+    if (_children.size() != 1) {
+        return Status::InternalError(
+                fmt::format("Cast should have exactly 1 child expr, but got 
{}", _children.size()));
+    }
+    if (is_const_and_have_executed()) { // const have executed in open function
+        result_column = get_result_from_const(count);
+        return Status::OK();
+    }
+
+    Block temp_block;
+    ColumnNumbers args(1);
+
+    ColumnPtr tmp_arg_column;
+    RETURN_IF_ERROR(_children[0]->execute_column(context, block, selector, 
count, tmp_arg_column));
+    auto arg_type = _children[0]->execute_type(block);
+    temp_block.insert({tmp_arg_column, arg_type, _children[0]->expr_name()});
+    args[0] = 0;
+
+    uint32_t num_columns_without_result = temp_block.columns();
+    // prepare a column to save result
+    temp_block.insert({nullptr, _data_type, _expr_name});
+
+    RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), 
temp_block, args,
+                                       num_columns_without_result, count));
+    result_column = 
temp_block.get_by_position(num_columns_without_result).column;
+    DCHECK_EQ(result_column->size(), count);
+    RETURN_IF_ERROR(result_column->column_self_check());
+    return Status::OK();
+}
+
+} // namespace doris
diff --git a/be/src/exprs/vslot_ref.h b/be/src/format/reader/expr/cast.h
similarity index 50%
copy from be/src/exprs/vslot_ref.h
copy to be/src/format/reader/expr/cast.h
index 3ac9f641c19..7d8ca437ba3 100644
--- a/be/src/exprs/vslot_ref.h
+++ b/be/src/format/reader/expr/cast.h
@@ -16,72 +16,44 @@
 // under the License.
 
 #pragma once
+
 #include <string>
 
 #include "common/object_pool.h"
 #include "common/status.h"
+#include "exprs/function_context.h"
 #include "exprs/vexpr.h"
 
 namespace doris {
-class SlotDescriptor;
 class RowDescriptor;
 class RuntimeState;
 class TExprNode;
-
 class Block;
 class VExprContext;
+} // namespace doris
+
+namespace doris {
 
-class VSlotRef MOCK_REMOVE(final) : public VExpr {
-    ENABLE_FACTORY_CREATOR(VSlotRef);
+class Cast final : public VExpr {
+    ENABLE_FACTORY_CREATOR(Cast);
 
 public:
-    VSlotRef(const TExprNode& node);
-    VSlotRef(const SlotDescriptor* desc);
-#ifdef BE_TEST
-    VSlotRef() = default;
-    void set_column_id(int column_id) { _column_id = column_id; }
-    void set_slot_id(int slot_id) { _slot_id = slot_id; }
-#endif
+    Cast(const DataTypePtr& type) { _data_type = type; }
+    ~Cast() override = default;
     Status prepare(RuntimeState* state, const RowDescriptor& desc, 
VExprContext* context) override;
     Status open(RuntimeState* state, VExprContext* context,
                 FunctionContext::FunctionStateScope scope) override;
-    Status execute(VExprContext* context, Block* block, int* result_column_id) 
const override;
+    void close(VExprContext* context, FunctionContext::FunctionStateScope 
scope) override;
     Status execute_column_impl(VExprContext* context, const Block* block, 
const Selector* selector,
                                size_t count, ColumnPtr& result_column) const 
override;
-    DataTypePtr execute_type(const Block* block) const override;
-
-    const std::string& expr_name() const override;
-    std::string expr_label() override;
     std::string debug_string() const override;
-    bool is_constant() const override { return false; }
-
-    int column_id() const { return _column_id; }
-
-    MOCK_FUNCTION int slot_id() const { return _slot_id; }
-
-    bool equals(const VExpr& other) override;
-
-    size_t estimate_memory(const size_t rows) override { return 0; }
-
-    void collect_slot_column_ids(std::set<int>& column_ids) const override {
-        column_ids.insert(_column_id);
-    }
-
-    MOCK_FUNCTION const std::string& column_name() const { return 
*_column_name; }
-
-    uint64_t get_digest(uint64_t seed) const override;
-
-    double execute_cost() const override { return 0.0; }
-
-protected:
-    VSlotRef(int slot_id, int column_id, int column_uniq_id)
-            : _slot_id(slot_id), _column_id(column_id), 
_column_uniq_id(column_uniq_id) {}
+    uint64_t get_digest(uint64_t seed) const override { return 0; }
+    const std::string& expr_name() const override { return _expr_name; }
 
 private:
-    int _slot_id;
-    int _column_id;
-    int _column_uniq_id = -1;
-    const std::string* _column_name = nullptr;
-    const std::string _column_label;
+    Status _do_execute(VExprContext* context, const Block* block, const 
Selector* selector,
+                       size_t count, ColumnPtr& result_column) const;
+    std::string _expr_name;
+    FunctionBasePtr _function;
 };
 } // namespace doris
diff --git a/be/src/format/reader/expr/slot_ref.h 
b/be/src/format/reader/expr/slot_ref.h
index 6b5d027602e..fd4782a1bdd 100644
--- a/be/src/format/reader/expr/slot_ref.h
+++ b/be/src/format/reader/expr/slot_ref.h
@@ -26,14 +26,26 @@ class TableSlotRef : public VSlotRef {
     ENABLE_FACTORY_CREATOR(TableSlotRef);
 
 public:
-    TableSlotRef(int slot_id, int column_id, int column_uniq_id, const 
DataTypePtr& type)
-            : VSlotRef(slot_id, column_id, column_uniq_id) {
+    TableSlotRef(int slot_id, int column_id, int column_uniq_id, const 
DataTypePtr& type,
+                 const std::string& column_name)
+            : VSlotRef(slot_id, column_id, column_uniq_id), 
_cname(column_name) {
         _data_type = type;
     }
 
     Status prepare(RuntimeState* state, const RowDescriptor& desc, 
VExprContext* context) override {
+        if (_prepared) {
+            return Status::OK();
+        }
+        _prepared = true;
+        _prepare_finished = true;
         return Status::OK();
     }
+
+    const std::string& expr_name() const override { return _cname; }
+    const std::string& column_name() const override { return _cname; }
+
+private:
+    const std::string _cname;
 };
 
 } // namespace doris
diff --git a/be/src/format/reader/file_reader.h 
b/be/src/format/reader/file_reader.h
index 6dfbb4a8420..96ace67d8de 100644
--- a/be/src/format/reader/file_reader.h
+++ b/be/src/format/reader/file_reader.h
@@ -42,6 +42,12 @@ namespace doris::reader {
 
 using ColumnId = int32_t;
 
+enum ColumnType {
+    DATA_COLUMN = 0, // normal data column
+    ROW_NUMBER = 1,  // row number in a file
+    FILE_NAME = 2,   // file name
+};
+
 // 文件本地 schema 字段。
 // 这是 FileReader 暴露给 table 层的 file-local schema 视图,不携带 table/global
 // schema 语义。Iceberg field id、name mapping、default/generated/partition 列都不在
@@ -51,6 +57,7 @@ struct SchemaField {
     std::string name;
     DataTypePtr type;
     std::vector<SchemaField> children;
+    ColumnType column_type = ColumnType::DATA_COLUMN;
 };
 
 // 已经 localize 到文件 schema 的过滤条件。
diff --git a/be/src/format/reader/table_reader.h 
b/be/src/format/reader/table_reader.h
index d14e1e78261..c3744427aa0 100644
--- a/be/src/format/reader/table_reader.h
+++ b/be/src/format/reader/table_reader.h
@@ -232,11 +232,10 @@ protected:
     // 打开当前具体 reader。
     // 子类在这里基于当前 split/task 初始化底层 FileReader。
     virtual Status open_reader() {
-        std::vector<SchemaField> file_schema;
-        RETURN_IF_ERROR(_data_reader.reader->get_schema(&file_schema));
-        
RETURN_IF_ERROR(_data_reader.column_mapper.create_mapping(_options.projected_columns,
-                                                                  
_data_reader.block_schema,
-                                                                  
_partition_values, file_schema));
+        _data_reader.block_schema.clear();
+        
RETURN_IF_ERROR(_data_reader.reader->get_schema(&_data_reader.block_schema));
+        RETURN_IF_ERROR(_data_reader.column_mapper.create_mapping(
+                _options.projected_columns, _partition_values, 
_data_reader.block_schema));
 
         FileScanRequest file_request;
         RETURN_IF_ERROR(_data_reader.column_mapper.create_scan_request(
@@ -270,7 +269,6 @@ protected:
     struct DataReader {
         std::unique_ptr<FileReader> reader;
         TableColumnMapper column_mapper;
-        // Schema of blocks from file reader.
         std::vector<SchemaField> block_schema;
     };
     DataReader _data_reader;
diff --git a/be/test/format/reader/expr/cast_test.cpp 
b/be/test/format/reader/expr/cast_test.cpp
new file mode 100644
index 00000000000..4f215418953
--- /dev/null
+++ b/be/test/format/reader/expr/cast_test.cpp
@@ -0,0 +1,210 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format/reader/expr/cast.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/field.h"
+#include "exprs/vexpr_context.h"
+#include "format/reader/column_mapper.h"
+#include "format/reader/expr/literal.h"
+#include "format/reader/expr/slot_ref.h"
+#include "format/reader/file_reader.h"
+#include "format/reader/table_reader.h"
+#include "runtime/descriptors.h"
+#include "testutil/column_helper.h"
+#include "testutil/mock/mock_runtime_state.h"
+
+namespace doris {
+
+class CastTest : public testing::Test {
+protected:
+    void SetUp() override { state.set_enable_strict_cast(true); }
+
+    static VExprContextSPtr create_context(const DataTypePtr& return_type,
+                                           const DataTypePtr& child_type, int 
child_column_id = 0) {
+        auto cast = Cast::create_shared(return_type);
+        cast->add_child(TableSlotRef::create_shared(child_column_id, 
child_column_id, -1,
+                                                    child_type, 
"source_column"));
+        return VExprContext::create_shared(cast);
+    }
+
+    Status prepare_open_execute(VExprContext* context, Block* block, int* 
result_column_id) {
+        RETURN_IF_ERROR(context->prepare(&state, RowDescriptor()));
+        RETURN_IF_ERROR(context->open(&state));
+        return context->execute(block, result_column_id);
+    }
+
+    MockRuntimeState state;
+};
+
+TEST_F(CastTest, CastIntSlotToBigInt) {
+    auto source_type = std::make_shared<DataTypeInt32>();
+    auto return_type = std::make_shared<DataTypeInt64>();
+    auto context = create_context(return_type, source_type);
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({1, -2, 
3}));
+
+    int result_column_id = -1;
+    auto status = prepare_open_execute(context.get(), &block, 
&result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+
+    ASSERT_EQ(result_column_id, 1);
+    ASSERT_EQ(block.columns(), 2);
+    EXPECT_EQ(block.get_by_position(result_column_id).type, return_type);
+    const auto& result_column =
+            assert_cast<const 
ColumnInt64&>(*block.get_by_position(result_column_id).column);
+    EXPECT_EQ(result_column.get_data()[0], 1);
+    EXPECT_EQ(result_column.get_data()[1], -2);
+    EXPECT_EQ(result_column.get_data()[2], 3);
+
+    context->close();
+}
+
+TEST_F(CastTest, CastStringSlotToNullableInt) {
+    state.set_enable_strict_cast(false);
+    auto source_type = std::make_shared<DataTypeString>();
+    auto return_type = 
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
+    auto context = create_context(return_type, source_type);
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeString>({"10", 
"bad", "-3"}));
+
+    int result_column_id = -1;
+    auto status = prepare_open_execute(context.get(), &block, 
&result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& nullable_column =
+            assert_cast<const 
ColumnNullable&>(*block.get_by_position(result_column_id).column);
+    const auto& result_column =
+            assert_cast<const 
ColumnInt32&>(nullable_column.get_nested_column());
+    const auto& null_map = nullable_column.get_null_map_data();
+    EXPECT_EQ(result_column.get_data()[0], 10);
+    EXPECT_EQ(result_column.get_data()[2], -3);
+    EXPECT_EQ(null_map[0], 0);
+    EXPECT_EQ(null_map[1], 1);
+    EXPECT_EQ(null_map[2], 0);
+
+    context->close();
+}
+
+TEST_F(CastTest, CastLiteralToString) {
+    auto source_type = std::make_shared<DataTypeInt32>();
+    auto return_type = std::make_shared<DataTypeString>();
+    auto cast = Cast::create_shared(return_type);
+    cast->add_child(TableLiteral::create_shared(source_type, 
Field::create_field<TYPE_INT>(123)));
+    auto context = VExprContext::create_shared(cast);
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({1, 2, 
3}));
+
+    int result_column_id = -1;
+    auto status = prepare_open_execute(context.get(), &block, 
&result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& result = block.get_by_position(result_column_id);
+    EXPECT_EQ(result.type->to_string(*result.column, 0), "123");
+    EXPECT_EQ(result.type->to_string(*result.column, 1), "123");
+    EXPECT_EQ(result.type->to_string(*result.column, 2), "123");
+
+    context->close();
+}
+
+TEST_F(CastTest, EmptyBlockAppendsEmptyResultColumn) {
+    auto source_type = std::make_shared<DataTypeInt32>();
+    auto return_type = std::make_shared<DataTypeInt64>();
+    auto context = create_context(return_type, source_type);
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({}));
+
+    int result_column_id = -1;
+    auto status = prepare_open_execute(context.get(), &block, 
&result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+
+    ASSERT_EQ(result_column_id, 1);
+    EXPECT_EQ(block.get_by_position(result_column_id).column->size(), 0);
+
+    context->close();
+}
+
+TEST_F(CastTest, PrepareRejectsMissingChild) {
+    auto cast = Cast::create_shared(std::make_shared<DataTypeInt64>());
+    VExprContext context(cast);
+
+    auto status = context.prepare(&state, RowDescriptor());
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("exactly 1 child expr"), 
std::string::npos);
+}
+
+TEST_F(CastTest, PrepareRejectsMultipleChildren) {
+    auto child_type = std::make_shared<DataTypeInt32>();
+    auto cast = Cast::create_shared(std::make_shared<DataTypeInt64>());
+    cast->add_child(TableSlotRef::create_shared(0, 0, -1, child_type, "c0"));
+    cast->add_child(TableSlotRef::create_shared(1, 1, -1, child_type, "c1"));
+    VExprContext context(cast);
+
+    auto status = context.prepare(&state, RowDescriptor());
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("exactly 1 child expr"), 
std::string::npos);
+}
+
+TEST_F(CastTest, ColumnMapperBuildsCastProjectionForTypeMismatch) {
+    reader::TableColumnMapper mapper;
+    reader::TableColumn table_column;
+    table_column.id = 7;
+    table_column.name = "value";
+    table_column.type = std::make_shared<DataTypeInt64>();
+    std::vector<reader::TableColumn> projected_columns {table_column};
+
+    reader::SchemaField file_field;
+    file_field.id = 0;
+    file_field.name = "value";
+    file_field.type = std::make_shared<DataTypeInt32>();
+    std::vector<reader::SchemaField> file_schema {file_field};
+
+    auto status = mapper.create_mapping(projected_columns, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    const auto& mapping = mapper.mappings()[0];
+    EXPECT_FALSE(mapping.is_trivial);
+    ASSERT_NE(mapping.projection, nullptr);
+
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({11, 
22}));
+    int result_column_id = -1;
+    status = prepare_open_execute(mapping.projection.get(), &block, 
&result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& result_column =
+            assert_cast<const 
ColumnInt64&>(*block.get_by_position(result_column_id).column);
+    EXPECT_EQ(result_column.get_data()[0], 11);
+    EXPECT_EQ(result_column.get_data()[1], 22);
+
+    mapping.projection->close();
+}
+
+} // namespace doris
diff --git a/be/test/format/reader/expr/table_expr_test.cpp 
b/be/test/format/reader/expr/table_expr_test.cpp
index dd831071483..3caca73c6c5 100644
--- a/be/test/format/reader/expr/table_expr_test.cpp
+++ b/be/test/format/reader/expr/table_expr_test.cpp
@@ -79,11 +79,14 @@ TEST(TableLiteralTest, ExecuteAppendsConstColumnToBlock) {
 
 TEST(TableSlotRefTest, KeepsSlotColumnIdsAndType) {
     auto type = std::make_shared<DataTypeInt32>();
-    auto slot_ref = TableSlotRef::create_shared(10, 20, 30, type);
+    std::string name = "file_col";
+    auto slot_ref = TableSlotRef::create_shared(10, 20, 30, type, name);
 
     EXPECT_EQ(slot_ref->slot_id(), 10);
     EXPECT_EQ(slot_ref->column_id(), 20);
     EXPECT_EQ(slot_ref->data_type(), type);
+    EXPECT_EQ(slot_ref->expr_name(), "file_col");
+    EXPECT_EQ(slot_ref->column_name(), "file_col");
     EXPECT_FALSE(slot_ref->is_constant());
 
     std::set<int> column_ids;
@@ -94,14 +97,16 @@ TEST(TableSlotRefTest, KeepsSlotColumnIdsAndType) {
 
 TEST(TableSlotRefTest, PrepareDoesNotRequireRowDescriptor) {
     auto type = std::make_shared<DataTypeInt32>();
-    auto slot_ref = TableSlotRef::create_shared(10, 20, 30, type);
+    std::string name = "";
+    auto slot_ref = TableSlotRef::create_shared(10, 20, 30, type, name);
 
     EXPECT_TRUE(slot_ref->prepare(nullptr, RowDescriptor(), nullptr).ok());
 }
 
 TEST(TableSlotRefTest, ExecuteReturnsReferencedColumnId) {
     auto type = std::make_shared<DataTypeInt32>();
-    auto slot_ref = TableSlotRef::create_shared(10, 1, 30, type);
+    std::string name = "";
+    auto slot_ref = TableSlotRef::create_shared(10, 1, 30, type, name);
     Block block;
     block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({1, 2, 
3}));
     block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({4, 5, 
6}));


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to