This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 9d3c35bc9b5 [refactor](be) Derive get_storage_field_type from 
primitive type (#64341)
9d3c35bc9b5 is described below

commit 9d3c35bc9b56385860ae829c8bbb0fc28e2234fe
Author: Chenyang Sun <[email protected]>
AuthorDate: Wed Jun 10 18:11:00 2026 +0800

    [refactor](be) Derive get_storage_field_type from primitive type (#64341)
    
    1. Make IDataType::get_storage_field_type() derive the field type from
    the primitive type
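    2. Delete SegmentIterator::_is_char_type (and _vec_init_char_column_id);
    the CHAR special case is no longer needed once the storage field type
    is derived from the primitive type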
    
    
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary:
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
            - [x] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
        - [ ] Yes. <!-- Add document PR link here. eg:
    https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
    
    Co-authored-by: Claude Opus 4.8 <[email protected]>
---
 be/src/core/data_type/data_type.cpp                |  5 ++
 be/src/core/data_type/data_type.h                  |  4 +-
 be/src/core/data_type/data_type_agg_state.h        |  4 --
 be/src/core/data_type/data_type_array.h            |  4 --
 be/src/core/data_type/data_type_bitmap.h           |  4 --
 be/src/core/data_type/data_type_date.h             |  3 --
 .../core/data_type/data_type_date_or_datetime_v2.h |  6 ---
 be/src/core/data_type/data_type_date_time.h        |  4 --
 be/src/core/data_type/data_type_hll.h              |  4 --
 be/src/core/data_type/data_type_ipv4.h             |  4 --
 be/src/core/data_type/data_type_ipv6.h             |  3 --
 be/src/core/data_type/data_type_jsonb.h            |  3 --
 be/src/core/data_type/data_type_map.h              |  3 --
 be/src/core/data_type/data_type_number_base.h      | 53 ----------------------
 be/src/core/data_type/data_type_quantilestate.h    |  3 --
 be/src/core/data_type/data_type_string.h           |  4 --
 be/src/core/data_type/data_type_struct.h           |  3 --
 be/src/core/data_type/data_type_variant.h          |  3 --
 be/src/storage/segment/segment_iterator.cpp        | 22 +--------
 be/src/storage/segment/segment_iterator.h          |  3 --
 20 files changed, 9 insertions(+), 133 deletions(-)

diff --git a/be/src/core/data_type/data_type.cpp 
b/be/src/core/data_type/data_type.cpp
index a11c17a6cd2..94f7b6c38c9 100644
--- a/be/src/core/data_type/data_type.cpp
+++ b/be/src/core/data_type/data_type.cpp
@@ -34,6 +34,7 @@
 #include "core/data_type/define_primitive_type.h"
 #include "core/data_type_serde/data_type_serde.h"
 #include "core/field.h"
+#include "storage/tablet/tablet_schema.h"
 
 namespace doris {
 class BufferWritable;
@@ -45,6 +46,10 @@ IDataType::IDataType() = default;
 
 IDataType::~IDataType() = default;
 
+doris::FieldType IDataType::get_storage_field_type() const {
+    return TabletColumn::get_field_type_by_type(get_primitive_type());
+}
+
 String IDataType::get_name() const {
     return do_get_name();
 }
diff --git a/be/src/core/data_type/data_type.h 
b/be/src/core/data_type/data_type.h
index 9fb341302c0..253bd8e49f9 100644
--- a/be/src/core/data_type/data_type.h
+++ b/be/src/core/data_type/data_type.h
@@ -83,7 +83,9 @@ public:
     virtual const std::string get_family_name() const = 0;
     virtual PrimitiveType get_primitive_type() const = 0;
 
-    virtual doris::FieldType get_storage_field_type() const = 0;
+    // Derived from the primitive type by default (e.g. TYPE_CHAR -> 
OLAP_FIELD_TYPE_CHAR).
+    // Types without a direct 1:1 mapping override this.
+    virtual doris::FieldType get_storage_field_type() const;
     std::string to_string(const IColumn& column, size_t row_num,
                           const DataTypeSerDe::FormatOptions& options) const;
     // get specific serializer or deserializer
diff --git a/be/src/core/data_type/data_type_agg_state.h 
b/be/src/core/data_type/data_type_agg_state.h
index f1513c97c76..378315f9512 100644
--- a/be/src/core/data_type/data_type_agg_state.h
+++ b/be/src/core/data_type/data_type_agg_state.h
@@ -85,10 +85,6 @@ public:
 
     PrimitiveType get_primitive_type() const override { return 
PrimitiveType::TYPE_AGG_STATE; }
 
-    doris::FieldType get_storage_field_type() const override {
-        return doris::FieldType::OLAP_FIELD_TYPE_AGG_STATE;
-    }
-
     const DataTypes& get_sub_types() const { return _sub_types; }
 
     void to_pb_column_meta(PColumnMeta* col_meta) const override {
diff --git a/be/src/core/data_type/data_type_array.h 
b/be/src/core/data_type/data_type_array.h
index 5ddc5236c41..26479859121 100644
--- a/be/src/core/data_type/data_type_array.h
+++ b/be/src/core/data_type/data_type_array.h
@@ -59,10 +59,6 @@ public:
 
     PrimitiveType get_primitive_type() const override { return 
PrimitiveType::TYPE_ARRAY; }
 
-    doris::FieldType get_storage_field_type() const override {
-        return doris::FieldType::OLAP_FIELD_TYPE_ARRAY;
-    }
-
     std::string do_get_name() const override { return "Array(" + 
nested->get_name() + ")"; }
 
     const std::string get_family_name() const override { return "Array"; }
diff --git a/be/src/core/data_type/data_type_bitmap.h 
b/be/src/core/data_type/data_type_bitmap.h
index c47e9b30a78..5e7fb8b67ba 100644
--- a/be/src/core/data_type/data_type_bitmap.h
+++ b/be/src/core/data_type/data_type_bitmap.h
@@ -54,10 +54,6 @@ public:
     const std::string get_family_name() const override { return "BitMap"; }
     PrimitiveType get_primitive_type() const override { return 
PrimitiveType::TYPE_BITMAP; }
 
-    doris::FieldType get_storage_field_type() const override {
-        return doris::FieldType::OLAP_FIELD_TYPE_BITMAP;
-    }
-
     int64_t get_uncompressed_serialized_bytes(const IColumn& column,
                                               int be_exec_version) const 
override;
     char* serialize(const IColumn& column, char* buf, int be_exec_version) 
const override;
diff --git a/be/src/core/data_type/data_type_date.h 
b/be/src/core/data_type/data_type_date.h
index d048eff6aaa..8acdb2f20e1 100644
--- a/be/src/core/data_type/data_type_date.h
+++ b/be/src/core/data_type/data_type_date.h
@@ -42,9 +42,6 @@ public:
     static constexpr PrimitiveType PType = TYPE_DATE;
     PrimitiveType get_primitive_type() const override { return 
PrimitiveType::TYPE_DATE; }
 
-    doris::FieldType get_storage_field_type() const override {
-        return doris::FieldType::OLAP_FIELD_TYPE_DATE;
-    }
     const std::string get_family_name() const override { return "Date"; }
     std::string do_get_name() const override { return "Date"; }
 
diff --git a/be/src/core/data_type/data_type_date_or_datetime_v2.h 
b/be/src/core/data_type/data_type_date_or_datetime_v2.h
index 80b04000c66..0e16644863e 100644
--- a/be/src/core/data_type/data_type_date_or_datetime_v2.h
+++ b/be/src/core/data_type/data_type_date_or_datetime_v2.h
@@ -54,9 +54,6 @@ public:
     static constexpr PrimitiveType PType = TYPE_DATEV2;
     PrimitiveType get_primitive_type() const override { return 
PrimitiveType::TYPE_DATEV2; }
 
-    doris::FieldType get_storage_field_type() const override {
-        return doris::FieldType::OLAP_FIELD_TYPE_DATEV2;
-    }
     const std::string get_family_name() const override { return "DateV2"; }
     std::string do_get_name() const override { return "DateV2"; }
 
@@ -111,9 +108,6 @@ public:
         scalar_type->set_scale(_scale);
     }
 
-    doris::FieldType get_storage_field_type() const override {
-        return doris::FieldType::OLAP_FIELD_TYPE_DATETIMEV2;
-    }
     const std::string get_family_name() const override { return "DateTimeV2"; }
     std::string do_get_name() const override {
         return "DateTimeV2(" + std::to_string(_scale) + ")";
diff --git a/be/src/core/data_type/data_type_date_time.h 
b/be/src/core/data_type/data_type_date_time.h
index 0cdfda2405d..8e4d49913af 100644
--- a/be/src/core/data_type/data_type_date_time.h
+++ b/be/src/core/data_type/data_type_date_time.h
@@ -69,10 +69,6 @@ public:
     std::string do_get_name() const override { return "DateTime"; }
     PrimitiveType get_primitive_type() const override { return 
PrimitiveType::TYPE_DATETIME; }
 
-    doris::FieldType get_storage_field_type() const override {
-        return doris::FieldType::OLAP_FIELD_TYPE_DATETIME;
-    }
-
     bool equals(const IDataType& rhs) const override;
 #ifdef BE_TEST
     /// TODO: remove this in the future
diff --git a/be/src/core/data_type/data_type_hll.h 
b/be/src/core/data_type/data_type_hll.h
index 3c4b632cc14..fb172e7e2b3 100644
--- a/be/src/core/data_type/data_type_hll.h
+++ b/be/src/core/data_type/data_type_hll.h
@@ -54,10 +54,6 @@ public:
 
     PrimitiveType get_primitive_type() const override { return 
PrimitiveType::TYPE_HLL; }
 
-    doris::FieldType get_storage_field_type() const override {
-        return doris::FieldType::OLAP_FIELD_TYPE_HLL;
-    }
-
     int64_t get_uncompressed_serialized_bytes(const IColumn& column,
                                               int be_exec_version) const 
override;
     char* serialize(const IColumn& column, char* buf, int be_exec_version) 
const override;
diff --git a/be/src/core/data_type/data_type_ipv4.h 
b/be/src/core/data_type/data_type_ipv4.h
index 488e297736f..d9658047baa 100644
--- a/be/src/core/data_type/data_type_ipv4.h
+++ b/be/src/core/data_type/data_type_ipv4.h
@@ -45,10 +45,6 @@ public:
     const std::string get_family_name() const override { return "IPv4"; }
     std::string do_get_name() const override { return "IPv4"; }
 
-    doris::FieldType get_storage_field_type() const override {
-        return doris::FieldType::OLAP_FIELD_TYPE_IPV4;
-    }
-
     bool equals(const IDataType& rhs) const override;
 
     Field get_field(const TExprNode& node) const override;
diff --git a/be/src/core/data_type/data_type_ipv6.h 
b/be/src/core/data_type/data_type_ipv6.h
index cc48d3c7284..2ab1b157631 100644
--- a/be/src/core/data_type/data_type_ipv6.h
+++ b/be/src/core/data_type/data_type_ipv6.h
@@ -42,9 +42,6 @@ namespace doris {
 class DataTypeIPv6 final : public DataTypeNumberBase<PrimitiveType::TYPE_IPV6> 
{
 public:
     PrimitiveType get_primitive_type() const override { return 
PrimitiveType::TYPE_IPV6; }
-    doris::FieldType get_storage_field_type() const override {
-        return doris::FieldType::OLAP_FIELD_TYPE_IPV6;
-    }
     const std::string get_family_name() const override { return "IPv6"; }
     std::string do_get_name() const override { return "IPv6"; }
 
diff --git a/be/src/core/data_type/data_type_jsonb.h 
b/be/src/core/data_type/data_type_jsonb.h
index 7cd866d2c62..483ef874fe9 100644
--- a/be/src/core/data_type/data_type_jsonb.h
+++ b/be/src/core/data_type/data_type_jsonb.h
@@ -50,9 +50,6 @@ public:
 
     const std::string get_family_name() const override { return "JSONB"; }
     PrimitiveType get_primitive_type() const override { return 
PrimitiveType::TYPE_JSONB; }
-    doris::FieldType get_storage_field_type() const override {
-        return doris::FieldType::OLAP_FIELD_TYPE_JSONB;
-    }
 
     int64_t get_uncompressed_serialized_bytes(const IColumn& column,
                                               int be_exec_version) const 
override;
diff --git a/be/src/core/data_type/data_type_map.h 
b/be/src/core/data_type/data_type_map.h
index ccb155af330..b442b64da9b 100644
--- a/be/src/core/data_type/data_type_map.h
+++ b/be/src/core/data_type/data_type_map.h
@@ -56,9 +56,6 @@ public:
 
     DataTypeMap(const DataTypePtr& key_type_, const DataTypePtr& value_type_);
     PrimitiveType get_primitive_type() const override { return 
PrimitiveType::TYPE_MAP; }
-    doris::FieldType get_storage_field_type() const override {
-        return doris::FieldType::OLAP_FIELD_TYPE_MAP;
-    }
 
     std::string do_get_name() const override {
         return "Map(" + key_type->get_name() + ", " + value_type->get_name() + 
")";
diff --git a/be/src/core/data_type/data_type_number_base.h 
b/be/src/core/data_type/data_type_number_base.h
index cc87f8d6b04..f3fe2544196 100644
--- a/be/src/core/data_type/data_type_number_base.h
+++ b/be/src/core/data_type/data_type_number_base.h
@@ -66,59 +66,6 @@ public:
         return T;
     }
 
-    doris::FieldType get_storage_field_type() const override {
-        // Doris does not support uint8 at present, use uint8 as boolean type
-        if constexpr (T == TYPE_BOOLEAN) {
-            return doris::FieldType::OLAP_FIELD_TYPE_BOOL;
-        }
-        if constexpr (T == TYPE_TINYINT) {
-            return doris::FieldType::OLAP_FIELD_TYPE_TINYINT;
-        }
-        if constexpr (T == TYPE_SMALLINT) {
-            return doris::FieldType::OLAP_FIELD_TYPE_SMALLINT;
-        }
-        if constexpr (T == TYPE_INT) {
-            return doris::FieldType::OLAP_FIELD_TYPE_INT;
-        }
-        if constexpr (T == TYPE_BIGINT) {
-            return doris::FieldType::OLAP_FIELD_TYPE_BIGINT;
-        }
-        if constexpr (T == TYPE_LARGEINT) {
-            return doris::FieldType::OLAP_FIELD_TYPE_LARGEINT;
-        }
-        if constexpr (T == TYPE_FLOAT) {
-            return doris::FieldType::OLAP_FIELD_TYPE_FLOAT;
-        }
-        if constexpr (T == TYPE_DOUBLE) {
-            return doris::FieldType::OLAP_FIELD_TYPE_DOUBLE;
-        }
-        if constexpr (T == TYPE_DATE) {
-            return doris::FieldType::OLAP_FIELD_TYPE_DATE;
-        }
-        if constexpr (T == TYPE_DATETIME) {
-            return doris::FieldType::OLAP_FIELD_TYPE_DATETIME;
-        }
-        if constexpr (T == TYPE_DATEV2) {
-            return doris::FieldType::OLAP_FIELD_TYPE_DATEV2;
-        }
-        if constexpr (T == TYPE_DATETIMEV2) {
-            return doris::FieldType::OLAP_FIELD_TYPE_DATETIMEV2;
-        }
-        if constexpr (T == TYPE_TIMESTAMPTZ) {
-            return doris::FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ;
-        }
-        if constexpr (T == TYPE_IPV4) {
-            return doris::FieldType::OLAP_FIELD_TYPE_IPV4;
-        }
-        if constexpr (T == TYPE_IPV6) {
-            return doris::FieldType::OLAP_FIELD_TYPE_IPV6;
-        }
-        if constexpr (T == TYPE_TIMEV2) {
-            return doris::FieldType::OLAP_FIELD_TYPE_TIMEV2;
-        }
-        throw Exception(Status::FatalError("__builtin_unreachable"));
-    }
-
     Field get_field(const TExprNode& node) const override;
 
     int64_t get_uncompressed_serialized_bytes(const IColumn& column,
diff --git a/be/src/core/data_type/data_type_quantilestate.h 
b/be/src/core/data_type/data_type_quantilestate.h
index 370f9bcb7d4..d469f6b3588 100644
--- a/be/src/core/data_type/data_type_quantilestate.h
+++ b/be/src/core/data_type/data_type_quantilestate.h
@@ -51,9 +51,6 @@ public:
     const std::string get_family_name() const override { return 
"QuantileState"; }
     PrimitiveType get_primitive_type() const override { return 
PrimitiveType::TYPE_QUANTILE_STATE; }
 
-    doris::FieldType get_storage_field_type() const override {
-        return doris::FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE;
-    }
     int64_t get_uncompressed_serialized_bytes(const IColumn& column,
                                               int be_exec_version) const 
override;
     char* serialize(const IColumn& column, char* buf, int be_exec_version) 
const override;
diff --git a/be/src/core/data_type/data_type_string.h 
b/be/src/core/data_type/data_type_string.h
index 32385b7afb1..bfd86f7d9de 100644
--- a/be/src/core/data_type/data_type_string.h
+++ b/be/src/core/data_type/data_type_string.h
@@ -55,10 +55,6 @@ public:
     }
     PrimitiveType get_primitive_type() const override { return 
_primitive_type; }
 
-    doris::FieldType get_storage_field_type() const override {
-        return doris::FieldType::OLAP_FIELD_TYPE_STRING;
-    }
-
     int64_t get_uncompressed_serialized_bytes(const IColumn& column,
                                               int be_exec_version) const 
override;
     char* serialize(const IColumn& column, char* buf, int be_exec_version) 
const override;
diff --git a/be/src/core/data_type/data_type_struct.h 
b/be/src/core/data_type/data_type_struct.h
index 657364efbca..d4e4abd7a88 100644
--- a/be/src/core/data_type/data_type_struct.h
+++ b/be/src/core/data_type/data_type_struct.h
@@ -70,9 +70,6 @@ public:
     DataTypeStruct(const DataTypes& elems, const Strings& names);
     PrimitiveType get_primitive_type() const override { return 
PrimitiveType::TYPE_STRUCT; }
 
-    doris::FieldType get_storage_field_type() const override {
-        return doris::FieldType::OLAP_FIELD_TYPE_STRUCT;
-    }
     std::string do_get_name() const override;
     const std::string get_family_name() const override { return "Struct"; }
 
diff --git a/be/src/core/data_type/data_type_variant.h 
b/be/src/core/data_type/data_type_variant.h
index f8ec3484d6b..3f4e08a0ada 100644
--- a/be/src/core/data_type/data_type_variant.h
+++ b/be/src/core/data_type/data_type_variant.h
@@ -60,9 +60,6 @@ public:
     String do_get_name() const override { return name; }
     const std::string get_family_name() const override { return "Variant"; }
 
-    doris::FieldType get_storage_field_type() const override {
-        return doris::FieldType::OLAP_FIELD_TYPE_VARIANT;
-    }
     Status check_column(const IColumn& column) const override {
         return check_column_non_nested_type<ColumnVariant>(column);
     }
diff --git a/be/src/storage/segment/segment_iterator.cpp 
b/be/src/storage/segment/segment_iterator.cpp
index dc6930777f5..c1ba1b17778 100644
--- a/be/src/storage/segment/segment_iterator.cpp
+++ b/be/src/storage/segment/segment_iterator.cpp
@@ -654,21 +654,16 @@ Status SegmentIterator::_lazy_init(Block* block) {
     }
     _current_return_columns.resize(_schema->columns().size());
 
-    _vec_init_char_column_id();
     for (size_t i = 0; i < _schema->column_ids().size(); i++) {
         ColumnId cid = _schema->column_ids()[i];
         const auto* column_desc = _schema->column(cid);
         if (_is_pred_column[cid]) {
             auto storage_column_type = _storage_name_and_type[cid].second;
-            // Char type is special , since char type's computational datatype 
is same with string,
-            // both are DataTypeString, but DataTypeString only return 
FieldType::OLAP_FIELD_TYPE_STRING
-            // in get_storage_field_type.
             RETURN_IF_CATCH_EXCEPTION(
                     // Here, cid will not go out of bounds
                     // because the size of _current_return_columns equals 
_schema->tablet_columns().size()
                     _current_return_columns[cid] = 
Schema::get_predicate_column_ptr(
-                            _is_char_type[cid] ? 
FieldType::OLAP_FIELD_TYPE_CHAR
-                                               : 
storage_column_type->get_storage_field_type(),
+                            storage_column_type->get_storage_field_type(),
                             storage_column_type->is_nullable(), 
_opts.io_ctx.reader_type));
             _current_return_columns[cid]->set_rowset_segment_id(
                     {_segment->rowset_id(), _segment->id()});
@@ -2201,21 +2196,6 @@ bool 
SegmentIterator::_can_evaluated_by_vectorized(std::shared_ptr<ColumnPredica
     }
 }
 
-void SegmentIterator::_vec_init_char_column_id() {
-    if (!_is_char_type.empty()) {
-        return;
-    }
-    _is_char_type.resize(_schema->columns().size(), false);
-    for (size_t i = 0; i < _schema->num_column_ids(); i++) {
-        auto cid = _schema->column_id(i);
-        const TabletColumn* column_desc = _schema->column(cid);
-
-        if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_CHAR) {
-            _is_char_type[cid] = true;
-        }
-    }
-}
-
 bool SegmentIterator::_prune_column(ColumnId cid, MutableColumnPtr& column, 
bool fill_defaults,
                                     size_t num_of_defaults) {
     if (_need_read_data(cid)) {
diff --git a/be/src/storage/segment/segment_iterator.h 
b/be/src/storage/segment/segment_iterator.h
index c7faf3fdb51..8fd143867ed 100644
--- a/be/src/storage/segment/segment_iterator.h
+++ b/be/src/storage/segment/segment_iterator.h
@@ -204,7 +204,6 @@ private:
     bool _is_literal_node(const TExprNodeType::type& node_type);
 
     Status _vec_init_lazy_materialization();
-    void _vec_init_char_column_id();
 
     uint32_t segment_id() const { return _segment->id(); }
     uint32_t num_rows() const { return _segment->num_rows(); }
@@ -427,8 +426,6 @@ private:
 
     io::FileReaderSPtr _file_reader;
 
-    std::vector<bool> _is_char_type;
-
     // used for compaction, record selectd rowids of current batch
     uint16_t _selected_size;
     std::vector<uint16_t> _sel_rowid_idx;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to