This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9d3c35bc9b5 [refactor](be) Derive get_storage_field_type from
primitive type (#64341)
9d3c35bc9b5 is described below
commit 9d3c35bc9b56385860ae829c8bbb0fc28e2234fe
Author: Chenyang Sun <[email protected]>
AuthorDate: Wed Jun 10 18:11:00 2026 +0800
[refactor](be) Derive get_storage_field_type from primitive type (#64341)
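1. Make IDataType::get_storage_field_type() derive the field type from
the primitive type via TabletColumn::get_field_type_by_type(), and
remove the per-type overrides.
2. Delete SegmentIterator::_is_char_type (and _vec_init_char_column_id);
the CHAR special case is no longer needed because the derived mapping
returns OLAP_FIELD_TYPE_CHAR for TYPE_CHAR.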
Issue Number: close #xxx
Related PR: #xxx
Problem Summary:
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [x] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
Co-authored-by: Claude Opus 4.8 <[email protected]>
---
be/src/core/data_type/data_type.cpp | 5 ++
be/src/core/data_type/data_type.h | 4 +-
be/src/core/data_type/data_type_agg_state.h | 4 --
be/src/core/data_type/data_type_array.h | 4 --
be/src/core/data_type/data_type_bitmap.h | 4 --
be/src/core/data_type/data_type_date.h | 3 --
.../core/data_type/data_type_date_or_datetime_v2.h | 6 ---
be/src/core/data_type/data_type_date_time.h | 4 --
be/src/core/data_type/data_type_hll.h | 4 --
be/src/core/data_type/data_type_ipv4.h | 4 --
be/src/core/data_type/data_type_ipv6.h | 3 --
be/src/core/data_type/data_type_jsonb.h | 3 --
be/src/core/data_type/data_type_map.h | 3 --
be/src/core/data_type/data_type_number_base.h | 53 ----------------------
be/src/core/data_type/data_type_quantilestate.h | 3 --
be/src/core/data_type/data_type_string.h | 4 --
be/src/core/data_type/data_type_struct.h | 3 --
be/src/core/data_type/data_type_variant.h | 3 --
be/src/storage/segment/segment_iterator.cpp | 22 +--------
be/src/storage/segment/segment_iterator.h | 3 --
20 files changed, 9 insertions(+), 133 deletions(-)
diff --git a/be/src/core/data_type/data_type.cpp
b/be/src/core/data_type/data_type.cpp
index a11c17a6cd2..94f7b6c38c9 100644
--- a/be/src/core/data_type/data_type.cpp
+++ b/be/src/core/data_type/data_type.cpp
@@ -34,6 +34,7 @@
#include "core/data_type/define_primitive_type.h"
#include "core/data_type_serde/data_type_serde.h"
#include "core/field.h"
+#include "storage/tablet/tablet_schema.h"
namespace doris {
class BufferWritable;
@@ -45,6 +46,10 @@ IDataType::IDataType() = default;
IDataType::~IDataType() = default;
+doris::FieldType IDataType::get_storage_field_type() const {
+ return TabletColumn::get_field_type_by_type(get_primitive_type());
+}
+
String IDataType::get_name() const {
return do_get_name();
}
diff --git a/be/src/core/data_type/data_type.h
b/be/src/core/data_type/data_type.h
index 9fb341302c0..253bd8e49f9 100644
--- a/be/src/core/data_type/data_type.h
+++ b/be/src/core/data_type/data_type.h
@@ -83,7 +83,9 @@ public:
virtual const std::string get_family_name() const = 0;
virtual PrimitiveType get_primitive_type() const = 0;
- virtual doris::FieldType get_storage_field_type() const = 0;
+ // Derived from the primitive type by default (e.g. TYPE_CHAR ->
OLAP_FIELD_TYPE_CHAR).
+ // Types without a direct 1:1 mapping override this.
+ virtual doris::FieldType get_storage_field_type() const;
std::string to_string(const IColumn& column, size_t row_num,
const DataTypeSerDe::FormatOptions& options) const;
// get specific serializer or deserializer
diff --git a/be/src/core/data_type/data_type_agg_state.h
b/be/src/core/data_type/data_type_agg_state.h
index f1513c97c76..378315f9512 100644
--- a/be/src/core/data_type/data_type_agg_state.h
+++ b/be/src/core/data_type/data_type_agg_state.h
@@ -85,10 +85,6 @@ public:
PrimitiveType get_primitive_type() const override { return
PrimitiveType::TYPE_AGG_STATE; }
- doris::FieldType get_storage_field_type() const override {
- return doris::FieldType::OLAP_FIELD_TYPE_AGG_STATE;
- }
-
const DataTypes& get_sub_types() const { return _sub_types; }
void to_pb_column_meta(PColumnMeta* col_meta) const override {
diff --git a/be/src/core/data_type/data_type_array.h
b/be/src/core/data_type/data_type_array.h
index 5ddc5236c41..26479859121 100644
--- a/be/src/core/data_type/data_type_array.h
+++ b/be/src/core/data_type/data_type_array.h
@@ -59,10 +59,6 @@ public:
PrimitiveType get_primitive_type() const override { return
PrimitiveType::TYPE_ARRAY; }
- doris::FieldType get_storage_field_type() const override {
- return doris::FieldType::OLAP_FIELD_TYPE_ARRAY;
- }
-
std::string do_get_name() const override { return "Array(" +
nested->get_name() + ")"; }
const std::string get_family_name() const override { return "Array"; }
diff --git a/be/src/core/data_type/data_type_bitmap.h
b/be/src/core/data_type/data_type_bitmap.h
index c47e9b30a78..5e7fb8b67ba 100644
--- a/be/src/core/data_type/data_type_bitmap.h
+++ b/be/src/core/data_type/data_type_bitmap.h
@@ -54,10 +54,6 @@ public:
const std::string get_family_name() const override { return "BitMap"; }
PrimitiveType get_primitive_type() const override { return
PrimitiveType::TYPE_BITMAP; }
- doris::FieldType get_storage_field_type() const override {
- return doris::FieldType::OLAP_FIELD_TYPE_BITMAP;
- }
-
int64_t get_uncompressed_serialized_bytes(const IColumn& column,
int be_exec_version) const
override;
char* serialize(const IColumn& column, char* buf, int be_exec_version)
const override;
diff --git a/be/src/core/data_type/data_type_date.h
b/be/src/core/data_type/data_type_date.h
index d048eff6aaa..8acdb2f20e1 100644
--- a/be/src/core/data_type/data_type_date.h
+++ b/be/src/core/data_type/data_type_date.h
@@ -42,9 +42,6 @@ public:
static constexpr PrimitiveType PType = TYPE_DATE;
PrimitiveType get_primitive_type() const override { return
PrimitiveType::TYPE_DATE; }
- doris::FieldType get_storage_field_type() const override {
- return doris::FieldType::OLAP_FIELD_TYPE_DATE;
- }
const std::string get_family_name() const override { return "Date"; }
std::string do_get_name() const override { return "Date"; }
diff --git a/be/src/core/data_type/data_type_date_or_datetime_v2.h
b/be/src/core/data_type/data_type_date_or_datetime_v2.h
index 80b04000c66..0e16644863e 100644
--- a/be/src/core/data_type/data_type_date_or_datetime_v2.h
+++ b/be/src/core/data_type/data_type_date_or_datetime_v2.h
@@ -54,9 +54,6 @@ public:
static constexpr PrimitiveType PType = TYPE_DATEV2;
PrimitiveType get_primitive_type() const override { return
PrimitiveType::TYPE_DATEV2; }
- doris::FieldType get_storage_field_type() const override {
- return doris::FieldType::OLAP_FIELD_TYPE_DATEV2;
- }
const std::string get_family_name() const override { return "DateV2"; }
std::string do_get_name() const override { return "DateV2"; }
@@ -111,9 +108,6 @@ public:
scalar_type->set_scale(_scale);
}
- doris::FieldType get_storage_field_type() const override {
- return doris::FieldType::OLAP_FIELD_TYPE_DATETIMEV2;
- }
const std::string get_family_name() const override { return "DateTimeV2"; }
std::string do_get_name() const override {
return "DateTimeV2(" + std::to_string(_scale) + ")";
diff --git a/be/src/core/data_type/data_type_date_time.h
b/be/src/core/data_type/data_type_date_time.h
index 0cdfda2405d..8e4d49913af 100644
--- a/be/src/core/data_type/data_type_date_time.h
+++ b/be/src/core/data_type/data_type_date_time.h
@@ -69,10 +69,6 @@ public:
std::string do_get_name() const override { return "DateTime"; }
PrimitiveType get_primitive_type() const override { return
PrimitiveType::TYPE_DATETIME; }
- doris::FieldType get_storage_field_type() const override {
- return doris::FieldType::OLAP_FIELD_TYPE_DATETIME;
- }
-
bool equals(const IDataType& rhs) const override;
#ifdef BE_TEST
/// TODO: remove this in the future
diff --git a/be/src/core/data_type/data_type_hll.h
b/be/src/core/data_type/data_type_hll.h
index 3c4b632cc14..fb172e7e2b3 100644
--- a/be/src/core/data_type/data_type_hll.h
+++ b/be/src/core/data_type/data_type_hll.h
@@ -54,10 +54,6 @@ public:
PrimitiveType get_primitive_type() const override { return
PrimitiveType::TYPE_HLL; }
- doris::FieldType get_storage_field_type() const override {
- return doris::FieldType::OLAP_FIELD_TYPE_HLL;
- }
-
int64_t get_uncompressed_serialized_bytes(const IColumn& column,
int be_exec_version) const
override;
char* serialize(const IColumn& column, char* buf, int be_exec_version)
const override;
diff --git a/be/src/core/data_type/data_type_ipv4.h
b/be/src/core/data_type/data_type_ipv4.h
index 488e297736f..d9658047baa 100644
--- a/be/src/core/data_type/data_type_ipv4.h
+++ b/be/src/core/data_type/data_type_ipv4.h
@@ -45,10 +45,6 @@ public:
const std::string get_family_name() const override { return "IPv4"; }
std::string do_get_name() const override { return "IPv4"; }
- doris::FieldType get_storage_field_type() const override {
- return doris::FieldType::OLAP_FIELD_TYPE_IPV4;
- }
-
bool equals(const IDataType& rhs) const override;
Field get_field(const TExprNode& node) const override;
diff --git a/be/src/core/data_type/data_type_ipv6.h
b/be/src/core/data_type/data_type_ipv6.h
index cc48d3c7284..2ab1b157631 100644
--- a/be/src/core/data_type/data_type_ipv6.h
+++ b/be/src/core/data_type/data_type_ipv6.h
@@ -42,9 +42,6 @@ namespace doris {
class DataTypeIPv6 final : public DataTypeNumberBase<PrimitiveType::TYPE_IPV6>
{
public:
PrimitiveType get_primitive_type() const override { return
PrimitiveType::TYPE_IPV6; }
- doris::FieldType get_storage_field_type() const override {
- return doris::FieldType::OLAP_FIELD_TYPE_IPV6;
- }
const std::string get_family_name() const override { return "IPv6"; }
std::string do_get_name() const override { return "IPv6"; }
diff --git a/be/src/core/data_type/data_type_jsonb.h
b/be/src/core/data_type/data_type_jsonb.h
index 7cd866d2c62..483ef874fe9 100644
--- a/be/src/core/data_type/data_type_jsonb.h
+++ b/be/src/core/data_type/data_type_jsonb.h
@@ -50,9 +50,6 @@ public:
const std::string get_family_name() const override { return "JSONB"; }
PrimitiveType get_primitive_type() const override { return
PrimitiveType::TYPE_JSONB; }
- doris::FieldType get_storage_field_type() const override {
- return doris::FieldType::OLAP_FIELD_TYPE_JSONB;
- }
int64_t get_uncompressed_serialized_bytes(const IColumn& column,
int be_exec_version) const
override;
diff --git a/be/src/core/data_type/data_type_map.h
b/be/src/core/data_type/data_type_map.h
index ccb155af330..b442b64da9b 100644
--- a/be/src/core/data_type/data_type_map.h
+++ b/be/src/core/data_type/data_type_map.h
@@ -56,9 +56,6 @@ public:
DataTypeMap(const DataTypePtr& key_type_, const DataTypePtr& value_type_);
PrimitiveType get_primitive_type() const override { return
PrimitiveType::TYPE_MAP; }
- doris::FieldType get_storage_field_type() const override {
- return doris::FieldType::OLAP_FIELD_TYPE_MAP;
- }
std::string do_get_name() const override {
return "Map(" + key_type->get_name() + ", " + value_type->get_name() +
")";
diff --git a/be/src/core/data_type/data_type_number_base.h
b/be/src/core/data_type/data_type_number_base.h
index cc87f8d6b04..f3fe2544196 100644
--- a/be/src/core/data_type/data_type_number_base.h
+++ b/be/src/core/data_type/data_type_number_base.h
@@ -66,59 +66,6 @@ public:
return T;
}
- doris::FieldType get_storage_field_type() const override {
- // Doris does not support uint8 at present, use uint8 as boolean type
- if constexpr (T == TYPE_BOOLEAN) {
- return doris::FieldType::OLAP_FIELD_TYPE_BOOL;
- }
- if constexpr (T == TYPE_TINYINT) {
- return doris::FieldType::OLAP_FIELD_TYPE_TINYINT;
- }
- if constexpr (T == TYPE_SMALLINT) {
- return doris::FieldType::OLAP_FIELD_TYPE_SMALLINT;
- }
- if constexpr (T == TYPE_INT) {
- return doris::FieldType::OLAP_FIELD_TYPE_INT;
- }
- if constexpr (T == TYPE_BIGINT) {
- return doris::FieldType::OLAP_FIELD_TYPE_BIGINT;
- }
- if constexpr (T == TYPE_LARGEINT) {
- return doris::FieldType::OLAP_FIELD_TYPE_LARGEINT;
- }
- if constexpr (T == TYPE_FLOAT) {
- return doris::FieldType::OLAP_FIELD_TYPE_FLOAT;
- }
- if constexpr (T == TYPE_DOUBLE) {
- return doris::FieldType::OLAP_FIELD_TYPE_DOUBLE;
- }
- if constexpr (T == TYPE_DATE) {
- return doris::FieldType::OLAP_FIELD_TYPE_DATE;
- }
- if constexpr (T == TYPE_DATETIME) {
- return doris::FieldType::OLAP_FIELD_TYPE_DATETIME;
- }
- if constexpr (T == TYPE_DATEV2) {
- return doris::FieldType::OLAP_FIELD_TYPE_DATEV2;
- }
- if constexpr (T == TYPE_DATETIMEV2) {
- return doris::FieldType::OLAP_FIELD_TYPE_DATETIMEV2;
- }
- if constexpr (T == TYPE_TIMESTAMPTZ) {
- return doris::FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ;
- }
- if constexpr (T == TYPE_IPV4) {
- return doris::FieldType::OLAP_FIELD_TYPE_IPV4;
- }
- if constexpr (T == TYPE_IPV6) {
- return doris::FieldType::OLAP_FIELD_TYPE_IPV6;
- }
- if constexpr (T == TYPE_TIMEV2) {
- return doris::FieldType::OLAP_FIELD_TYPE_TIMEV2;
- }
- throw Exception(Status::FatalError("__builtin_unreachable"));
- }
-
Field get_field(const TExprNode& node) const override;
int64_t get_uncompressed_serialized_bytes(const IColumn& column,
diff --git a/be/src/core/data_type/data_type_quantilestate.h
b/be/src/core/data_type/data_type_quantilestate.h
index 370f9bcb7d4..d469f6b3588 100644
--- a/be/src/core/data_type/data_type_quantilestate.h
+++ b/be/src/core/data_type/data_type_quantilestate.h
@@ -51,9 +51,6 @@ public:
const std::string get_family_name() const override { return
"QuantileState"; }
PrimitiveType get_primitive_type() const override { return
PrimitiveType::TYPE_QUANTILE_STATE; }
- doris::FieldType get_storage_field_type() const override {
- return doris::FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE;
- }
int64_t get_uncompressed_serialized_bytes(const IColumn& column,
int be_exec_version) const
override;
char* serialize(const IColumn& column, char* buf, int be_exec_version)
const override;
diff --git a/be/src/core/data_type/data_type_string.h
b/be/src/core/data_type/data_type_string.h
index 32385b7afb1..bfd86f7d9de 100644
--- a/be/src/core/data_type/data_type_string.h
+++ b/be/src/core/data_type/data_type_string.h
@@ -55,10 +55,6 @@ public:
}
PrimitiveType get_primitive_type() const override { return
_primitive_type; }
- doris::FieldType get_storage_field_type() const override {
- return doris::FieldType::OLAP_FIELD_TYPE_STRING;
- }
-
int64_t get_uncompressed_serialized_bytes(const IColumn& column,
int be_exec_version) const
override;
char* serialize(const IColumn& column, char* buf, int be_exec_version)
const override;
diff --git a/be/src/core/data_type/data_type_struct.h
b/be/src/core/data_type/data_type_struct.h
index 657364efbca..d4e4abd7a88 100644
--- a/be/src/core/data_type/data_type_struct.h
+++ b/be/src/core/data_type/data_type_struct.h
@@ -70,9 +70,6 @@ public:
DataTypeStruct(const DataTypes& elems, const Strings& names);
PrimitiveType get_primitive_type() const override { return
PrimitiveType::TYPE_STRUCT; }
- doris::FieldType get_storage_field_type() const override {
- return doris::FieldType::OLAP_FIELD_TYPE_STRUCT;
- }
std::string do_get_name() const override;
const std::string get_family_name() const override { return "Struct"; }
diff --git a/be/src/core/data_type/data_type_variant.h
b/be/src/core/data_type/data_type_variant.h
index f8ec3484d6b..3f4e08a0ada 100644
--- a/be/src/core/data_type/data_type_variant.h
+++ b/be/src/core/data_type/data_type_variant.h
@@ -60,9 +60,6 @@ public:
String do_get_name() const override { return name; }
const std::string get_family_name() const override { return "Variant"; }
- doris::FieldType get_storage_field_type() const override {
- return doris::FieldType::OLAP_FIELD_TYPE_VARIANT;
- }
Status check_column(const IColumn& column) const override {
return check_column_non_nested_type<ColumnVariant>(column);
}
diff --git a/be/src/storage/segment/segment_iterator.cpp
b/be/src/storage/segment/segment_iterator.cpp
index dc6930777f5..c1ba1b17778 100644
--- a/be/src/storage/segment/segment_iterator.cpp
+++ b/be/src/storage/segment/segment_iterator.cpp
@@ -654,21 +654,16 @@ Status SegmentIterator::_lazy_init(Block* block) {
}
_current_return_columns.resize(_schema->columns().size());
- _vec_init_char_column_id();
for (size_t i = 0; i < _schema->column_ids().size(); i++) {
ColumnId cid = _schema->column_ids()[i];
const auto* column_desc = _schema->column(cid);
if (_is_pred_column[cid]) {
auto storage_column_type = _storage_name_and_type[cid].second;
- // Char type is special , since char type's computational datatype
is same with string,
- // both are DataTypeString, but DataTypeString only return
FieldType::OLAP_FIELD_TYPE_STRING
- // in get_storage_field_type.
RETURN_IF_CATCH_EXCEPTION(
// Here, cid will not go out of bounds
// because the size of _current_return_columns equals
_schema->tablet_columns().size()
_current_return_columns[cid] =
Schema::get_predicate_column_ptr(
- _is_char_type[cid] ?
FieldType::OLAP_FIELD_TYPE_CHAR
- :
storage_column_type->get_storage_field_type(),
+ storage_column_type->get_storage_field_type(),
storage_column_type->is_nullable(),
_opts.io_ctx.reader_type));
_current_return_columns[cid]->set_rowset_segment_id(
{_segment->rowset_id(), _segment->id()});
@@ -2201,21 +2196,6 @@ bool
SegmentIterator::_can_evaluated_by_vectorized(std::shared_ptr<ColumnPredica
}
}
-void SegmentIterator::_vec_init_char_column_id() {
- if (!_is_char_type.empty()) {
- return;
- }
- _is_char_type.resize(_schema->columns().size(), false);
- for (size_t i = 0; i < _schema->num_column_ids(); i++) {
- auto cid = _schema->column_id(i);
- const TabletColumn* column_desc = _schema->column(cid);
-
- if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_CHAR) {
- _is_char_type[cid] = true;
- }
- }
-}
-
bool SegmentIterator::_prune_column(ColumnId cid, MutableColumnPtr& column,
bool fill_defaults,
size_t num_of_defaults) {
if (_need_read_data(cid)) {
diff --git a/be/src/storage/segment/segment_iterator.h
b/be/src/storage/segment/segment_iterator.h
index c7faf3fdb51..8fd143867ed 100644
--- a/be/src/storage/segment/segment_iterator.h
+++ b/be/src/storage/segment/segment_iterator.h
@@ -204,7 +204,6 @@ private:
bool _is_literal_node(const TExprNodeType::type& node_type);
Status _vec_init_lazy_materialization();
- void _vec_init_char_column_id();
uint32_t segment_id() const { return _segment->id(); }
uint32_t num_rows() const { return _segment->num_rows(); }
@@ -427,8 +426,6 @@ private:
io::FileReaderSPtr _file_reader;
- std::vector<bool> _is_char_type;
-
// used for compaction, record selectd rowids of current batch
uint16_t _selected_size;
std::vector<uint16_t> _sel_rowid_idx;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]