This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 3af779561eb [fix](test) Remove wrapper_field dependency from unit
tests (#60530)
3af779561eb is described below
commit 3af779561ebdc0f60a2ceea030d3f566ea720f83
Author: Gabriel <[email protected]>
AuthorDate: Sat Feb 7 22:01:08 2026 +0800
[fix](test) Remove wrapper_field dependency from unit tests (#60530)
Update FromStringTest, TextSerde, and CsvSerde unit tests to remove
dependency on the deleted wrapper_field.h header. Replace WrapperField
usage with hardcoded min/max values for IP addresses and direct string
comparisons for other types. Test logic and coverage are preserved.
- Remove wrapper_field.h includes
- Replace WrapperField::create_by_type() and related calls with explicit
values
- Update test data structures to remove wrapper field expectations
- Simplify nullable string tests to use test string directly
- Update comments to reflect serde-only testing approach
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #xxx
Problem Summary:
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---------
Co-authored-by: Claude Haiku 4.5 <[email protected]>
---
be/src/olap/accept_null_predicate.h | 1 -
be/src/olap/bitmap_filter_predicate.h | 1 -
be/src/olap/column_mapping.h | 4 +-
be/src/olap/comparison_predicate.h | 1 -
be/src/olap/in_list_predicate.h | 1 -
be/src/olap/null_predicate.h | 1 -
be/src/olap/rowset/segment_v2/column_reader.cpp | 197 +++-----------------
be/src/olap/rowset/segment_v2/column_reader.h | 15 +-
be/src/olap/rowset/segment_v2/segment.cpp | 2 +-
be/src/olap/schema_change.cpp | 30 ++-
be/src/olap/shared_predicate.h | 1 -
be/src/olap/wrapper_field.cpp | 112 -----------
be/src/olap/wrapper_field.h | 107 -----------
be/src/vec/columns/column.h | 7 +
be/src/vec/columns/column_nothing.h | 2 +-
be/src/vec/columns/column_nullable.cpp | 10 +
be/src/vec/columns/column_nullable.h | 1 +
be/src/vec/columns/predicate_column.h | 22 +++
be/src/vec/common/cow.h | 7 +-
be/src/vec/data_types/data_type_factory.hpp | 4 +-
.../vec/data_types/data_type_fixed_length_object.h | 2 +-
be/src/vec/data_types/data_type_string.h | 2 +-
.../data_types/serde/data_type_bitmap_serde.cpp | 4 +-
.../vec/data_types/serde/data_type_bitmap_serde.h | 4 +-
.../serde/data_type_date_or_datetime_serde.cpp | 4 +-
.../serde/data_type_date_or_datetime_serde.h | 4 +-
.../serde/data_type_datetimev2_serde.cpp | 4 +-
.../data_types/serde/data_type_datetimev2_serde.h | 4 +-
.../data_types/serde/data_type_datev2_serde.cpp | 4 +-
.../vec/data_types/serde/data_type_datev2_serde.h | 4 +-
.../data_types/serde/data_type_decimal_serde.cpp | 12 +-
.../vec/data_types/serde/data_type_decimal_serde.h | 4 +-
.../vec/data_types/serde/data_type_hll_serde.cpp | 4 +-
be/src/vec/data_types/serde/data_type_hll_serde.h | 4 +-
.../vec/data_types/serde/data_type_ipv4_serde.cpp | 4 +-
be/src/vec/data_types/serde/data_type_ipv4_serde.h | 4 +-
.../vec/data_types/serde/data_type_ipv6_serde.cpp | 4 +-
be/src/vec/data_types/serde/data_type_ipv6_serde.h | 4 +-
.../vec/data_types/serde/data_type_jsonb_serde.h | 2 +-
.../data_types/serde/data_type_nullable_serde.cpp | 7 +-
.../data_types/serde/data_type_nullable_serde.h | 4 +-
.../data_types/serde/data_type_number_serde.cpp | 4 +-
.../vec/data_types/serde/data_type_number_serde.h | 4 +-
.../serde/data_type_quantilestate_serde.cpp | 4 +-
.../serde/data_type_quantilestate_serde.h | 4 +-
be/src/vec/data_types/serde/data_type_serde.h | 14 +-
.../data_types/serde/data_type_string_serde.cpp | 13 +-
.../vec/data_types/serde/data_type_string_serde.h | 11 +-
.../vec/data_types/serde/data_type_time_serde.cpp | 4 +-
be/src/vec/data_types/serde/data_type_time_serde.h | 4 +-
.../serde/data_type_timestamptz_serde.cpp | 4 +-
.../data_types/serde/data_type_timestamptz_serde.h | 4 +-
be/src/vec/exec/format/csv/csv_reader.cpp | 2 +-
be/src/vec/exec/format/text/text_reader.cpp | 2 +-
be/src/vec/olap/vgeneric_iterators.cpp | 9 +
be/test/vec/data_types/from_string_test.cpp | 205 ++++++---------------
.../data_types/serde/data_type_serde_csv_test.cpp | 32 ++--
.../data_type_serde_fixed_length_object_test.cpp | 15 +-
.../data_types/serde/data_type_serde_map_test.cpp | 4 +-
.../serde/data_type_serde_string_test.cpp | 2 +-
.../serde/data_type_serde_struct_test.cpp | 2 +-
.../data_types/serde/data_type_serde_text_test.cpp | 29 ++-
.../data/alter_p0/test_alter_column_char.out | 22 +++
.../suites/alter_p0/test_alter_column_char.groovy | 57 ++++++
64 files changed, 374 insertions(+), 692 deletions(-)
diff --git a/be/src/olap/accept_null_predicate.h
b/be/src/olap/accept_null_predicate.h
index fb3053e09e1..1f1f836bc37 100644
--- a/be/src/olap/accept_null_predicate.h
+++ b/be/src/olap/accept_null_predicate.h
@@ -25,7 +25,6 @@
#include "olap/rowset/segment_v2/bloom_filter.h"
#include "olap/rowset/segment_v2/inverted_index_cache.h"
#include "olap/rowset/segment_v2/inverted_index_reader.h"
-#include "olap/wrapper_field.h"
#include "vec/columns/column_dictionary.h"
namespace doris {
diff --git a/be/src/olap/bitmap_filter_predicate.h
b/be/src/olap/bitmap_filter_predicate.h
index 51699918010..e9c3b7e140e 100644
--- a/be/src/olap/bitmap_filter_predicate.h
+++ b/be/src/olap/bitmap_filter_predicate.h
@@ -21,7 +21,6 @@
#include "exprs/bitmapfilter_predicate.h"
#include "olap/column_predicate.h"
-#include "olap/wrapper_field.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/predicate_column.h"
diff --git a/be/src/olap/column_mapping.h b/be/src/olap/column_mapping.h
index bf3a6118d76..0faaa7f72a4 100644
--- a/be/src/olap/column_mapping.h
+++ b/be/src/olap/column_mapping.h
@@ -24,8 +24,6 @@
#include "olap/tablet_schema.h"
namespace doris {
-class WrapperField;
-
struct ColumnMapping {
ColumnMapping() = default;
virtual ~ColumnMapping() = default;
@@ -36,7 +34,7 @@ struct ColumnMapping {
// >=0: use origin column
int32_t ref_column_idx = -1;
// normally for default value. stores values for filters
- WrapperField* default_value = nullptr;
+ vectorized::Field default_value;
std::shared_ptr<TExpr> expr;
const TabletColumn* new_column = nullptr;
};
diff --git a/be/src/olap/comparison_predicate.h
b/be/src/olap/comparison_predicate.h
index 82c5d3e5df4..6992112b63f 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -25,7 +25,6 @@
#include "olap/rowset/segment_v2/bloom_filter.h"
#include "olap/rowset/segment_v2/inverted_index_cache.h" // IWYU pragma: keep
#include "olap/rowset/segment_v2/inverted_index_reader.h"
-#include "olap/wrapper_field.h"
#include "vec/columns/column_dictionary.h"
namespace doris {
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index 6bc546ace1e..b271fa85d1c 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -28,7 +28,6 @@
#include "olap/rowset/segment_v2/bloom_filter.h"
#include "olap/rowset/segment_v2/inverted_index_cache.h" // IWYU pragma: keep
#include "olap/rowset/segment_v2/inverted_index_reader.h"
-#include "olap/wrapper_field.h"
#include "runtime/define_primitive_type.h"
#include "runtime/primitive_type.h"
#include "runtime/type_limit.h"
diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h
index a9fa6a20911..b5298b167f9 100644
--- a/be/src/olap/null_predicate.h
+++ b/be/src/olap/null_predicate.h
@@ -28,7 +28,6 @@
#include "olap/column_predicate.h"
#include "olap/rowset/segment_v2/bloom_filter.h"
#include "olap/schema.h"
-#include "olap/wrapper_field.h"
#include "vec/exec/format/parquet/parquet_predicate.h"
namespace roaring {
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index d57f7c91741..104cb45c866 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -57,7 +57,6 @@
#include "olap/rowset/segment_v2/zone_map_index.h"
#include "olap/tablet_schema.h"
#include "olap/types.h" // for TypeInfo
-#include "olap/wrapper_field.h"
#include "runtime/decimalv2_value.h"
#include "runtime/define_primitive_type.h"
#include "util/binary_cast.hpp"
@@ -447,32 +446,16 @@ Status ColumnReader::next_batch_of_zone_map(size_t* n,
vectorized::MutableColumn
}
// TODO: this work to get min/max value seems should only do once
ZoneMapInfo zone_map_info;
- RETURN_IF_ERROR(_parse_zone_map_skip_null(*_segment_zone_map,
zone_map_info));
+ RETURN_IF_ERROR(_parse_zone_map(*_segment_zone_map, zone_map_info));
dst->reserve(*n);
if (zone_map_info.is_all_null) {
assert_cast<vectorized::ColumnNullable&>(*dst).insert_many_defaults(*n);
return Status::OK();
}
- FieldType type = _type_info->type();
- if (type == FieldType::OLAP_FIELD_TYPE_CHAR) {
- auto s = zone_map_info.max_value.template get<TYPE_CHAR>();
- while (!s.empty() && s.back() == '\0') {
- s.pop_back();
- }
- dst->insert(vectorized::Field::create_field<TYPE_CHAR>(s));
- for (int i = 1; i < *n; ++i) {
- s = zone_map_info.min_value.template get<TYPE_CHAR>();
- while (!s.empty() && s.back() == '\0') {
- s.pop_back();
- }
- dst->insert(vectorized::Field::create_field<TYPE_CHAR>(s));
- }
- } else {
- dst->insert(zone_map_info.max_value);
- for (int i = 1; i < *n; ++i) {
- dst->insert(zone_map_info.min_value);
- }
+ dst->insert(zone_map_info.max_value);
+ for (int i = 1; i < *n; ++i) {
+ dst->insert(zone_map_info.min_value);
}
return Status::OK();
}
@@ -514,10 +497,10 @@ Status ColumnReader::prune_predicates_by_zone_map(
}
Status ColumnReader::_parse_zone_map(const ZoneMapPB& zone_map, ZoneMapInfo&
zone_map_info) const {
+ zone_map_info.has_null = zone_map.has_null();
+ zone_map_info.is_all_null = !zone_map.has_not_null();
// min value and max value are valid if has_not_null is true
if (zone_map.has_not_null()) {
- zone_map_info.has_null = false;
-
if (zone_map.has_negative_inf()) {
if (FieldType::OLAP_FIELD_TYPE_FLOAT == _meta_type) {
static auto constexpr float_neg_inf =
-std::numeric_limits<float>::infinity();
@@ -532,8 +515,9 @@ Status ColumnReader::_parse_zone_map(const ZoneMapPB&
zone_map, ZoneMapInfo& zon
}
} else {
vectorized::DataTypeSerDe::FormatOptions opt;
-
RETURN_IF_ERROR(_data_type->get_serde()->from_string(zone_map.min(),
-
zone_map_info.min_value, opt));
+ opt.ignore_scale = true;
+ RETURN_IF_ERROR(_data_type->get_serde()->from_olap_string(
+ zone_map.min(), zone_map_info.min_value, opt));
}
if (zone_map.has_nan()) {
@@ -560,35 +544,10 @@ Status ColumnReader::_parse_zone_map(const ZoneMapPB&
zone_map, ZoneMapInfo& zon
}
} else {
vectorized::DataTypeSerDe::FormatOptions opt;
-
RETURN_IF_ERROR(_data_type->get_serde()->from_string(zone_map.max(),
-
zone_map_info.max_value, opt));
+ opt.ignore_scale = true;
+ RETURN_IF_ERROR(_data_type->get_serde()->from_olap_string(
+ zone_map.max(), zone_map_info.max_value, opt));
}
- } else {
- zone_map_info.is_all_null = true;
- }
- // for compatible original Cond eval logic
- if (zone_map.has_null()) {
- // for compatible, if exist null, original logic treat null as min
- zone_map_info.has_null = true;
- if (!zone_map.has_not_null()) {
- // for compatible OlapCond's 'is not null'
- zone_map_info.has_null = true;
- }
- }
- return Status::OK();
-}
-
-Status ColumnReader::_parse_zone_map_skip_null(const ZoneMapPB& zone_map,
- ZoneMapInfo& zone_map_info)
const {
- // min value and max value are valid if has_not_null is true
- if (zone_map.has_not_null()) {
- vectorized::DataTypeSerDe::FormatOptions opt;
- RETURN_IF_ERROR(
- _data_type->get_serde()->from_string(zone_map.max(),
zone_map_info.max_value, opt));
- RETURN_IF_ERROR(
- _data_type->get_serde()->from_string(zone_map.min(),
zone_map_info.min_value, opt));
- } else {
- zone_map_info.is_all_null = true;
}
return Status::OK();
}
@@ -611,11 +570,11 @@ Status ColumnReader::_get_filtered_pages(
const std::vector<ZoneMapPB>& zone_maps =
_zone_map_index->page_zone_maps();
size_t page_size = _zone_map_index->num_pages();
- ZoneMapInfo zone_map_info;
for (size_t i = 0; i < page_size; ++i) {
if (zone_maps[i].pass_all()) {
page_indexes->push_back(cast_set<uint32_t>(i));
} else {
+ ZoneMapInfo zone_map_info;
RETURN_IF_ERROR(_parse_zone_map(zone_maps[i], zone_map_info));
if (_zone_map_match_condition(zone_maps[i], zone_map_info,
col_predicates)) {
bool should_read = true;
@@ -2106,41 +2065,30 @@ Status DefaultValueColumnIterator::init(const
ColumnIteratorOptions& opts) {
// "NULL" is a special default value which means the default value is null.
if (_has_default_value) {
if (_default_value == "NULL") {
- _is_default_value_null = true;
+ _default_value_field =
vectorized::Field::create_field<TYPE_NULL>(vectorized::Null {});
} else {
- _type_size = _type_info->size();
- _mem_value.resize(_type_size);
- Status s = Status::OK();
- // If char length is 10, but default value is 'a' , it's length is
1
- // not fill 0 to the ending, because segment iterator will shrink
the tail 0 char
- if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_VARCHAR ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_HLL ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_BITMAP ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_STRING ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_CHAR) {
- ((Slice*)_mem_value.data())->size = _default_value.length();
- ((Slice*)_mem_value.data())->data = _default_value.data();
- } else if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_ARRAY)
{
+ if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
if (_default_value != "[]") {
return Status::NotSupported("Array default {} is
unsupported", _default_value);
} else {
- ((Slice*)_mem_value.data())->size =
_default_value.length();
- ((Slice*)_mem_value.data())->data = _default_value.data();
+ _default_value_field =
+
vectorized::Field::create_field<TYPE_ARRAY>(vectorized::Array {});
+ return Status::OK();
}
} else if (_type_info->type() ==
FieldType::OLAP_FIELD_TYPE_STRUCT) {
return Status::NotSupported("STRUCT default type is
unsupported");
} else if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_MAP) {
return Status::NotSupported("MAP default type is unsupported");
- } else {
- s = _type_info->from_string(_mem_value.data(), _default_value,
_precision, _scale);
- }
- if (!s.ok()) {
- return s;
}
+ const auto t = _type_info->type();
+ const auto serde = vectorized::DataTypeFactory::instance()
+ .create_data_type(t, _precision,
_scale, _len)
+ ->get_serde();
+ vectorized::DataTypeSerDe::FormatOptions opt;
+ RETURN_IF_ERROR(serde->from_olap_string(_default_value,
_default_value_field, opt));
}
} else if (_is_nullable) {
- // if _has_default_value is false but _is_nullable is true, we should
return null as default value.
- _is_default_value_null = true;
+ _default_value_field =
vectorized::Field::create_field<TYPE_NULL>(vectorized::Null {});
} else {
return Status::InternalError(
"invalid default value column for no default value and not
nullable");
@@ -2148,97 +2096,9 @@ Status DefaultValueColumnIterator::init(const
ColumnIteratorOptions& opts) {
return Status::OK();
}
-void DefaultValueColumnIterator::insert_default_data(const TypeInfo*
type_info, size_t type_size,
- void* mem_value,
-
vectorized::MutableColumnPtr& dst, size_t n) {
- dst = dst->convert_to_predicate_column_if_dictionary();
-
- switch (type_info->type()) {
- case FieldType::OLAP_FIELD_TYPE_BITMAP:
- case FieldType::OLAP_FIELD_TYPE_HLL: {
- dst->insert_many_defaults(n);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_DATE: {
- vectorized::Int64 int64;
- char* data_ptr = (char*)&int64;
- size_t data_len = sizeof(int64);
-
- assert(type_size ==
-
sizeof(FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DATE>::CppType)); //uint24_t
- std::string str =
FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DATE>::to_string(mem_value);
-
- VecDateTimeValue value;
- value.from_date_str(str.c_str(), str.length());
- value.cast_to_date();
-
- int64 = binary_cast<VecDateTimeValue, vectorized::Int64>(value);
- dst->insert_data_repeatedly(data_ptr, data_len, n);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_DATETIME: {
- vectorized::Int64 int64;
- char* data_ptr = (char*)&int64;
- size_t data_len = sizeof(int64);
-
- assert(type_size ==
-
sizeof(FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DATETIME>::CppType));
//int64_t
- std::string str =
-
FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DATETIME>::to_string(mem_value);
-
- VecDateTimeValue value;
- value.from_date_str(str.c_str(), str.length());
- value.to_datetime();
-
- int64 = binary_cast<VecDateTimeValue, vectorized::Int64>(value);
- dst->insert_data_repeatedly(data_ptr, data_len, n);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_DECIMAL: {
- vectorized::Int128 int128;
- char* data_ptr = (char*)&int128;
- size_t data_len = sizeof(int128);
-
- assert(type_size ==
-
sizeof(FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DECIMAL>::CppType));
//decimal12_t
- decimal12_t* d = (decimal12_t*)mem_value;
- int128 = DecimalV2Value(d->integer, d->fraction).value();
- dst->insert_data_repeatedly(data_ptr, data_len, n);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_STRING:
- case FieldType::OLAP_FIELD_TYPE_VARCHAR:
- case FieldType::OLAP_FIELD_TYPE_CHAR:
- case FieldType::OLAP_FIELD_TYPE_JSONB:
- case FieldType::OLAP_FIELD_TYPE_AGG_STATE: {
- char* data_ptr = ((Slice*)mem_value)->data;
- size_t data_len = ((Slice*)mem_value)->size;
- dst->insert_data_repeatedly(data_ptr, data_len, n);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_ARRAY: {
- if (dst->is_nullable()) {
-
static_cast<vectorized::ColumnNullable&>(*dst).insert_not_null_elements(n);
- } else {
- dst->insert_many_defaults(n);
- }
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_VARIANT: {
- dst->insert_many_defaults(n);
- break;
- }
- default: {
- char* data_ptr = (char*)mem_value;
- size_t data_len = type_size;
- dst->insert_data_repeatedly(data_ptr, data_len, n);
- }
- }
-}
-
Status DefaultValueColumnIterator::next_batch(size_t* n,
vectorized::MutableColumnPtr& dst,
bool* has_null) {
- *has_null = _is_default_value_null;
+ *has_null = _default_value_field.is_null();
_insert_many_default(dst, *n);
return Status::OK();
}
@@ -2250,10 +2110,11 @@ Status DefaultValueColumnIterator::read_by_rowids(const
rowid_t* rowids, const s
}
void
DefaultValueColumnIterator::_insert_many_default(vectorized::MutableColumnPtr&
dst, size_t n) {
- if (_is_default_value_null) {
+ if (_default_value_field.is_null()) {
dst->insert_many_defaults(n);
} else {
- insert_default_data(_type_info.get(), _type_size, _mem_value.data(),
dst, n);
+ dst = dst->convert_to_predicate_column_if_dictionary();
+ dst->insert_duplicate_fields(_default_value_field, n);
}
}
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h
b/be/src/olap/rowset/segment_v2/column_reader.h
index 9191f58f65e..d71f63363f8 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -252,8 +252,6 @@ private:
Status _parse_zone_map(const ZoneMapPB& zone_map, ZoneMapInfo&
zone_map_info) const;
- Status _parse_zone_map_skip_null(const ZoneMapPB& zone_map, ZoneMapInfo&
zone_map_info) const;
-
Status _get_filtered_pages(
const AndBlockColumnPredicate* col_predicates,
const std::vector<std::shared_ptr<const ColumnPredicate>>*
delete_predicates,
@@ -715,13 +713,14 @@ private:
class DefaultValueColumnIterator : public ColumnIterator {
public:
DefaultValueColumnIterator(bool has_default_value, std::string
default_value, bool is_nullable,
- TypeInfoPtr type_info, int precision, int scale)
+ TypeInfoPtr type_info, int precision, int
scale, int len)
: _has_default_value(has_default_value),
_default_value(std::move(default_value)),
_is_nullable(is_nullable),
_type_info(std::move(type_info)),
_precision(precision),
- _scale(scale) {}
+ _scale(scale),
+ _len(len) {}
Status init(const ColumnIteratorOptions& opts) override;
@@ -746,9 +745,6 @@ public:
ordinal_t get_current_ordinal() const override { return _current_rowid; }
- static void insert_default_data(const TypeInfo* type_info, size_t
type_size, void* mem_value,
- vectorized::MutableColumnPtr& dst, size_t
n);
-
private:
void _insert_many_default(vectorized::MutableColumnPtr& dst, size_t n);
@@ -756,11 +752,10 @@ private:
std::string _default_value;
bool _is_nullable;
TypeInfoPtr _type_info;
- bool _is_default_value_null {false};
- size_t _type_size {0};
int _precision;
int _scale;
- std::vector<char> _mem_value;
+ const int _len;
+ vectorized::Field _default_value_field;
// current rowid
ordinal_t _current_rowid = 0;
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index 0739f057ea0..7a50b2a87a5 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -631,7 +631,7 @@ Status Segment::new_default_iterator(const TabletColumn&
tablet_column,
std::unique_ptr<DefaultValueColumnIterator> default_value_iter(new
DefaultValueColumnIterator(
tablet_column.has_default_value(), tablet_column.default_value(),
tablet_column.is_nullable(), std::move(type_info),
tablet_column.precision(),
- tablet_column.frac()));
+ tablet_column.frac(), tablet_column.length()));
ColumnIteratorOptions iter_opts;
RETURN_IF_ERROR(default_value_iter->init(iter_opts));
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 6582fbd3da0..281394f5c7c 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -68,7 +68,6 @@
#include "olap/tablet_schema.h"
#include "olap/types.h"
#include "olap/utils.h"
-#include "olap/wrapper_field.h"
#include "runtime/exec_env.h"
#include "runtime/memory/mem_tracker.h"
#include "runtime/runtime_state.h"
@@ -301,9 +300,6 @@ BlockChanger::BlockChanger(TabletSchemaSPtr tablet_schema,
DescriptorTbl desc_tb
}
BlockChanger::~BlockChanger() {
- for (auto it = _schema_mapping.begin(); it != _schema_mapping.end(); ++it)
{
- SAFE_DELETE(it->default_value);
- }
_schema_mapping.clear();
}
@@ -386,15 +382,14 @@ Status BlockChanger::change_block(vectorized::Block*
ref_block,
swap_idx_list.emplace_back(result_tmp_column_idx, idx);
} else if (_schema_mapping[idx].ref_column_idx < 0) {
// new column, write default value
- auto* value = _schema_mapping[idx].default_value;
+ const auto& value = _schema_mapping[idx].default_value;
auto column =
new_block->get_by_position(idx).column->assume_mutable();
- if (value->is_null()) {
+ if (value.is_null()) {
DCHECK(column->is_nullable());
column->insert_many_defaults(row_num);
} else {
- auto type_info =
get_type_info(_schema_mapping[idx].new_column);
-
DefaultValueColumnIterator::insert_default_data(type_info.get(), value->size(),
- value->ptr(),
column, row_num);
+ column = column->convert_to_predicate_column_if_dictionary();
+ column->insert_duplicate_fields(value, row_num);
}
} else {
// same type, just swap column
@@ -1521,17 +1516,16 @@ Status SchemaChangeJob::parse_request(const
SchemaChangeParams& sc_params,
Status SchemaChangeJob::_init_column_mapping(ColumnMapping* column_mapping,
const TabletColumn& column_schema,
const std::string& value) {
- if (auto field = WrapperField::create(column_schema); field.has_value()) {
- column_mapping->default_value = field.value();
- } else {
- return field.error();
+ auto t = FieldFactory::create(column_schema);
+ Defer defer([t]() { delete t; });
+ if (t == nullptr) {
+ return Status::Uninitialized("Unsupport field creation of {}",
column_schema.name());
}
- if (column_schema.is_nullable() && value.length() == 0) {
- column_mapping->default_value->set_null();
- } else {
- RETURN_IF_ERROR(column_mapping->default_value->from_string(value,
column_schema.precision(),
-
column_schema.frac()));
+ if (!column_schema.is_nullable() || value.length() != 0) {
+ vectorized::DataTypeSerDe::FormatOptions options;
+
RETURN_IF_ERROR(column_schema.get_vec_type()->get_serde()->from_olap_string(
+ value, column_mapping->default_value, options));
}
return Status::OK();
diff --git a/be/src/olap/shared_predicate.h b/be/src/olap/shared_predicate.h
index e59d5e5c7ec..fa65f344929 100644
--- a/be/src/olap/shared_predicate.h
+++ b/be/src/olap/shared_predicate.h
@@ -24,7 +24,6 @@
#include "olap/column_predicate.h"
#include "olap/rowset/segment_v2/bloom_filter.h"
#include "olap/rowset/segment_v2/inverted_index_reader.h"
-#include "olap/wrapper_field.h"
#include "vec/columns/column_dictionary.h"
namespace doris {
diff --git a/be/src/olap/wrapper_field.cpp b/be/src/olap/wrapper_field.cpp
deleted file mode 100644
index f44e83c9c05..00000000000
--- a/be/src/olap/wrapper_field.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "olap/wrapper_field.h"
-
-#include <glog/logging.h>
-
-#include <algorithm>
-#include <cstring>
-#include <ostream>
-
-#include "common/config.h"
-#include "common/status.h"
-#include "olap/olap_common.h"
-#include "olap/olap_define.h"
-#include "olap/row_cursor.h"
-#include "util/expected.hpp"
-
-namespace doris {
-
-const size_t DEFAULT_STRING_LENGTH = 50;
-
-Result<WrapperField*> WrapperField::create(const TabletColumn& column,
uint32_t len) {
- bool is_string_type = (column.type() == FieldType::OLAP_FIELD_TYPE_CHAR ||
- column.type() == FieldType::OLAP_FIELD_TYPE_VARCHAR
||
- column.type() == FieldType::OLAP_FIELD_TYPE_HLL ||
- column.type() == FieldType::OLAP_FIELD_TYPE_BITMAP
||
- column.type() == FieldType::OLAP_FIELD_TYPE_STRING);
- size_t max_length = column.type() == FieldType::OLAP_FIELD_TYPE_STRING
- ? config::string_type_length_soft_limit_bytes
- : OLAP_VARCHAR_MAX_LENGTH;
- if (is_string_type && len > max_length) {
- LOG(WARNING) << "length of string parameter is too long[len=" << len
- << ", max_len=" << max_length << "].";
- return unexpected {Status::Error<ErrorCode::EXCEEDED_LIMIT>(
- "length of string parameter is too long[len={}, max_len={}].",
len, max_length)};
- }
-
- Field* rep = FieldFactory::create(column);
- if (rep == nullptr) {
- return unexpected {Status::Uninitialized("Unsupport field creation of
{}", column.name())};
- }
-
- size_t variable_len = 0;
- if (column.type() == FieldType::OLAP_FIELD_TYPE_CHAR) {
- variable_len = std::max(len, (uint32_t)(column.length()));
- } else if (column.type() == FieldType::OLAP_FIELD_TYPE_VARCHAR ||
- column.type() == FieldType::OLAP_FIELD_TYPE_HLL) {
- // column.length is the serialized varchar length
- // the first sizeof(VarcharLengthType) bytes is the length of varchar
- // variable_len is the real length of varchar
- variable_len =
- std::max(len, static_cast<uint32_t>(column.length() -
sizeof(VarcharLengthType)));
- } else if (column.type() == FieldType::OLAP_FIELD_TYPE_STRING) {
- variable_len = len;
- } else {
- variable_len = column.length();
- }
- return new WrapperField(rep, variable_len, is_string_type);
-}
-
-WrapperField* WrapperField::create_by_type(const FieldType& type, int64_t
var_length) {
- Field* rep = FieldFactory::create_by_type(type);
- if (rep == nullptr) {
- throw Exception(Status::InternalError("Unsupport field creation of
type {}",
- static_cast<int>(type)));
- }
- bool is_string_type =
- (type == FieldType::OLAP_FIELD_TYPE_CHAR ||
- type == FieldType::OLAP_FIELD_TYPE_VARCHAR || type ==
FieldType::OLAP_FIELD_TYPE_HLL ||
- type == FieldType::OLAP_FIELD_TYPE_BITMAP ||
- type == FieldType::OLAP_FIELD_TYPE_STRING ||
- type == FieldType::OLAP_FIELD_TYPE_QUANTILE_STATE);
- return new WrapperField(rep, var_length, is_string_type);
-}
-
-WrapperField::WrapperField(Field* rep, size_t variable_len, bool
is_string_type)
- : _rep(rep), _is_string_type(is_string_type), _var_length(0) {
- size_t fixed_len = _rep->size();
- _length = fixed_len + 1;
- _field_buf = new char[_length];
- memset(_field_buf, 0, _length);
- _owned_buf = _field_buf;
- char* buf = _field_buf + 1;
-
- if (_is_string_type) {
- _var_length = variable_len > DEFAULT_STRING_LENGTH ?
DEFAULT_STRING_LENGTH : variable_len;
- auto* slice = reinterpret_cast<Slice*>(buf);
- slice->size = _var_length;
- _string_content.reset(new char[slice->size]);
- slice->data = _string_content.get();
- }
- if (_rep->type() == FieldType::OLAP_FIELD_TYPE_STRING) {
- _long_text_buf = (char*)malloc(RowCursor::DEFAULT_TEXT_LENGTH *
sizeof(char));
- rep->set_long_text_buf(&_long_text_buf);
- }
-}
-} // namespace doris
diff --git a/be/src/olap/wrapper_field.h b/be/src/olap/wrapper_field.h
deleted file mode 100644
index c7d8e5e9d53..00000000000
--- a/be/src/olap/wrapper_field.h
+++ /dev/null
@@ -1,107 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#include <memory>
-#include <string>
-
-#include "common/status.h"
-#include "olap/field.h"
-#include "olap/row_cursor_cell.h"
-#include "olap/tablet_schema.h"
-#include "util/slice.h"
-
-namespace doris {
-enum class FieldType;
-
-class WrapperField {
-public:
- static Result<WrapperField*> create(const TabletColumn& column, uint32_t
len = 0);
- static WrapperField* create_by_type(const FieldType& type) { return
create_by_type(type, 0); }
- static WrapperField* create_by_type(const FieldType& type, int64_t
var_length);
-
- WrapperField(Field* rep, size_t variable_len, bool is_string_type);
-
- virtual ~WrapperField() {
- delete _rep;
- delete[] _owned_buf;
- if (_long_text_buf) {
- free(_long_text_buf);
- }
- }
-
- // Convert the internal value to string output.
- //
- // NOTE: it only for DEBUG use. Do not include the null flag.
- std::string to_string() const { return _rep->to_string(_field_buf + 1); }
-
- // Deserialize field value from incoming string.
- //
- // NOTE: the parameter must be a '\0' terminated string. It do not include
the null flag.
- Status from_string(const std::string& value_string, const int precision =
0,
- const int scale = 0) {
- if (_is_string_type) {
- if (value_string.size() > _var_length) {
- Slice* slice = reinterpret_cast<Slice*>(cell_ptr());
- slice->size = value_string.size();
- _var_length = slice->size;
- _string_content.reset(new char[slice->size]);
- slice->data = _string_content.get();
- }
- }
- return _rep->from_string(_field_buf + 1, value_string, precision,
scale);
- }
-
- bool is_string_type() const { return _is_string_type; }
- char* ptr() const { return _field_buf + 1; }
- size_t size() const { return _rep->size(); }
- size_t field_size() const { return _rep->field_size(); }
- bool is_null() const { return *reinterpret_cast<bool*>(_field_buf); }
- void set_is_null(bool is_null) { *reinterpret_cast<bool*>(_field_buf) =
is_null; }
- void set_null() { *reinterpret_cast<bool*>(_field_buf) = true; }
- void set_not_null() { *reinterpret_cast<bool*>(_field_buf) = false; }
- char* nullable_cell_ptr() const { return _field_buf; }
- void set_to_max() { _rep->set_to_max(_field_buf + 1); }
- void set_to_min() { _rep->set_to_min(_field_buf + 1); }
- void set_raw_value(const void* value, size_t size) { memcpy(_field_buf +
1, value, size); }
- void* cell_ptr() const { return _field_buf + 1; }
- void* mutable_cell_ptr() const { return _field_buf + 1; }
- const Field* field() const { return _rep; }
-
- int cmp(const WrapperField* field) const { return
_rep->compare_cell(*this, *field); }
-
- void copy(const WrapperField* field) { _rep->direct_copy(this, *field); }
-
-private:
- Field* _rep = nullptr;
- bool _is_string_type;
- char* _field_buf = nullptr;
- char* _owned_buf = nullptr;
- char* _long_text_buf = nullptr;
-
- // Include fixed and variable length and null bytes.
- size_t _length;
- size_t _var_length;
- // Memory for string type field.
- std::unique_ptr<char[]> _string_content;
-};
-
-} // namespace doris
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index 846cf902018..d0aa6970af5 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -516,6 +516,7 @@ public:
// In fact, this function is just calling insert_from but without the
overhead of a virtual function.
virtual void append_data_by_selector(MutablePtr& res, const Selector&
selector) const = 0;
+ virtual void insert_duplicate_fields(const Field& x, const size_t n) = 0;
// Here, begin and end represent the range of the Selector.
virtual void append_data_by_selector(MutablePtr& res, const Selector&
selector, size_t begin,
@@ -722,6 +723,12 @@ protected:
append_data_by_selector_impl<Derived>(res, selector, 0,
selector.size());
}
template <typename Derived>
+ void insert_impl(const Field& x, const size_t n) {
+ for (size_t i = 0; i < n; ++i) {
+ static_cast<Derived&>(*this).insert(x);
+ }
+ }
+ template <typename Derived>
void append_data_by_selector_impl(MutablePtr& res, const Selector&
selector, size_t begin,
size_t end) const {
size_t num_rows = size();
diff --git a/be/src/vec/columns/column_nothing.h
b/be/src/vec/columns/column_nothing.h
index ccd1ca5a0a4..90721da0a0e 100644
--- a/be/src/vec/columns/column_nothing.h
+++ b/be/src/vec/columns/column_nothing.h
@@ -37,11 +37,11 @@ private:
}
Field operator[](size_t) const override { return {}; }
void get(size_t, Field& f) const override { f = {}; }
- void insert(const Field&) override { ++s; }
public:
std::string get_name() const override { return "Nothing"; }
MutableColumnPtr clone_dummy(size_t s_) const override { return
ColumnNothing::create(s_); }
+ void insert(const Field&) override { ++s; }
bool structure_equals(const IColumn& rhs) const override {
return typeid(rhs) == typeid(ColumnNothing);
diff --git a/be/src/vec/columns/column_nullable.cpp
b/be/src/vec/columns/column_nullable.cpp
index cf737154bf9..f0cf6943c09 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -331,6 +331,16 @@ void ColumnNullable::insert(const Field& x) {
}
}
+void ColumnNullable::insert_duplicate_fields(const Field& x, const size_t n) {
+ if (x.is_null()) {
+ get_nested_column().insert_many_defaults(n);
+ get_null_map_column().insert_many_vals(1, n);
+ } else {
+ get_nested_column().insert_duplicate_fields(x, n);
+ get_null_map_column().insert_many_vals(0, n);
+ }
+}
+
void ColumnNullable::insert_from(const IColumn& src, size_t n) {
const auto& src_concrete = assert_cast<const ColumnNullable&>(src);
get_nested_column().insert_from(src_concrete.get_nested_column(), n);
diff --git a/be/src/vec/columns/column_nullable.h
b/be/src/vec/columns/column_nullable.h
index e601147b72c..48a57755473 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -138,6 +138,7 @@ public:
const uint32_t* indices_end);
void insert(const Field& x) override;
+ void insert_duplicate_fields(const Field& x, const size_t n) override;
void insert_from(const IColumn& src, size_t n) override;
void insert_many_from(const IColumn& src, size_t position, size_t length)
override;
diff --git a/be/src/vec/columns/predicate_column.h
b/be/src/vec/columns/predicate_column.h
index 0fb8192f533..106e8fa9b2d 100644
--- a/be/src/vec/columns/predicate_column.h
+++ b/be/src/vec/columns/predicate_column.h
@@ -319,6 +319,28 @@ public:
return this->create();
}
+ void insert_duplicate_fields(const Field& x, const size_t n) override {
+ if constexpr (is_string_type(Type)) {
+ const auto& str = x.get<TYPE_STRING>();
+ auto* dst = _arena.alloc(str.size() * n);
+ for (size_t i = 0; i < n; i++) {
+ memcpy(dst, str.data(), str.size());
+ insert_string_value(dst, str.size());
+ dst += i * str.size();
+ }
+ } else if constexpr (Type == TYPE_LARGEINT) {
+ const auto& v = x.get<TYPE_LARGEINT>();
+ for (size_t i = 0; i < n; i++) {
+ insert_in_copy_way(reinterpret_cast<const char*>(&v),
sizeof(v));
+ }
+ } else {
+ const auto& v = x.get<Type>();
+ for (size_t i = 0; i < n; i++) {
+ insert_default_type(reinterpret_cast<const char*>(&v),
sizeof(v));
+ }
+ }
+ }
+
void insert(const Field& x) override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"insert not supported in PredicateColumnType");
diff --git a/be/src/vec/common/cow.h b/be/src/vec/common/cow.h
index 2f40a736fda..127fb25cf6a 100644
--- a/be/src/vec/common/cow.h
+++ b/be/src/vec/common/cow.h
@@ -396,7 +396,8 @@ public:
*/
namespace vectorized {
class IColumn;
-}
+class Field;
+} // namespace vectorized
template <typename Base, typename Derived>
class COWHelper : public Base {
public:
@@ -406,6 +407,7 @@ public:
using MutablePtr = typename Base::template mutable_ptr<Derived>;
#include "common/compile_check_avoid_begin.h"
+
//This code uses templates, and errors like the following are likely to
occur, mainly due to literal type mismatches:
// be/src/vec/common/cow.h:409:39: warning: implicit conversion loses
integer precision: 'int' to 'value_type' (aka 'unsigned char')
[-Wimplicit-int-conversion]
// 409 | return MutablePtr(new
Derived(std::forward<Args>(args)...));
@@ -430,6 +432,9 @@ public:
const typename Base::Selector& selector)
const override {
this->template append_data_by_selector_impl<Derived>(res, selector);
}
+ void insert_duplicate_fields(const vectorized::Field& x, const size_t n)
override {
+ this->template insert_impl<Derived>(x, n);
+ }
void append_data_by_selector(typename Base::MutablePtr& res,
const typename Base::Selector& selector,
size_t begin,
diff --git a/be/src/vec/data_types/data_type_factory.hpp
b/be/src/vec/data_types/data_type_factory.hpp
index 1b15fb6aff0..e636d216a73 100644
--- a/be/src/vec/data_types/data_type_factory.hpp
+++ b/be/src/vec/data_types/data_type_factory.hpp
@@ -66,8 +66,8 @@ public:
// Nullable will be not consistent with `raw_type.is_nullable` in
SlotDescriptor.
DataTypePtr create_data_type(const TTypeDesc& raw_type, bool is_nullable);
- DataTypePtr create_data_type(const FieldType& type, int precision, int
scale) {
- return _create_primitive_data_type(type, precision, scale, -1);
+ DataTypePtr create_data_type(const FieldType& type, int precision, int
scale, int len = -1) {
+ return _create_primitive_data_type(type, precision, scale, len);
}
// Create DataType by PrimitiveType (only for naive types)
DataTypePtr create_data_type(const PrimitiveType primitive_type, bool
is_nullable,
diff --git a/be/src/vec/data_types/data_type_fixed_length_object.h
b/be/src/vec/data_types/data_type_fixed_length_object.h
index e7e5e4c5917..1cc7e8f21a6 100644
--- a/be/src/vec/data_types/data_type_fixed_length_object.h
+++ b/be/src/vec/data_types/data_type_fixed_length_object.h
@@ -72,7 +72,7 @@ public:
using SerDeType = DataTypeFixedLengthObjectSerDe;
DataTypeSerDeSPtr get_serde(int nesting_level = 1) const override {
- return std::make_shared<SerDeType>(nesting_level);
+ return std::make_shared<SerDeType>(PType, nesting_level);
};
};
diff --git a/be/src/vec/data_types/data_type_string.h
b/be/src/vec/data_types/data_type_string.h
index 2982b755f35..b01501dd03b 100644
--- a/be/src/vec/data_types/data_type_string.h
+++ b/be/src/vec/data_types/data_type_string.h
@@ -81,7 +81,7 @@ public:
bool equals(const IDataType& rhs) const override;
using SerDeType = DataTypeStringSerDe;
DataTypeSerDeSPtr get_serde(int nesting_level = 1) const override {
- return std::make_shared<SerDeType>(nesting_level);
+ return std::make_shared<SerDeType>(_primitive_type, nesting_level,
_len);
};
bool is_char_type() const { return _primitive_type ==
PrimitiveType::TYPE_CHAR; }
int len() const { return _len; }
diff --git a/be/src/vec/data_types/serde/data_type_bitmap_serde.cpp
b/be/src/vec/data_types/serde/data_type_bitmap_serde.cpp
index 73a9b343b5a..646652bfe31 100644
--- a/be/src/vec/data_types/serde/data_type_bitmap_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_bitmap_serde.cpp
@@ -224,8 +224,8 @@ Status DataTypeBitMapSerDe::from_string(StringRef& str,
IColumn& column,
return deserialize_one_cell_from_json(column, slice, options);
}
-Status DataTypeBitMapSerDe::from_string(const std::string& str, Field& field,
- const FormatOptions& options) const {
+Status DataTypeBitMapSerDe::from_olap_string(const std::string& str, Field&
field,
+ const FormatOptions& options)
const {
BitmapValue value;
if (!value.deserialize(str.data())) {
return Status::InternalError("deserialize BITMAP from string fail!");
diff --git a/be/src/vec/data_types/serde/data_type_bitmap_serde.h
b/be/src/vec/data_types/serde/data_type_bitmap_serde.h
index cb41ef2dc76..a8b388b1ac6 100644
--- a/be/src/vec/data_types/serde/data_type_bitmap_serde.h
+++ b/be/src/vec/data_types/serde/data_type_bitmap_serde.h
@@ -38,8 +38,8 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int64_t row_num,
BufferWritable& bw,
FormatOptions& options) const override;
diff --git a/be/src/vec/data_types/serde/data_type_date_or_datetime_serde.cpp
b/be/src/vec/data_types/serde/data_type_date_or_datetime_serde.cpp
index b7a29f82759..2c0fefdfca9 100644
--- a/be/src/vec/data_types/serde/data_type_date_or_datetime_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_date_or_datetime_serde.cpp
@@ -397,8 +397,8 @@ Status DataTypeDateSerDe<T>::from_string(StringRef& str,
IColumn& column,
}
template <PrimitiveType T>
-Status DataTypeDateSerDe<T>::from_string(const std::string& str, Field& field,
- const FormatOptions& options) const {
+Status DataTypeDateSerDe<T>::from_olap_string(const std::string& str, Field&
field,
+ const FormatOptions& options)
const {
CastParameters params {.status = Status::OK(), .is_strict = false};
CppType res;
diff --git a/be/src/vec/data_types/serde/data_type_date_or_datetime_serde.h
b/be/src/vec/data_types/serde/data_type_date_or_datetime_serde.h
index 3360aaf6a7c..1ebd23a50ee 100644
--- a/be/src/vec/data_types/serde/data_type_date_or_datetime_serde.h
+++ b/be/src/vec/data_types/serde/data_type_date_or_datetime_serde.h
@@ -51,8 +51,8 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
index 5d47125fb11..6cc0958e5e1 100644
--- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
@@ -122,8 +122,8 @@ Status DataTypeDateTimeV2SerDe::from_string(StringRef& str,
IColumn& column,
return Status::OK();
}
-Status DataTypeDateTimeV2SerDe::from_string(const std::string& str, Field&
field,
- const FormatOptions& options)
const {
+Status DataTypeDateTimeV2SerDe::from_olap_string(const std::string& str,
Field& field,
+ const FormatOptions& options)
const {
CastParameters params {.status = Status::OK(), .is_strict = false};
DateV2Value<DateTimeV2ValueType> res;
diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h
b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h
index a903dc2bd2d..5d7f4bed8ec 100644
--- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h
+++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h
@@ -41,8 +41,8 @@ public:
Status from_string_batch(const ColumnString& str, ColumnNullable& column,
const FormatOptions& options) const final;
- Status from_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
Status from_string_strict_mode_batch(const ColumnString& str, IColumn&
column,
const FormatOptions& options,
diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
index a4fb0f8f8ee..1a7a9fed56a 100644
--- a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
@@ -229,8 +229,8 @@ Status DataTypeDateV2SerDe::from_string_batch(const
ColumnString& col_str, Colum
return Status::OK();
}
-Status DataTypeDateV2SerDe::from_string(const std::string& str, Field& field,
- const FormatOptions& options) const {
+Status DataTypeDateV2SerDe::from_olap_string(const std::string& str, Field&
field,
+ const FormatOptions& options)
const {
CastParameters params {.status = Status::OK(), .is_strict = false};
DateV2Value<DateV2ValueType> res;
diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.h
b/be/src/vec/data_types/serde/data_type_datev2_serde.h
index 173ce75e6bc..4b64862c921 100644
--- a/be/src/vec/data_types/serde/data_type_datev2_serde.h
+++ b/be/src/vec/data_types/serde/data_type_datev2_serde.h
@@ -40,8 +40,8 @@ public:
Status from_string_batch(const ColumnString& str, ColumnNullable& column,
const FormatOptions& options) const final;
- Status from_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
Status from_string_strict_mode_batch(const ColumnString& str, IColumn&
column,
const FormatOptions& options,
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
index b3cfd6b88b6..5006d7937f2 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
@@ -126,15 +126,17 @@ Status DataTypeDecimalSerDe<T>::from_string(StringRef&
str, IColumn& column,
}
template <PrimitiveType T>
-Status DataTypeDecimalSerDe<T>::from_string(const std::string& str, Field&
field,
- const FormatOptions& options)
const {
+Status DataTypeDecimalSerDe<T>::from_olap_string(const std::string& str,
Field& field,
+ const FormatOptions& options)
const {
FieldType to;
CastParameters params;
params.is_strict = false;
- auto arg_precision = static_cast<UInt32>(precision);
-
- if (!CastToDecimal::from_string(StringRef(str), to, arg_precision, 0,
params)) {
+ // Decimal string in storage is saved as an integer. The scale is
maintained by data type, so we
+ // can just parse the string as an integer here.
+ if (!CastToDecimal::from_string(StringRef(str), to,
static_cast<UInt32>(precision),
+ options.ignore_scale ? 0 :
static_cast<UInt32>(scale),
+ params)) {
return Status::InvalidArgument("parse Decimal fail, string: '{}'",
str);
}
field = Field::create_field<T>(std::move(to));
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h
b/be/src/vec/data_types/serde/data_type_decimal_serde.h
index 713c786c090..1ba23084034 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.h
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h
@@ -58,8 +58,8 @@ public:
Status from_string_batch(const ColumnString& str, ColumnNullable& column,
const FormatOptions& options) const override;
- Status from_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
Status from_string_strict_mode_batch(
const ColumnString& str, IColumn& column, const FormatOptions&
options,
diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.cpp
b/be/src/vec/data_types/serde/data_type_hll_serde.cpp
index 2210a607adc..80702bd3012 100644
--- a/be/src/vec/data_types/serde/data_type_hll_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_hll_serde.cpp
@@ -220,8 +220,8 @@ Status DataTypeHLLSerDe::from_string(StringRef& str,
IColumn& column,
return deserialize_one_cell_from_json(column, slice, options);
}
-Status DataTypeHLLSerDe::from_string(const std::string& str, Field& field,
- const FormatOptions& options) const {
+Status DataTypeHLLSerDe::from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const {
HyperLogLog hyper_log_log(Slice(str.data(), str.size()));
field = Field::create_field<TYPE_HLL>(std::move(hyper_log_log));
return Status::OK();
diff --git a/be/src/vec/data_types/serde/data_type_hll_serde.h
b/be/src/vec/data_types/serde/data_type_hll_serde.h
index 1b66c7dcd2f..d39cece675d 100644
--- a/be/src/vec/data_types/serde/data_type_hll_serde.h
+++ b/be/src/vec/data_types/serde/data_type_hll_serde.h
@@ -39,8 +39,8 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int64_t row_num,
BufferWritable& bw,
FormatOptions& options) const override;
diff --git a/be/src/vec/data_types/serde/data_type_ipv4_serde.cpp
b/be/src/vec/data_types/serde/data_type_ipv4_serde.cpp
index 2ddff0dc226..d11e8ceeac7 100644
--- a/be/src/vec/data_types/serde/data_type_ipv4_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_ipv4_serde.cpp
@@ -177,8 +177,8 @@ Status DataTypeIPv4SerDe::from_string(StringRef& str,
IColumn& column,
return Status::OK();
}
-Status DataTypeIPv4SerDe::from_string(const std::string& str, Field& field,
- const FormatOptions& options) const {
+Status DataTypeIPv4SerDe::from_olap_string(const std::string& str, Field&
field,
+ const FormatOptions& options) const
{
CastParameters params;
params.is_strict = false;
diff --git a/be/src/vec/data_types/serde/data_type_ipv4_serde.h
b/be/src/vec/data_types/serde/data_type_ipv4_serde.h
index 424066fc34e..28289b7f5b7 100644
--- a/be/src/vec/data_types/serde/data_type_ipv4_serde.h
+++ b/be/src/vec/data_types/serde/data_type_ipv4_serde.h
@@ -65,8 +65,8 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
diff --git a/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp
b/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp
index 7e8426e4fb2..69bb8de613a 100644
--- a/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp
@@ -275,8 +275,8 @@ Status DataTypeIPv6SerDe::from_string(StringRef& str,
IColumn& column,
return Status::OK();
}
-Status DataTypeIPv6SerDe::from_string(const std::string& str, Field& field,
- const FormatOptions& options) const {
+Status DataTypeIPv6SerDe::from_olap_string(const std::string& str, Field&
field,
+ const FormatOptions& options) const
{
CastParameters params;
params.is_strict = false;
diff --git a/be/src/vec/data_types/serde/data_type_ipv6_serde.h
b/be/src/vec/data_types/serde/data_type_ipv6_serde.h
index 176079d50d3..e53bd1dcb5d 100644
--- a/be/src/vec/data_types/serde/data_type_ipv6_serde.h
+++ b/be/src/vec/data_types/serde/data_type_ipv6_serde.h
@@ -76,8 +76,8 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h
b/be/src/vec/data_types/serde/data_type_jsonb_serde.h
index 7e29db5bae7..97c61eb85b8 100644
--- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h
+++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h
@@ -34,7 +34,7 @@ class Arena;
class DataTypeJsonbSerDe : public DataTypeStringSerDe {
public:
- DataTypeJsonbSerDe(int nesting_level = 1) :
DataTypeStringSerDe(nesting_level) {};
+ DataTypeJsonbSerDe(int nesting_level = 1) :
DataTypeStringSerDe(TYPE_JSONB, nesting_level) {};
std::string get_name() const override { return "JSONB"; }
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
index 5e874b8fcbb..6544ea69541 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
@@ -491,10 +491,9 @@ Status DataTypeNullableSerDe::from_string(StringRef& str,
IColumn& column,
return Status::OK();
}
-Status DataTypeNullableSerDe::from_string(const std::string& str, Field& field,
- const FormatOptions& options) const {
- auto st = nested_serde->from_string(str, field, options);
- if (!st.ok()) {
+Status DataTypeNullableSerDe::from_olap_string(const std::string& str, Field&
field,
+ const FormatOptions& options)
const {
+ if (!nested_serde->from_olap_string(str, field, options).ok()) {
// fill null if fail
field = Field();
return Status::OK();
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h
b/be/src/vec/data_types/serde/data_type_nullable_serde.h
index 48d477e2169..4bac47ea52b 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.h
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h
@@ -40,8 +40,8 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp
b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index 6f891c406f0..7b000c1022d 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -746,8 +746,8 @@ Status DataTypeNumberSerDe<T>::from_string(StringRef& str,
IColumn& column,
}
template <PrimitiveType T>
-Status DataTypeNumberSerDe<T>::from_string(const std::string& str, Field&
field,
- const FormatOptions& options) const
{
+Status DataTypeNumberSerDe<T>::from_olap_string(const std::string& str, Field&
field,
+ const FormatOptions& options)
const {
typename PrimitiveTypeTraits<T>::CppType val;
CastParameters params;
params.is_strict = false;
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h
b/be/src/vec/data_types/serde/data_type_number_serde.h
index 096837ed08a..ef075c541ff 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.h
+++ b/be/src/vec/data_types/serde/data_type_number_serde.h
@@ -62,8 +62,8 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
diff --git a/be/src/vec/data_types/serde/data_type_quantilestate_serde.cpp
b/be/src/vec/data_types/serde/data_type_quantilestate_serde.cpp
index b8c0e0b0002..2387228d425 100644
--- a/be/src/vec/data_types/serde/data_type_quantilestate_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_quantilestate_serde.cpp
@@ -21,8 +21,8 @@
namespace doris::vectorized {
-Status DataTypeQuantileStateSerDe::from_string(const std::string& str, Field&
field,
- const FormatOptions& options)
const {
+Status DataTypeQuantileStateSerDe::from_olap_string(const std::string& str,
Field& field,
+ const FormatOptions&
options) const {
QuantileState value;
if (!value.deserialize(Slice(str.data(), str.size()))) {
return Status::InternalError("deserialize QuantileState from string
fail!");
diff --git a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h
b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h
index c32e788bf5e..9803066d937 100644
--- a/be/src/vec/data_types/serde/data_type_quantilestate_serde.h
+++ b/be/src/vec/data_types/serde/data_type_quantilestate_serde.h
@@ -59,8 +59,8 @@ public:
return Status::OK();
}
- Status from_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
Status serialize_column_to_json(const IColumn& column, int64_t start_idx,
int64_t end_idx,
BufferWritable& bw, FormatOptions&
options) const override {
diff --git a/be/src/vec/data_types/serde/data_type_serde.h
b/be/src/vec/data_types/serde/data_type_serde.h
index 8f4eaad3989..b67cdee9180 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -209,6 +209,12 @@ public:
const cctz::time_zone* timezone = nullptr;
+ /**
+ * Ignore scale when converting decimal to string, because decimal in
zone map is stored in
+ * unscaled value.
+ */
+ bool ignore_scale = false;
+
[[nodiscard]] char get_collection_delimiter(
int hive_text_complex_type_delimiter_level) const {
CHECK(0 <= hive_text_complex_type_delimiter_level &&
@@ -315,9 +321,11 @@ public:
const FormatOptions& options) const {
return Status::NotSupported("from_string is not supported");
}
- virtual Status from_string(const std::string& str, Field& field,
- const FormatOptions& options) const {
- return Status::NotSupported("from_string is not supported");
+ // Convert string which is read from OLAP table to corresponding type.
+ // Only used for basic data types, such as Ip, Date, Number, etc.
+ virtual Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const {
+ return Status::NotSupported("from_olap_string is not supported");
}
// For strict mode, we should not have nullable columns, as we will
directly report errors when string conversion fails instead of handling them
diff --git a/be/src/vec/data_types/serde/data_type_string_serde.cpp
b/be/src/vec/data_types/serde/data_type_string_serde.cpp
index 15c4df8ff52..c4d6dd60635 100644
--- a/be/src/vec/data_types/serde/data_type_string_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_string_serde.cpp
@@ -455,9 +455,16 @@ Status
DataTypeStringSerDeBase<ColumnType>::from_string(StringRef& str, IColumn&
}
template <typename ColumnType>
-Status DataTypeStringSerDeBase<ColumnType>::from_string(const std::string&
str, Field& field,
- const FormatOptions&
options) const {
- field = Field::create_field<TYPE_STRING>(str);
+Status DataTypeStringSerDeBase<ColumnType>::from_olap_string(const
std::string& str, Field& field,
+ const
FormatOptions& options) const {
+ if (cast_set<int>(str.size()) < _len) {
+ DCHECK_EQ(_type, TYPE_CHAR);
+ std::string tmp(_len, '\0');
+ memcpy(tmp.data(), str.data(), str.size());
+ field = Field::create_field<TYPE_CHAR>(std::move(tmp));
+ } else {
+ field = Field::create_field<TYPE_STRING>(str);
+ }
return Status::OK();
}
diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h
b/be/src/vec/data_types/serde/data_type_string_serde.h
index d718ea47088..5603ba9df90 100644
--- a/be/src/vec/data_types/serde/data_type_string_serde.h
+++ b/be/src/vec/data_types/serde/data_type_string_serde.h
@@ -95,14 +95,15 @@ class DataTypeStringSerDeBase : public DataTypeSerDe {
using ColumnStrType = ColumnType;
public:
- DataTypeStringSerDeBase(int nesting_level = 1) :
DataTypeSerDe(nesting_level) {};
+ DataTypeStringSerDeBase(PrimitiveType type, int nesting_level = 1, int len
= -1)
+ : DataTypeSerDe(nesting_level), _type(type), _len(len) {}
std::string get_name() const override { return "String"; }
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
Status serialize_one_cell_to_json(const IColumn& column, int64_t row_num,
BufferWritable& bw,
FormatOptions& options) const override;
@@ -262,6 +263,10 @@ public:
void to_string(const IColumn& column, size_t row_num, BufferWritable& bw,
const FormatOptions& options) const override;
+
+private:
+ const PrimitiveType _type;
+ const int _len = -1;
};
using DataTypeStringSerDe = DataTypeStringSerDeBase<ColumnString>;
diff --git a/be/src/vec/data_types/serde/data_type_time_serde.cpp
b/be/src/vec/data_types/serde/data_type_time_serde.cpp
index 891ffe198cf..787540f7e72 100644
--- a/be/src/vec/data_types/serde/data_type_time_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_time_serde.cpp
@@ -111,8 +111,8 @@ Status DataTypeTimeV2SerDe::from_string(StringRef& str,
IColumn& column,
return Status::OK();
}
-Status DataTypeTimeV2SerDe::from_string(const std::string& str, Field& field,
- const FormatOptions& options) const {
+Status DataTypeTimeV2SerDe::from_olap_string(const std::string& str, Field&
field,
+ const FormatOptions& options)
const {
CastParameters params {.status = Status::OK(), .is_strict = false};
// set false to `is_strict`, it will not set error code cuz we dont need
then speed up the process.
// then we rely on return value to check success.
diff --git a/be/src/vec/data_types/serde/data_type_time_serde.h
b/be/src/vec/data_types/serde/data_type_time_serde.h
index 970d5beb3c8..227e8daa02d 100644
--- a/be/src/vec/data_types/serde/data_type_time_serde.h
+++ b/be/src/vec/data_types/serde/data_type_time_serde.h
@@ -39,8 +39,8 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
Status from_string_strict_mode(StringRef& str, IColumn& column,
const FormatOptions& options) const
override;
diff --git a/be/src/vec/data_types/serde/data_type_timestamptz_serde.cpp
b/be/src/vec/data_types/serde/data_type_timestamptz_serde.cpp
index 5c23537eb81..47dd3f3ca75 100644
--- a/be/src/vec/data_types/serde/data_type_timestamptz_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_timestamptz_serde.cpp
@@ -42,8 +42,8 @@ Status DataTypeTimeStampTzSerDe::from_string(StringRef& str,
IColumn& column,
return Status::OK();
}
-Status DataTypeTimeStampTzSerDe::from_string(const std::string& str, Field&
field,
- const FormatOptions& options)
const {
+Status DataTypeTimeStampTzSerDe::from_olap_string(const std::string& str,
Field& field,
+ const FormatOptions&
options) const {
CastParameters params {.status = Status::OK(), .is_strict = false};
TimestampTzValue res;
diff --git a/be/src/vec/data_types/serde/data_type_timestamptz_serde.h
b/be/src/vec/data_types/serde/data_type_timestamptz_serde.h
index 34926787460..1457942c6a4 100644
--- a/be/src/vec/data_types/serde/data_type_timestamptz_serde.h
+++ b/be/src/vec/data_types/serde/data_type_timestamptz_serde.h
@@ -36,8 +36,8 @@ public:
Status from_string(StringRef& str, IColumn& column,
const FormatOptions& options) const override;
- Status from_string(const std::string& str, Field& field,
- const FormatOptions& options) const override;
+ Status from_olap_string(const std::string& str, Field& field,
+ const FormatOptions& options) const override;
Status from_string_batch(const ColumnString& str, ColumnNullable& column,
const FormatOptions& options) const override;
diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp
b/be/src/vec/exec/format/csv/csv_reader.cpp
index 4496f300d76..4711f62a203 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -477,7 +477,7 @@ Status CsvReader::_deserialize_nullable_string(IColumn&
column, Slice& slice) {
return Status::OK();
}
}
- static DataTypeStringSerDe stringSerDe;
+ static DataTypeStringSerDe stringSerDe(TYPE_STRING);
auto st =
stringSerDe.deserialize_one_cell_from_csv(null_column.get_nested_column(),
slice,
_options);
if (!st.ok()) {
diff --git a/be/src/vec/exec/format/text/text_reader.cpp
b/be/src/vec/exec/format/text/text_reader.cpp
index bf5c072f432..16b28850ce2 100644
--- a/be/src/vec/exec/format/text/text_reader.cpp
+++ b/be/src/vec/exec/format/text/text_reader.cpp
@@ -169,7 +169,7 @@ Status TextReader::_deserialize_nullable_string(IColumn&
column, Slice& slice) {
null_column.insert_data(nullptr, 0);
return Status::OK();
}
- static DataTypeStringSerDe stringSerDe;
+ static DataTypeStringSerDe stringSerDe(TYPE_STRING);
auto st =
stringSerDe.deserialize_one_cell_from_hive_text(null_column.get_nested_column(),
slice, _options);
if (!st.ok()) {
diff --git a/be/src/vec/olap/vgeneric_iterators.cpp
b/be/src/vec/olap/vgeneric_iterators.cpp
index 314c1f0acbb..4966d5e64e4 100644
--- a/be/src/vec/olap/vgeneric_iterators.cpp
+++ b/be/src/vec/olap/vgeneric_iterators.cpp
@@ -80,6 +80,15 @@ Status VStatisticsIterator::next_batch(Block* block) {
} else {
for (int i = 0; i < columns.size(); ++i) {
RETURN_IF_ERROR(_column_iterators[i]->next_batch_of_zone_map(&size,
columns[i]));
+ if (auto cid = _schema.column_id(i);
+ _schema.column(cid)->type() ==
FieldType::OLAP_FIELD_TYPE_CHAR) {
+ auto col = columns[i]->clone_empty();
+ for (size_t j = 0; j < columns[i]->size(); ++j) {
+ const auto& ref =
columns[i]->get_data_at(j).trim_tail_padding_zero();
+
col->insert(Field::create_field<TYPE_CHAR>(ref.to_string()));
+ }
+ columns[i].swap(col);
+ }
}
}
block->set_columns(std::move(columns));
diff --git a/be/test/vec/data_types/from_string_test.cpp
b/be/test/vec/data_types/from_string_test.cpp
index 851c6d3b4e8..a10c803e19c 100644
--- a/be/test/vec/data_types/from_string_test.cpp
+++ b/be/test/vec/data_types/from_string_test.cpp
@@ -17,7 +17,6 @@
#include "gtest/gtest_pred_impl.h"
#include "olap/olap_common.h"
-#include "olap/wrapper_field.h"
#include "vec/columns/column.h"
#include "vec/core/field.h"
#include "vec/data_types/data_type.h"
@@ -27,117 +26,70 @@
namespace doris::vectorized {
/**
- * This test is used to check wrapperField from_string is equal to data type
from_string or not
- * same string feed to wrapperField and data type from_string, and check the
result from
- * wrapperField and data type to_string is equal or not
+ * This test is used to check data type from_string behavior
+ * same string feed to data type from_string, and check the result
*/
-TEST(FromStringTest, ScalaWrapperFieldVsDataType) {
+TEST(FromStringTest, ScalaDataTypeFromString) {
// arithmetic scala field types
{
- // fieldType, test_string, expect_wrapper_field_string,
expect_data_type_string
- using FieldType_RandStr = std::tuple<FieldType,
std::vector<std::string>,
- std::vector<std::string>,
std::vector<std::string>>;
+ // fieldType, test_string, expect_data_type_string
+ using FieldType_RandStr =
+ std::tuple<FieldType, std::vector<std::string>,
std::vector<std::string>>;
std::vector<FieldType_RandStr> arithmetic_scala_field_types = {
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_BOOL, {"0", "1",
"-9"},
- {"0", "1", "1"}, {"0", "1", ""}),
+ {"0", "1", ""}),
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_TINYINT, {"127",
"-128", "-190"},
- {"127", "-128", "66"}, {"127", "-128", ""}),
- // here if it has overflow , wrapper field will return make
max/min value, but data type will just throw error
+ {"127", "-128", ""}),
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_SMALLINT,
{"32767", "32768", "-32769"},
- {"32767", "-32768", "32767"}, {"32767", "",
""}),
- // here if it has overflow , wrapper field will return make
max/min value, but data type will just throw error
- FieldType_RandStr(
- FieldType::OLAP_FIELD_TYPE_INT, {"2147483647",
"2147483648", "-2147483649"},
- {"2147483647", "-2147483648", "2147483647"},
{"2147483647", "", ""}),
- // float ==> float32(32bit)
- // here if it has overflow , wrapper field will return make
max/min value, but data type will just throw error
- FieldType_RandStr(
- FieldType::OLAP_FIELD_TYPE_FLOAT, {"1.123",
"3.40282e+38", "3.40282e+38+1"},
- {"1.123", "3.40282e+38", "3.40282e+38"}, {"1.123",
"3.40282e+38", ""}),
- // double ==> float64(64bit)
- // here if it has overflow , wrapper field will return make
max/min value, but data type will just throw error
+ {"32767", "", ""}),
+ FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_INT,
+ {"2147483647", "2147483648", "-2147483649"},
+ {"2147483647", "", ""}),
+ FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_FLOAT,
+ {"1.123", "3.40282e+38", "3.40282e+38+1"},
+ {"1.123", "3.40282e+38", ""}),
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DOUBLE,
{"2343.12345465746", "2.22507e-308",
"2.22507e-308-1"},
- {"2343.12345465746", "2.22507e-308",
"2.22507e-308"},
{"2343.12345465746", "2.22507e-308", ""}),
- // BIGINT ==> int64_t(64bit)
- // here if it has overflow , wrapper field will return make
max/min value, but data type will just throw error
FieldType_RandStr(
FieldType::OLAP_FIELD_TYPE_BIGINT,
{"9223372036854775807", "-9223372036854775808",
"9223372036854775808"},
- {"9223372036854775807", "-9223372036854775808",
"9223372036854775807"},
{"9223372036854775807", "-9223372036854775808", ""}),
- // LARGEINT ==> int128_t(128bit)
- // here if it has overflow , wrapper field will return 0, but
data type will just throw error
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_LARGEINT,
{"170141183460469231731687303715884105727",
"−170141183460469231731687303715884105728",
"170141183460469231731687303715884105728"},
- {"170141183460469231731687303715884105727",
"0", "0"},
{"170141183460469231731687303715884105727",
"", ""}),
- FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_CHAR, {"amory
happy"}, {"amory happy"},
+ FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_CHAR, {"amory
happy"},
{"amory happy"}),
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_VARCHAR, {"doris
be better"},
- {"doris be better"}, {"doris be better"}),
+ {"doris be better"}),
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_STRING, {"doris
be better"},
- {"doris be better"}, {"doris be better"}),
- // Decimal parse using StringParser which has
SUCCESS|OVERFLOW|UNDERFLOW|FAILURE
- // wrapper_field from_string(scale) and data_type
from_string(scale) use rounding when meet underflow,
- // wrapper_field use min/max when meet overflow, but
data_type just throw error
+ {"doris be better"}),
FieldType_RandStr(
- // decimalv2 will ignore the scale and precision when
parse string
FieldType::OLAP_FIELD_TYPE_DECIMAL,
{
"012345678901234567.012345678",
- // (18, 8)
"123456789012345678.01234567",
- // (17, 10)
"12345678901234567.0123456779",
- // (17, 11)
"12345678901234567.01234567791",
- // (19, 8)
"1234567890123456789.01234567",
},
- {"12345678901234567.012345678",
"123456789012345678.012345670",
- "12345678901234567.012345677",
"12345678901234567.012345677",
- "999999999999999999.999999999"},
{"12345678901234567.012345678",
"123456789012345678.012345670",
"12345678901234567.012345678",
"12345678901234567.012345678", ""}),
- // decimal32 ==> decimal32(9,2)
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL32,
- // (7,2) (6,3) (7,3)
(8,1)
{"1234567.12", "123456.123", "1234567.123"},
- //StringParser res: SUCCESS UNDERFLOW
UNDERFLOW OVERFLOW
- {"123456712", "12345612", "123456712"},
{"1234567.12", "123456.12", "1234567.12"}),
- // decimal64 ==> decimal64(18,9)
- FieldType_RandStr(
- FieldType::OLAP_FIELD_TYPE_DECIMAL64,
- //(9, 9) (3,2) (9, 10)
- {"123456789.123456789", "123.12",
"123456789.0123456789"},
- //StringParser res: SUCCESS SUCCESS
UNDERFLOW OVERFLOW
- {"123456789123456789", "123120000000",
"123456789012345679"},
- {"123456789.123456789", "123.120000000",
"123456789.012345679"}),
- // decimal128I ==> decimal128I(38,18)
+ FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL64,
+ {"123456789.123456789", "123.12",
"123456789.0123456789"},
+ {"123456789.123456789", "123.120000000",
"123456789.012345679"}),
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL128I,
- // (19,18) ==> StringParser::SUCCESS
{"01234567890123456789.123456789123456789",
- // (20,11) ==> StringParser::SUCCESS
"12345678901234567890.12345678911",
- // (19,18) ==> StringParser::SUCCESS
"1234567890123456789.123456789123456789",
- // (19,19) ==> StringParser::UNDERFLOW
"1234567890123456789.1234567890123456789",
- // (18, 20) ==> StringParser::UNDERFLOW
"123456789012345678.01234567890123456789",
- // (20, 19) ==> StringParser::UNDERFLOW
"12345678901234567890.1234567890123456789"},
- {"1234567890123456789123456789123456789",
- "12345678901234567890123456789110000000",
- "1234567890123456789123456789123456789",
- "1234567890123456789123456789012345679",
- "123456789012345678012345678901234568",
- "12345678901234567890123456789012345679"},
{"1234567890123456789.123456789123456789",
"12345678901234567890.123456789110000000",
"1234567890123456789.123456789123456789",
@@ -149,46 +101,23 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) {
for (auto type_pair : arithmetic_scala_field_types) {
auto type = std::get<0>(type_pair);
DataTypePtr data_type_ptr;
- int precision = 0;
- int scale = 0;
if (type == FieldType::OLAP_FIELD_TYPE_DECIMAL) {
data_type_ptr =
DataTypeFactory::instance().create_data_type(type, 27, 9);
- precision = 27;
- scale = 9;
} else if (type == FieldType::OLAP_FIELD_TYPE_DECIMAL32) {
// decimal32(7, 2)
data_type_ptr =
DataTypeFactory::instance().create_data_type(type, 9, 2);
- precision = 9;
- scale = 2;
} else if (type == FieldType::OLAP_FIELD_TYPE_DECIMAL64) {
// decimal64(18, 9)
data_type_ptr =
DataTypeFactory::instance().create_data_type(type, 18, 9);
- precision = 18;
- scale = 9;
} else if (type == FieldType::OLAP_FIELD_TYPE_DECIMAL128I) {
// decimal128I(38,18)
data_type_ptr =
DataTypeFactory::instance().create_data_type(type, 38, 18);
- precision = 38;
- scale = 18;
} else {
data_type_ptr =
DataTypeFactory::instance().create_data_type(type, 0, 0);
}
std::cout << "this type is " << data_type_ptr->get_name() << ": "
<< fmt::format("{}", type) << std::endl;
- // wrapper_field
- for (int i = 0; i < std::get<1>(type_pair).size(); ++i) {
- std::string test_str = std::get<1>(type_pair)[i];
- std::unique_ptr<WrapperField>
wf(WrapperField::create_by_type(type));
- std::cout << "the ith : " << i << " test_str: " << test_str <<
std::endl;
- // from_string
- Status st = wf->from_string(test_str, precision, scale);
- EXPECT_EQ(st.ok(), true);
- // wrapper field to_string is only for debug
- std::string wfs = wf->to_string();
- EXPECT_EQ(wfs, std::get<2>(type_pair)[i]) << int(type);
- }
-
auto col = data_type_ptr->create_column();
// data_type
for (int i = 0; i < std::get<1>(type_pair).size(); ++i) {
@@ -197,7 +126,7 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) {
// data_type from_string
StringRef rb_test(test_str.data(), test_str.size());
Status st = data_type_ptr->from_string(rb_test, col.get());
- if (std::get<3>(type_pair)[i].empty()) {
+ if (std::get<2>(type_pair)[i].empty()) {
EXPECT_EQ(st.ok(), false);
std::cout << "deserialize failed: " << st.to_json() <<
std::endl;
continue;
@@ -205,23 +134,39 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) {
EXPECT_EQ(st.ok(), true);
// data_type to_string
std::string min_s_d = data_type_ptr->to_string(*col, i);
- EXPECT_EQ(min_s_d, std::get<3>(type_pair)[i]);
+ EXPECT_EQ(min_s_d, std::get<2>(type_pair)[i]);
}
}
}
// date and datetime type
{
- using FieldType_RandStr = std::pair<FieldType, std::string>;
- std::vector<FieldType_RandStr> date_scala_field_types = {
- FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DATE,
"2020-01-01"),
- FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DATEV2,
"2020-01-01"),
- FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DATETIME,
"2020-01-01 12:00:00"),
- FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DATETIMEV2,
- "2020-01-01 12:00:00.666666"),
+ struct DateTestField {
+ FieldType type;
+ std::string str;
+ std::string min_str;
+ std::string max_str;
+ };
+ std::vector<DateTestField> date_scala_field_types = {
+ DateTestField {.type = FieldType::OLAP_FIELD_TYPE_DATE,
+ .str = "2020-01-01",
+ .min_str = "0001-01-01",
+ .max_str = "9999-12-31"},
+ DateTestField {.type = FieldType::OLAP_FIELD_TYPE_DATEV2,
+ .str = "2020-01-01",
+ .min_str = "0001-01-01",
+ .max_str = "9999-12-31"},
+ DateTestField {.type = FieldType::OLAP_FIELD_TYPE_DATETIME,
+ .str = "2020-01-01 12:00:00",
+ .min_str = "0001-01-01 00:00:00",
+ .max_str = "9999-12-31 23:59:59"},
+ DateTestField {.type = FieldType::OLAP_FIELD_TYPE_DATETIMEV2,
+ .str = "2020-01-01 12:00:00.666666",
+ .min_str = "0001-01-01 00:00:00",
+ .max_str = "9999-12-31 23:59:59.000000"},
};
for (auto pair : date_scala_field_types) {
- auto type = pair.first;
+ auto type = pair.type;
DataTypePtr data_type_ptr = nullptr;
if (type == FieldType::OLAP_FIELD_TYPE_DATETIMEV2) {
data_type_ptr =
DataTypeFactory::instance().create_data_type(type, 0, 6);
@@ -232,17 +177,9 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) {
std::cout << "this type is " << data_type_ptr->get_name() << ": "
<< fmt::format("{}", type) << std::endl;
- std::unique_ptr<WrapperField>
min_wf(WrapperField::create_by_type(type));
- std::unique_ptr<WrapperField>
max_wf(WrapperField::create_by_type(type));
- std::unique_ptr<WrapperField>
rand_wf(WrapperField::create_by_type(type));
-
- min_wf->set_to_min();
- max_wf->set_to_max();
- static_cast<void>(rand_wf->from_string(pair.second, 0, 0));
-
- std::string min_s = min_wf->to_string();
- std::string max_s = max_wf->to_string();
- std::string rand_date = rand_wf->to_string();
+ std::string min_s = pair.min_str;
+ std::string max_s = pair.max_str;
+ std::string rand_date = pair.str;
StringRef min_rb(min_s.data(), min_s.size());
StringRef max_rb(max_s.data(), max_s.size());
@@ -262,20 +199,9 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) {
rtrim(min_s);
rtrim(max_s);
rtrim(rand_date);
- std::cout << "min(" << min_s << ") with datat_ype_str:" << min_s_d
<< std::endl;
- std::cout << "max(" << max_s << ") with datat_ype_str:" << max_s_d
<< std::endl;
- std::cout << "rand(" << rand_date << ") with datat_type_str:" <<
rand_s_d << std::endl;
- // min wrapper field date to_string in macOS and linux system has
different result
- // macOs equals with data type to_string(0000-01-01), but in
linux is (0-01-01)
- if (FieldType::OLAP_FIELD_TYPE_DATE == type ||
- FieldType::OLAP_FIELD_TYPE_DATETIME == type) {
- // min wrapper field date to_string in macOS and linux system
has different result
- // macOs equals with data type to_string(0000-01-01), but in
linux is (0-01-01)
- std::cout << "wrapper field (" << min_s << ") with data type
to_string(" << min_s_d
- << ")" << std::endl;
- } else {
- EXPECT_EQ(min_s, min_s_d);
- }
+ std::cout << "min(" << min_s << ") with data_type_str:" << min_s_d
<< std::endl;
+ std::cout << "max(" << max_s << ") with data_type_str:" << max_s_d
<< std::endl;
+ std::cout << "rand(" << rand_date << ") with data_type_str:" <<
rand_s_d << std::endl;
EXPECT_EQ(max_s, max_s_d);
EXPECT_EQ(rand_date, rand_s_d);
}
@@ -307,12 +233,10 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) {
DataTypePtr data_type_ptr =
DataTypeFactory::instance().create_data_type(type, 0, 0);
std::cout << "this type is " << data_type_ptr->get_name() << ": "
<< fmt::format("{}", type) << std::endl;
- std::unique_ptr<WrapperField>
rand_wf(WrapperField::create_by_type(type));
- Status st = rand_wf->from_string(pair.second, 0, 0);
- std::string rand_ip = rand_wf->to_string();
+ std::string rand_ip = pair.second;
StringRef rand_rb(rand_ip.data(), rand_ip.size());
auto col = data_type_ptr->create_column();
- st = data_type_ptr->from_string(rand_rb, col.get());
+ Status st = data_type_ptr->from_string(rand_rb, col.get());
EXPECT_EQ(st.ok(), true);
std::string rand_s_d = data_type_ptr->to_string(*col, 0);
rtrim(rand_ip);
@@ -324,29 +248,12 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) {
DataTypePtr data_type_ptr =
DataTypeFactory::instance().create_data_type(type, 0, 0);
std::cout << "this type is " << data_type_ptr->get_name() << ": "
<< fmt::format("{}", type) << std::endl;
- std::unique_ptr<WrapperField>
rand_wf(WrapperField::create_by_type(type));
- Status st = rand_wf->from_string(pair.second, 0, 0);
- EXPECT_EQ(st.ok(), false);
StringRef rand_rb(pair.second.data(), pair.second.size());
auto col = data_type_ptr->create_column();
- st = data_type_ptr->from_string(rand_rb, col.get());
+ Status st = data_type_ptr->from_string(rand_rb, col.get());
EXPECT_EQ(st.ok(), false);
}
}
-
- // null data type
- {
- DataTypePtr data_type_ptr =
DataTypeFactory::instance().create_data_type(
- FieldType::OLAP_FIELD_TYPE_STRING, 0, 0);
- DataTypePtr nullable_ptr =
std::make_shared<DataTypeNullable>(data_type_ptr);
- std::unique_ptr<WrapperField> rand_wf(
-
WrapperField::create_by_type(FieldType::OLAP_FIELD_TYPE_STRING));
- std::string test_str = generate(128);
- static_cast<void>(rand_wf->from_string(test_str, 0, 0));
- Field string_field = Field::create_field<TYPE_STRING>(test_str);
- ColumnPtr col = nullable_ptr->create_column_const(0, string_field);
- EXPECT_EQ(rand_wf->to_string(), nullable_ptr->to_string(*col, 0));
- }
}
} // namespace doris::vectorized
diff --git a/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp
b/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp
index a6fbb01f081..b89890cf226 100644
--- a/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp
@@ -17,7 +17,6 @@
#include "gtest/gtest_pred_impl.h"
#include "olap/types.h" // for TypeInfo
-#include "olap/wrapper_field.h"
#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_string.h"
@@ -35,7 +34,7 @@
namespace doris::vectorized {
// This test aim to make sense for csv serde of data types.
-// we use default formatOption and special formatOption to equal serde for
wrapperField.
+// we use default formatOption and special formatOption to test serde
behavior.
TEST(CsvSerde, ScalaDataTypeSerdeCsvTest) {
// arithmetic scala field types
{
@@ -275,17 +274,18 @@ TEST(CsvSerde, ScalaDataTypeSerdeCsvTest) {
std::cout << "========= This type is " <<
data_type_ptr->get_name() << ": "
<< fmt::format("{}", type) << std::endl;
- std::unique_ptr<WrapperField>
min_wf(WrapperField::create_by_type(type));
- std::unique_ptr<WrapperField>
max_wf(WrapperField::create_by_type(type));
- std::unique_ptr<WrapperField>
rand_wf(WrapperField::create_by_type(type));
-
- min_wf->set_to_min();
- max_wf->set_to_max();
- EXPECT_EQ(rand_wf->from_string(pair.second, 0, 0).ok(), true);
-
- std::string min_s = min_wf->to_string();
- std::string max_s = max_wf->to_string();
- std::string rand_ip = rand_wf->to_string();
+ // Set min, max values based on type
+ std::string min_s;
+ std::string max_s;
+ std::string rand_ip = pair.second;
+
+ if (type == FieldType::OLAP_FIELD_TYPE_IPV4) {
+ min_s = "0.0.0.0";
+ max_s = "255.255.255.255";
+ } else if (type == FieldType::OLAP_FIELD_TYPE_IPV6) {
+ min_s = "::";
+ max_s = "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff";
+ }
Slice min_rb(min_s.data(), min_s.size());
Slice max_rb(max_s.data(), max_s.size());
@@ -293,7 +293,6 @@ TEST(CsvSerde, ScalaDataTypeSerdeCsvTest) {
auto col = data_type_ptr->create_column();
DataTypeSerDeSPtr serde = data_type_ptr->get_serde();
- // make use c++ lib equals to wrapper field from_string behavior
DataTypeSerDe::FormatOptions formatOptions;
Status st = serde->deserialize_one_cell_from_json(*col, min_rb,
formatOptions);
@@ -336,10 +335,7 @@ TEST(CsvSerde, ScalaDataTypeSerdeCsvTest) {
DataTypePtr data_type_ptr =
DataTypeFactory::instance().create_data_type(
FieldType::OLAP_FIELD_TYPE_STRING, 0, 0);
DataTypePtr nullable_ptr =
std::make_shared<DataTypeNullable>(data_type_ptr);
- std::unique_ptr<WrapperField> rand_wf(
-
WrapperField::create_by_type(FieldType::OLAP_FIELD_TYPE_STRING));
std::string test_str = generate(128);
- EXPECT_EQ(rand_wf->from_string(test_str, 0, 0).ok(), true);
Field string_field = Field::create_field<TYPE_STRING>(test_str);
ColumnPtr col = nullable_ptr->create_column_const(0, string_field);
DataTypeSerDe::FormatOptions default_format_option;
@@ -352,7 +348,7 @@ TEST(CsvSerde, ScalaDataTypeSerdeCsvTest) {
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
- EXPECT_EQ(rand_wf->to_string(), rand_s_d.to_string());
+ EXPECT_EQ(test_str, rand_s_d.to_string());
}
}
diff --git
a/be/test/vec/data_types/serde/data_type_serde_fixed_length_object_test.cpp
b/be/test/vec/data_types/serde/data_type_serde_fixed_length_object_test.cpp
index b5399c66e3d..07998965f31 100644
--- a/be/test/vec/data_types/serde/data_type_serde_fixed_length_object_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_fixed_length_object_test.cpp
@@ -26,7 +26,8 @@
namespace doris::vectorized {
TEST(FixedLengthObjectSerdeTest, writeOneCellToJsonb) {
- auto fixed_length_serde =
std::make_shared<vectorized::DataTypeFixedLengthObjectSerDe>(1);
+ auto fixed_length_serde =
std::make_shared<vectorized::DataTypeFixedLengthObjectSerDe>(
+ TYPE_FIXED_LENGTH_OBJECT, 1);
auto column_fixed_length =
ColumnFixedLengthObject::create(sizeof(int64_t));
column_fixed_length->resize(1);
*((int64_t*)column_fixed_length->get_data().data()) = 123;
@@ -58,7 +59,8 @@ TEST(FixedLengthObjectSerdeTest, writeOneCellToJsonb) {
}
TEST(FixedLengthObjectSerdeTest, writeColumnToPb) {
- auto fixed_length_serde =
std::make_shared<vectorized::DataTypeFixedLengthObjectSerDe>(1);
+ auto fixed_length_serde =
std::make_shared<vectorized::DataTypeFixedLengthObjectSerDe>(
+ TYPE_FIXED_LENGTH_OBJECT, 1);
auto column_fixed_length =
ColumnFixedLengthObject::create(sizeof(int64_t));
column_fixed_length->resize(2);
*((int64_t*)column_fixed_length->get_data().data()) = 11;
@@ -85,7 +87,8 @@ TEST(FixedLengthObjectSerdeTest, writeColumnToPb) {
}
TEST(FixedLengthObjectSerdeTest, serializeOneCellToJson) {
- auto fixed_length_serde =
std::make_shared<vectorized::DataTypeFixedLengthObjectSerDe>(1);
+ auto fixed_length_serde =
std::make_shared<vectorized::DataTypeFixedLengthObjectSerDe>(
+ TYPE_FIXED_LENGTH_OBJECT, 1);
auto column_fixed_length =
ColumnFixedLengthObject::create(sizeof(int64_t));
column_fixed_length->resize(2);
*((int64_t*)column_fixed_length->get_data().data()) = 11;
@@ -124,7 +127,8 @@ TEST(FixedLengthObjectSerdeTest, serializeOneCellToJson) {
}
TEST(FixedLengthObjectSerdeTest, serializeColumnToJson) {
- auto fixed_length_serde =
std::make_shared<vectorized::DataTypeFixedLengthObjectSerDe>(1);
+ auto fixed_length_serde =
std::make_shared<vectorized::DataTypeFixedLengthObjectSerDe>(
+ TYPE_FIXED_LENGTH_OBJECT, 1);
auto column_fixed_length =
ColumnFixedLengthObject::create(sizeof(int64_t));
column_fixed_length->resize(2);
*((int64_t*)column_fixed_length->get_data().data()) = 11;
@@ -160,7 +164,8 @@ TEST(FixedLengthObjectSerdeTest, serializeColumnToJson) {
}
TEST(FixedLengthObjectSerdeTest, serializeOneCellToHiveText) {
- auto fixed_length_serde =
std::make_shared<vectorized::DataTypeFixedLengthObjectSerDe>(1);
+ auto fixed_length_serde =
std::make_shared<vectorized::DataTypeFixedLengthObjectSerDe>(
+ TYPE_FIXED_LENGTH_OBJECT, 1);
auto column_fixed_length =
ColumnFixedLengthObject::create(sizeof(int64_t));
column_fixed_length->resize(2);
*((int64_t*)column_fixed_length->get_data().data()) = 11;
diff --git a/be/test/vec/data_types/serde/data_type_serde_map_test.cpp
b/be/test/vec/data_types/serde/data_type_serde_map_test.cpp
index 322d923c5c8..4e9b6d093b7 100644
--- a/be/test/vec/data_types/serde/data_type_serde_map_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_map_test.cpp
@@ -48,8 +48,8 @@
#include "vec/data_types/data_type_string.h"
namespace doris::vectorized {
-static auto serde_str_key = std::make_shared<DataTypeStringSerDe>();
-static auto serde_str_value = std::make_shared<DataTypeStringSerDe>();
+static auto serde_str_key = std::make_shared<DataTypeStringSerDe>(TYPE_STRING);
+static auto serde_str_value =
std::make_shared<DataTypeStringSerDe>(TYPE_STRING);
class DataTypeMapSerDeTest : public ::testing::Test {
protected:
diff --git a/be/test/vec/data_types/serde/data_type_serde_string_test.cpp
b/be/test/vec/data_types/serde/data_type_serde_string_test.cpp
index 1104f6a6977..b00b6749649 100644
--- a/be/test/vec/data_types/serde/data_type_serde_string_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_string_test.cpp
@@ -49,7 +49,7 @@
namespace doris::vectorized {
static std::string test_data_dir;
-static auto serde_str = std::make_shared<DataTypeStringSerDe>();
+static auto serde_str = std::make_shared<DataTypeStringSerDe>(TYPE_STRING);
static ColumnString::MutablePtr column_str32;
static ColumnString64::MutablePtr column_str64;
diff --git a/be/test/vec/data_types/serde/data_type_serde_struct_test.cpp
b/be/test/vec/data_types/serde/data_type_serde_struct_test.cpp
index 74af068bca0..94935748f1f 100644
--- a/be/test/vec/data_types/serde/data_type_serde_struct_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_struct_test.cpp
@@ -50,7 +50,7 @@
namespace doris::vectorized {
static auto serde_int32 = std::make_shared<DataTypeNumberSerDe<TYPE_INT>>();
-static auto serde_str = std::make_shared<DataTypeStringSerDe>();
+static auto serde_str = std::make_shared<DataTypeStringSerDe>(TYPE_STRING);
class DataTypeStructSerDeTest : public ::testing::Test {
protected:
diff --git a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp
b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp
index 0e6792d53f8..05bce0c1f1e 100644
--- a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp
@@ -17,7 +17,6 @@
#include "gtest/gtest_pred_impl.h"
#include "olap/types.h" // for TypeInfo
-#include "olap/wrapper_field.h"
#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
#include "vec/common/string_buffer.hpp"
@@ -31,7 +30,7 @@
namespace doris::vectorized {
// This test aim to make sense for text serde of data types.
-// we use default formatOption and special formatOption to equal serde for
wrapperField.
+// we use default formatOption and special formatOption to test serde
behavior.
TEST(TextSerde, ScalaDataTypeSerdeTextTest) {
// arithmetic scala field types
{
@@ -271,17 +270,18 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) {
std::cout << "========= This type is " <<
data_type_ptr->get_name() << ": "
<< fmt::format("{}", type) << std::endl;
- std::unique_ptr<WrapperField>
min_wf(WrapperField::create_by_type(type));
- std::unique_ptr<WrapperField>
max_wf(WrapperField::create_by_type(type));
- std::unique_ptr<WrapperField>
rand_wf(WrapperField::create_by_type(type));
+ // Set min, max values based on type
+ std::string min_s;
+ std::string max_s;
+ std::string rand_ip = pair.second;
- min_wf->set_to_min();
- max_wf->set_to_max();
- static_cast<void>(rand_wf->from_string(pair.second, 0, 0));
-
- std::string min_s = min_wf->to_string();
- std::string max_s = max_wf->to_string();
- std::string rand_ip = rand_wf->to_string();
+ if (type == FieldType::OLAP_FIELD_TYPE_IPV4) {
+ min_s = "0.0.0.0";
+ max_s = "255.255.255.255";
+ } else if (type == FieldType::OLAP_FIELD_TYPE_IPV6) {
+ min_s = "::";
+ max_s = "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff";
+ }
Slice min_rb(min_s.data(), min_s.size());
Slice max_rb(max_s.data(), max_s.size());
@@ -332,10 +332,7 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) {
DataTypePtr data_type_ptr =
DataTypeFactory::instance().create_data_type(
FieldType::OLAP_FIELD_TYPE_STRING, 0, 0);
DataTypePtr nullable_ptr =
std::make_shared<DataTypeNullable>(data_type_ptr);
- std::unique_ptr<WrapperField> rand_wf(
-
WrapperField::create_by_type(FieldType::OLAP_FIELD_TYPE_STRING));
std::string test_str = generate(128);
- static_cast<void>(rand_wf->from_string(test_str, 0, 0));
Field string_field = Field::create_field<TYPE_STRING>(test_str);
ColumnPtr col = nullable_ptr->create_column_const(0, string_field);
DataTypeSerDe::FormatOptions default_format_option;
@@ -348,7 +345,7 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) {
EXPECT_EQ(st.ok(), true);
buffer_writer.commit();
StringRef rand_s_d = ser_col->get_data_at(0);
- EXPECT_EQ(rand_wf->to_string(), rand_s_d.to_string());
+ EXPECT_EQ(test_str, rand_s_d.to_string());
}
}
diff --git a/regression-test/data/alter_p0/test_alter_column_char.out
b/regression-test/data/alter_p0/test_alter_column_char.out
new file mode 100644
index 00000000000..dc9c534a49c
--- /dev/null
+++ b/regression-test/data/alter_p0/test_alter_column_char.out
@@ -0,0 +1,22 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql1 --
+1 1
+2 2
+3 3
+
+-- !sql2 --
+1 1 a
+2 2 a
+3 3 a
+
+-- !sql3 --
+1 1 a
+2 2 a
+3 3 a
+4 4 b
+
+-- !sql4 --
+1 1 a
+2 2 a
+3 3 a
+
diff --git a/regression-test/suites/alter_p0/test_alter_column_char.groovy
b/regression-test/suites/alter_p0/test_alter_column_char.groovy
new file mode 100644
index 00000000000..0a3112ef251
--- /dev/null
+++ b/regression-test/suites/alter_p0/test_alter_column_char.groovy
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite('test_alter_column_char') {
+ def tbl = 'test_alter_column_char'
+ sql "DROP TABLE IF EXISTS ${tbl}"
+
+ sql """
+ CREATE TABLE ${tbl} (
+ `k1` BIGINT NOT NULL,
+ `v1` BIGINT NULL
+ ) ENGINE=OLAP
+ UNIQUE KEY(`k1`)
+ DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "disable_auto_compaction" = "true"
+ );
+ """
+
+ sql """
+ INSERT INTO ${tbl} VALUES (1,1),(2,2),(3,3)
+ """
+ sql """ SYNC """
+ qt_sql1 """ select * from ${tbl} order by k1 """
+
+ sql """
+ ALTER TABLE ${tbl} add column v2 char(10) default 'a'
+ """
+
+ waitForSchemaChangeDone {
+ sql """ SHOW ALTER TABLE COLUMN WHERE TableName='${tbl}' ORDER BY
createtime DESC LIMIT 1 """
+ time 600
+ }
+ qt_sql2 """ select * from ${tbl} order by k1 """
+
+ sql """
+ INSERT INTO ${tbl} VALUES (4,4,'b')
+ """
+ sql """ SYNC """
+ qt_sql3 """ select * from ${tbl} order by k1 """
+ qt_sql4 """ select * from ${tbl} where v2 = 'a' order by k1 """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]