yiguolei commented on code in PR #60530:
URL: https://github.com/apache/doris/pull/60530#discussion_r2767681092
##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -2106,139 +2065,42 @@ Status DefaultValueColumnIterator::init(const
ColumnIteratorOptions& opts) {
// "NULL" is a special default value which means the default value is null.
if (_has_default_value) {
if (_default_value == "NULL") {
- _is_default_value_null = true;
+ _default_value_field =
vectorized::Field::create_field<TYPE_NULL>(vectorized::Null {});
} else {
- _type_size = _type_info->size();
- _mem_value.resize(_type_size);
- Status s = Status::OK();
- // If char length is 10, but default value is 'a' , it's length is
1
- // not fill 0 to the ending, because segment iterator will shrink
the tail 0 char
- if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_VARCHAR ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_HLL ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_BITMAP ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_STRING ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_CHAR) {
- ((Slice*)_mem_value.data())->size = _default_value.length();
- ((Slice*)_mem_value.data())->data = _default_value.data();
- } else if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_ARRAY)
{
- if (_default_value != "[]") {
- return Status::NotSupported("Array default {} is
unsupported", _default_value);
- } else {
- ((Slice*)_mem_value.data())->size =
_default_value.length();
- ((Slice*)_mem_value.data())->data = _default_value.data();
- }
+ if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_ARRAY &&
_default_value != "[]") {
+ return Status::NotSupported("Array default {} is unsupported",
_default_value);
} else if (_type_info->type() ==
FieldType::OLAP_FIELD_TYPE_STRUCT) {
return Status::NotSupported("STRUCT default type is
unsupported");
} else if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_MAP) {
return Status::NotSupported("MAP default type is unsupported");
- } else {
- s = _type_info->from_string(_mem_value.data(), _default_value,
_precision, _scale);
- }
- if (!s.ok()) {
- return s;
}
+ const auto t = _type_info->type();
+ const auto serde = vectorized::DataTypeFactory::instance()
Review Comment:
// If char length is 10, but default value is 'a' , it's length is 1
// not fill 0 to the ending, because segment iterator will
shrink the tail 0 char
原来的注释得保留。
不过这里我有一个疑问:为什么这里不加 padding 呢?如果我 add new column,新增的列是 char
类型并且带有 default 值,此时我再发送一个查询,把这个新增列作为 where 条件,那么 predicate 里的值有
padding,但是由 default 值生成的列没有 padding,感觉就不对了。
##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -2106,139 +2065,42 @@ Status DefaultValueColumnIterator::init(const
ColumnIteratorOptions& opts) {
// "NULL" is a special default value which means the default value is null.
if (_has_default_value) {
if (_default_value == "NULL") {
- _is_default_value_null = true;
+ _default_value_field =
vectorized::Field::create_field<TYPE_NULL>(vectorized::Null {});
} else {
- _type_size = _type_info->size();
- _mem_value.resize(_type_size);
- Status s = Status::OK();
- // If char length is 10, but default value is 'a' , it's length is
1
- // not fill 0 to the ending, because segment iterator will shrink
the tail 0 char
- if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_VARCHAR ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_HLL ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_BITMAP ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_STRING ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_CHAR) {
- ((Slice*)_mem_value.data())->size = _default_value.length();
- ((Slice*)_mem_value.data())->data = _default_value.data();
- } else if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_ARRAY)
{
- if (_default_value != "[]") {
- return Status::NotSupported("Array default {} is
unsupported", _default_value);
- } else {
- ((Slice*)_mem_value.data())->size =
_default_value.length();
- ((Slice*)_mem_value.data())->data = _default_value.data();
- }
+ if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_ARRAY &&
_default_value != "[]") {
+ return Status::NotSupported("Array default {} is unsupported",
_default_value);
} else if (_type_info->type() ==
FieldType::OLAP_FIELD_TYPE_STRUCT) {
return Status::NotSupported("STRUCT default type is
unsupported");
} else if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_MAP) {
return Status::NotSupported("MAP default type is unsupported");
- } else {
- s = _type_info->from_string(_mem_value.data(), _default_value,
_precision, _scale);
- }
- if (!s.ok()) {
- return s;
}
+ const auto t = _type_info->type();
+ const auto serde = vectorized::DataTypeFactory::instance()
+ .create_data_type(t, _precision, _scale)
+ ->get_serde();
+ vectorized::DataTypeSerDe::FormatOptions opt;
+ RETURN_IF_ERROR(serde->from_olap_string(_default_value,
_default_value_field, opt));
}
} else if (_is_nullable) {
- // if _has_default_value is false but _is_nullable is true, we should
return null as default value.
- _is_default_value_null = true;
+ _default_value_field =
vectorized::Field::create_field<TYPE_NULL>(vectorized::Null {});
} else {
return Status::InternalError(
"invalid default value column for no default value and not
nullable");
}
return Status::OK();
}
-void DefaultValueColumnIterator::insert_default_data(const TypeInfo*
type_info, size_t type_size,
- void* mem_value,
+void DefaultValueColumnIterator::insert_default_data(const vectorized::Field&
value,
vectorized::MutableColumnPtr& dst, size_t n) {
dst = dst->convert_to_predicate_column_if_dictionary();
-
- switch (type_info->type()) {
- case FieldType::OLAP_FIELD_TYPE_BITMAP:
- case FieldType::OLAP_FIELD_TYPE_HLL: {
- dst->insert_many_defaults(n);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_DATE: {
- vectorized::Int64 int64;
- char* data_ptr = (char*)&int64;
- size_t data_len = sizeof(int64);
-
- assert(type_size ==
-
sizeof(FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DATE>::CppType)); //uint24_t
- std::string str =
FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DATE>::to_string(mem_value);
-
- VecDateTimeValue value;
- value.from_date_str(str.c_str(), str.length());
- value.cast_to_date();
-
- int64 = binary_cast<VecDateTimeValue, vectorized::Int64>(value);
- dst->insert_data_repeatedly(data_ptr, data_len, n);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_DATETIME: {
- vectorized::Int64 int64;
- char* data_ptr = (char*)&int64;
- size_t data_len = sizeof(int64);
-
- assert(type_size ==
-
sizeof(FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DATETIME>::CppType));
//int64_t
- std::string str =
-
FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DATETIME>::to_string(mem_value);
-
- VecDateTimeValue value;
- value.from_date_str(str.c_str(), str.length());
- value.to_datetime();
-
- int64 = binary_cast<VecDateTimeValue, vectorized::Int64>(value);
- dst->insert_data_repeatedly(data_ptr, data_len, n);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_DECIMAL: {
- vectorized::Int128 int128;
- char* data_ptr = (char*)&int128;
- size_t data_len = sizeof(int128);
-
- assert(type_size ==
-
sizeof(FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DECIMAL>::CppType));
//decimal12_t
- decimal12_t* d = (decimal12_t*)mem_value;
- int128 = DecimalV2Value(d->integer, d->fraction).value();
- dst->insert_data_repeatedly(data_ptr, data_len, n);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_STRING:
- case FieldType::OLAP_FIELD_TYPE_VARCHAR:
- case FieldType::OLAP_FIELD_TYPE_CHAR:
- case FieldType::OLAP_FIELD_TYPE_JSONB:
- case FieldType::OLAP_FIELD_TYPE_AGG_STATE: {
- char* data_ptr = ((Slice*)mem_value)->data;
- size_t data_len = ((Slice*)mem_value)->size;
- dst->insert_data_repeatedly(data_ptr, data_len, n);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_ARRAY: {
- if (dst->is_nullable()) {
-
static_cast<vectorized::ColumnNullable&>(*dst).insert_not_null_elements(n);
- } else {
- dst->insert_many_defaults(n);
- }
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_VARIANT: {
- dst->insert_many_defaults(n);
- break;
- }
- default: {
- char* data_ptr = (char*)mem_value;
- size_t data_len = type_size;
- dst->insert_data_repeatedly(data_ptr, data_len, n);
- }
+ for (size_t i = 0; i < n; ++i) {
+ dst->insert(value);
Review Comment:
这样逐行调用 insert 性能会很慢,需要调用一个批量插入的接口
##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -2106,139 +2065,42 @@ Status DefaultValueColumnIterator::init(const
ColumnIteratorOptions& opts) {
// "NULL" is a special default value which means the default value is null.
if (_has_default_value) {
if (_default_value == "NULL") {
- _is_default_value_null = true;
+ _default_value_field =
vectorized::Field::create_field<TYPE_NULL>(vectorized::Null {});
} else {
- _type_size = _type_info->size();
- _mem_value.resize(_type_size);
- Status s = Status::OK();
- // If char length is 10, but default value is 'a' , it's length is
1
- // not fill 0 to the ending, because segment iterator will shrink
the tail 0 char
- if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_VARCHAR ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_HLL ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_BITMAP ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_STRING ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_CHAR) {
- ((Slice*)_mem_value.data())->size = _default_value.length();
- ((Slice*)_mem_value.data())->data = _default_value.data();
- } else if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_ARRAY)
{
- if (_default_value != "[]") {
- return Status::NotSupported("Array default {} is
unsupported", _default_value);
- } else {
- ((Slice*)_mem_value.data())->size =
_default_value.length();
- ((Slice*)_mem_value.data())->data = _default_value.data();
- }
+ if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_ARRAY &&
_default_value != "[]") {
+ return Status::NotSupported("Array default {} is unsupported",
_default_value);
} else if (_type_info->type() ==
FieldType::OLAP_FIELD_TYPE_STRUCT) {
return Status::NotSupported("STRUCT default type is
unsupported");
} else if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_MAP) {
return Status::NotSupported("MAP default type is unsupported");
- } else {
- s = _type_info->from_string(_mem_value.data(), _default_value,
_precision, _scale);
- }
- if (!s.ok()) {
- return s;
}
+ const auto t = _type_info->type();
+ const auto serde = vectorized::DataTypeFactory::instance()
+ .create_data_type(t, _precision, _scale)
+ ->get_serde();
+ vectorized::DataTypeSerDe::FormatOptions opt;
+ RETURN_IF_ERROR(serde->from_olap_string(_default_value,
_default_value_field, opt));
}
} else if (_is_nullable) {
- // if _has_default_value is false but _is_nullable is true, we should
return null as default value.
- _is_default_value_null = true;
+ _default_value_field =
vectorized::Field::create_field<TYPE_NULL>(vectorized::Null {});
} else {
return Status::InternalError(
"invalid default value column for no default value and not
nullable");
}
return Status::OK();
}
-void DefaultValueColumnIterator::insert_default_data(const TypeInfo*
type_info, size_t type_size,
- void* mem_value,
+void DefaultValueColumnIterator::insert_default_data(const vectorized::Field&
value,
vectorized::MutableColumnPtr& dst, size_t n) {
dst = dst->convert_to_predicate_column_if_dictionary();
-
- switch (type_info->type()) {
- case FieldType::OLAP_FIELD_TYPE_BITMAP:
- case FieldType::OLAP_FIELD_TYPE_HLL: {
- dst->insert_many_defaults(n);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_DATE: {
- vectorized::Int64 int64;
- char* data_ptr = (char*)&int64;
- size_t data_len = sizeof(int64);
-
- assert(type_size ==
-
sizeof(FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DATE>::CppType)); //uint24_t
- std::string str =
FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DATE>::to_string(mem_value);
-
- VecDateTimeValue value;
- value.from_date_str(str.c_str(), str.length());
- value.cast_to_date();
-
- int64 = binary_cast<VecDateTimeValue, vectorized::Int64>(value);
- dst->insert_data_repeatedly(data_ptr, data_len, n);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_DATETIME: {
- vectorized::Int64 int64;
- char* data_ptr = (char*)&int64;
- size_t data_len = sizeof(int64);
-
- assert(type_size ==
-
sizeof(FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DATETIME>::CppType));
//int64_t
- std::string str =
-
FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DATETIME>::to_string(mem_value);
-
- VecDateTimeValue value;
- value.from_date_str(str.c_str(), str.length());
- value.to_datetime();
-
- int64 = binary_cast<VecDateTimeValue, vectorized::Int64>(value);
- dst->insert_data_repeatedly(data_ptr, data_len, n);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_DECIMAL: {
- vectorized::Int128 int128;
- char* data_ptr = (char*)&int128;
- size_t data_len = sizeof(int128);
-
- assert(type_size ==
-
sizeof(FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_DECIMAL>::CppType));
//decimal12_t
- decimal12_t* d = (decimal12_t*)mem_value;
- int128 = DecimalV2Value(d->integer, d->fraction).value();
- dst->insert_data_repeatedly(data_ptr, data_len, n);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_STRING:
- case FieldType::OLAP_FIELD_TYPE_VARCHAR:
- case FieldType::OLAP_FIELD_TYPE_CHAR:
- case FieldType::OLAP_FIELD_TYPE_JSONB:
- case FieldType::OLAP_FIELD_TYPE_AGG_STATE: {
- char* data_ptr = ((Slice*)mem_value)->data;
- size_t data_len = ((Slice*)mem_value)->size;
- dst->insert_data_repeatedly(data_ptr, data_len, n);
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_ARRAY: {
- if (dst->is_nullable()) {
-
static_cast<vectorized::ColumnNullable&>(*dst).insert_not_null_elements(n);
- } else {
- dst->insert_many_defaults(n);
- }
- break;
- }
- case FieldType::OLAP_FIELD_TYPE_VARIANT: {
- dst->insert_many_defaults(n);
- break;
- }
- default: {
- char* data_ptr = (char*)mem_value;
- size_t data_len = type_size;
- dst->insert_data_repeatedly(data_ptr, data_len, n);
- }
+ for (size_t i = 0; i < n; ++i) {
+ dst->insert(value);
Review Comment:
当用户add column 之后,查询性能会很慢
##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -2250,10 +2112,10 @@ Status DefaultValueColumnIterator::read_by_rowids(const
rowid_t* rowids, const s
}
void
DefaultValueColumnIterator::_insert_many_default(vectorized::MutableColumnPtr&
dst, size_t n) {
- if (_is_default_value_null) {
+ if (_default_value_field.is_null()) {
dst->insert_many_defaults(n);
} else {
- insert_default_data(_type_info.get(), _type_size, _mem_value.data(),
dst, n);
+ insert_default_data(_default_value_field, dst, n);
Review Comment:
insert_default_data 这个函数直接去掉吧,直接在这里调用 column 相关的接口,实现一个类似 insert repeatedly
这样的接口
##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -2106,139 +2065,42 @@ Status DefaultValueColumnIterator::init(const
ColumnIteratorOptions& opts) {
// "NULL" is a special default value which means the default value is null.
if (_has_default_value) {
if (_default_value == "NULL") {
- _is_default_value_null = true;
+ _default_value_field =
vectorized::Field::create_field<TYPE_NULL>(vectorized::Null {});
} else {
- _type_size = _type_info->size();
- _mem_value.resize(_type_size);
- Status s = Status::OK();
- // If char length is 10, but default value is 'a' , it's length is
1
- // not fill 0 to the ending, because segment iterator will shrink
the tail 0 char
- if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_VARCHAR ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_HLL ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_BITMAP ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_STRING ||
- _type_info->type() == FieldType::OLAP_FIELD_TYPE_CHAR) {
- ((Slice*)_mem_value.data())->size = _default_value.length();
- ((Slice*)_mem_value.data())->data = _default_value.data();
- } else if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_ARRAY)
{
- if (_default_value != "[]") {
- return Status::NotSupported("Array default {} is
unsupported", _default_value);
- } else {
- ((Slice*)_mem_value.data())->size =
_default_value.length();
- ((Slice*)_mem_value.data())->data = _default_value.data();
- }
+ if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_ARRAY &&
_default_value != "[]") {
+ return Status::NotSupported("Array default {} is unsupported",
_default_value);
} else if (_type_info->type() ==
FieldType::OLAP_FIELD_TYPE_STRUCT) {
return Status::NotSupported("STRUCT default type is
unsupported");
} else if (_type_info->type() == FieldType::OLAP_FIELD_TYPE_MAP) {
return Status::NotSupported("MAP default type is unsupported");
- } else {
- s = _type_info->from_string(_mem_value.data(), _default_value,
_precision, _scale);
- }
- if (!s.ok()) {
- return s;
}
+ const auto t = _type_info->type();
+ const auto serde = vectorized::DataTypeFactory::instance()
Review Comment:
这里我们可以补充一个这样的测试case,看看现在的改动是否有问题
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]