Copilot commented on code in PR #60526:
URL: https://github.com/apache/doris/pull/60526#discussion_r2767237937
##########
be/src/vec/data_types/data_type_fixed_length_object.cpp:
##########
@@ -53,26 +85,49 @@ char* DataTypeFixedLengthObject::serialize(const IColumn&
column, char* buf,
DCHECK(src_col.item_size() > 0)
<< "[serialize]item size of DataTypeFixedLengthObject should be
greater than 0";
- // item size
unaligned_store<size_t>(buf, src_col.item_size());
buf += sizeof(size_t);
- // column data
+
const auto* origin_data = src_col.get_data().data();
memcpy(buf, origin_data, real_need_copy_num * src_col.item_size());
buf += real_need_copy_num * src_col.item_size();
-
return buf;
}
const char* DataTypeFixedLengthObject::deserialize(const char* buf,
MutableColumnPtr* column,
int be_exec_version) const {
- //const flag
+ if (be_exec_version >= USE_NEW_FIXED_OBJECT_SERIALIZATION_VERSION) {
+ // New deserialization with streamvbyte decoding for large data
+ size_t real_have_saved_num = 0;
+ buf = deserialize_const_flag_and_row_num(buf, column,
&real_have_saved_num);
+
+ auto& dst_col = assert_cast<ColumnType&>(*(column->get()));
Review Comment:
The original column pointer must be saved before calling
deserialize_const_flag_and_row_num, similar to how it's done in DataTypeDecimal
at line 178. The deserialize_const_flag_and_row_num function may wrap the
column in a ColumnConst, and subsequent operations need to work on the
underlying data column, not the wrapper. The correct pattern is:
1. Save the original column pointer: auto* origin_column = column->get();
2. Call deserialize_const_flag_and_row_num
3. Use origin_column for subsequent operations
```suggestion
auto* origin_column = column->get();
size_t real_have_saved_num = 0;
buf = deserialize_const_flag_and_row_num(buf, column,
&real_have_saved_num);
auto& dst_col = assert_cast<ColumnType&>(*origin_column);
```
##########
be/src/vec/data_types/data_type_fixed_length_object.cpp:
##########
@@ -97,6 +153,23 @@ const char* DataTypeFixedLengthObject::deserialize(const
char* buf, MutableColum
// data : item data1 | item data2...
int64_t DataTypeFixedLengthObject::get_uncompressed_serialized_bytes(const
IColumn& column,
int
be_exec_version) const {
+ if (be_exec_version >= USE_NEW_FIXED_OBJECT_SERIALIZATION_VERSION) {
+ // New format size calculation with streamvbyte
+ auto size = sizeof(bool) + sizeof(size_t) + sizeof(size_t) +
sizeof(size_t);
+ auto real_need_copy_num = is_column_const(column) ? 1 : column.size();
+ const auto& src_col = assert_cast<const ColumnType&>(column);
Review Comment:
When the column is a ColumnConst, directly casting it to ColumnType will
fail. The code should first unwrap the const column to get the underlying data
column before casting, similar to how the old format handles it at lines
175-178. The serialize function correctly handles this at line 41 using
serialize_const_flag_and_row_num which unwraps const columns, but
get_uncompressed_serialized_bytes does not perform this unwrapping.
```suggestion
const IColumn* data_column = &column;
auto real_need_copy_num = column.size();
if (is_column_const(column)) {
const auto& const_column = assert_cast<const
ColumnConst&>(column);
data_column = &(const_column.get_data_column());
real_need_copy_num = 1;
}
const auto& src_col = assert_cast<const ColumnType&>(*data_column);
```
##########
be/src/vec/data_types/data_type_fixed_length_object.cpp:
##########
@@ -97,6 +153,23 @@ const char* DataTypeFixedLengthObject::deserialize(const
char* buf, MutableColum
// data : item data1 | item data2...
int64_t DataTypeFixedLengthObject::get_uncompressed_serialized_bytes(const
IColumn& column,
int
be_exec_version) const {
+ if (be_exec_version >= USE_NEW_FIXED_OBJECT_SERIALIZATION_VERSION) {
+ // New format size calculation with streamvbyte
+ auto size = sizeof(bool) + sizeof(size_t) + sizeof(size_t) +
sizeof(size_t);
+ auto real_need_copy_num = is_column_const(column) ? 1 : column.size();
+ const auto& src_col = assert_cast<const ColumnType&>(column);
+ auto mem_size = src_col.item_size() * real_need_copy_num;
+ if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) {
+ return size + mem_size;
+ } else {
+ // Throw exception if mem_size is large than UINT32_MAX
Review Comment:
The comment has a grammatical error: "large than" should be "larger than".
##########
be/src/vec/data_types/data_type_fixed_length_object.cpp:
##########
@@ -33,12 +34,43 @@ namespace doris::vectorized {
char* DataTypeFixedLengthObject::serialize(const IColumn& column, char* buf,
int be_exec_version) const {
- // const flag
+ if (be_exec_version >= USE_NEW_FIXED_OBJECT_SERIALIZATION_VERSION) {
+ // New serialization with streamvbyte encoding for large data
+ const auto* data_column = &column;
+ size_t real_need_copy_num = 0;
+ buf = serialize_const_flag_and_row_num(&data_column, buf,
&real_need_copy_num);
+
+ const auto& src_col = assert_cast<const ColumnType&>(*data_column);
+ DCHECK(src_col.item_size() > 0)
+ << "[serialize]item size of DataTypeFixedLengthObject should
be greater than 0";
+
+ // item size
+ unaligned_store<size_t>(buf, src_col.item_size());
+ buf += sizeof(size_t);
+
+ auto mem_size = real_need_copy_num * src_col.item_size();
+ const auto* origin_data = src_col.get_data().data();
+
+ // column data
+ if (mem_size <= SERIALIZED_MEM_SIZE_LIMIT) {
+ memcpy(buf, origin_data, mem_size);
+ return buf + mem_size;
+ } else {
+ // Throw exception if mem_size is large than UINT32_MAX
Review Comment:
The comment has a grammatical error: "large than" should be "larger than".
Additionally, the comment is misleading because streamvbyte_encode will not
throw an exception if mem_size is larger than UINT32_MAX; instead, cast_set
will throw an exception during the conversion at line 61. Consider revising the
comment to accurately reflect this behavior.
```suggestion
// cast_set will throw an exception if mem_size exceeds
UINT32_MAX during conversion
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]