This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/variant-sparse by this push:
new e1f5c677fc0 [improve](performance) replace
serialized_sparse_column->insert_many_defaults with
serialized_sparse_column->resize (#49952)
e1f5c677fc0 is described below
commit e1f5c677fc0eb3fc7788f4941349c4e3cc6270b8
Author: lihangyu <[email protected]>
AuthorDate: Fri Apr 11 14:24:34 2025 +0800
[improve](performance) replace
serialized_sparse_column->insert_many_defaults with
serialized_sparse_column->resize (#49952)
For map-type columns, `insert_many_defaults` calls `insert_default` once per row, so the sparse column is now grown with `resize` instead.
---
.../rowset/segment_v2/hierarchical_data_reader.cpp | 15 +++++++++------
be/src/vec/columns/column_object.cpp | 18 ++++++------------
be/src/vec/columns/column_object.h | 6 ++++++
be/src/vec/data_types/data_type_object.cpp | 3 ++-
4 files changed, 23 insertions(+), 19 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
index 185a6d82422..f0af8f77894 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
@@ -318,19 +318,23 @@ Status
HierarchicalDataReader::_process_sparse_column(vectorized::ColumnObject&
using namespace vectorized;
container_variant.clear_sparse_column();
if (!_sparse_column_reader) {
-
container_variant.get_sparse_column()->assume_mutable()->insert_many_defaults(nrows);
+ container_variant.get_sparse_column()->assume_mutable()->resize(
+ container_variant.get_sparse_column()->size() + nrows);
+ ENABLE_CHECK_CONSISTENCY(&container_variant);
return Status::OK();
}
// process sparse column
if (_path.get_parts().empty()) {
// directly use sparse column if access root
container_variant.set_sparse_column(_sparse_column_reader->column->get_ptr());
+ ENABLE_CHECK_CONSISTENCY(&container_variant);
} else {
const auto& offsets =
assert_cast<const
ColumnMap&>(*_sparse_column_reader->column).get_offsets();
/// Check if there is no data in shared data in current range.
if (offsets.back() == offsets[-1]) {
-
container_variant.get_sparse_column()->assume_mutable()->insert_many_defaults(nrows);
+ container_variant.get_sparse_column()->assume_mutable()->resize(
+ container_variant.get_sparse_column()->size() + nrows);
} else {
// Read for variant sparse column
// Example path: a.b
@@ -402,6 +406,7 @@ Status
HierarchicalDataReader::_process_sparse_column(vectorized::ColumnObject&
}
}
}
+ ENABLE_CHECK_CONSISTENCY(&container_variant);
return Status::OK();
}
@@ -474,10 +479,8 @@ void
SparseColumnExtractReader::_fill_path_column(vectorized::MutableColumnPtr&
*var.get_subcolumn({}) /*root*/, null_map, StringRef
{_path.data(), _path.size()},
_sparse_column->get_ptr(), 0, _sparse_column->size());
var.incr_num_rows(_sparse_column->size());
-
var.get_sparse_column()->assume_mutable()->insert_many_defaults(_sparse_column->size());
-#ifndef NDEBUG
- var.check_consistency();
-#endif
+ var.get_sparse_column()->assume_mutable()->resize(var.rows());
+ ENABLE_CHECK_CONSISTENCY(&var);
// _sparse_column->clear();
}
diff --git a/be/src/vec/columns/column_object.cpp
b/be/src/vec/columns/column_object.cpp
index 1b607a7e87e..fd71c0ff967 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -139,12 +139,6 @@ size_t get_number_of_dimensions(const IDataType& type) {
}
} // namespace
-#ifdef NDEBUG
-#define ENABLE_CHECK_CONSISTENCY (void)/* Nothing */
-#else
-#define ENABLE_CHECK_CONSISTENCY(this) (this)->check_consistency()
-#endif
-
// current nested level is 2, inside column object
constexpr int CURRENT_SERIALIZE_NESTING_LEVEL = 2;
@@ -662,7 +656,7 @@ ColumnObject::ColumnObject(int32_t max_subcolumns_count,
DataTypePtr root_type,
_max_subcolumns_count(max_subcolumns_count) {
subcolumns.create_root(
Subcolumn(std::move(root_column), root_type, is_nullable, true
/*root*/));
- serialized_sparse_column->insert_many_defaults(num_rows);
+ serialized_sparse_column->resize(num_rows);
ENABLE_CHECK_CONSISTENCY(this);
}
@@ -677,7 +671,7 @@ ColumnObject::ColumnObject(int32_t max_subcolumns_count,
Subcolumns&& subcolumns
"subcolumns count: {}",
max_subcolumns_count, subcolumns_.size());
}
- serialized_sparse_column->insert_many_defaults(num_rows);
+ serialized_sparse_column->resize(num_rows);
}
ColumnObject::ColumnObject(int32_t max_subcolumns_count, size_t size)
@@ -822,7 +816,7 @@ void ColumnObject::insert_many_defaults(size_t length) {
for (auto& entry : subcolumns) {
entry->data.insert_many_defaults(length);
}
- serialized_sparse_column->insert_many_defaults(length);
+ serialized_sparse_column->resize(num_rows + length);
num_rows += length;
ENABLE_CHECK_CONSISTENCY(this);
}
@@ -1189,7 +1183,7 @@ void
ColumnObject::insert_from_sparse_column_and_fill_remaing_dense_column(
/// If no src subcolumns should be inserted into sparse column, insert
defaults.
if (sorted_src_subcolumn_for_sparse_column.empty()) {
- serialized_sparse_column->insert_many_defaults(length);
+ serialized_sparse_column->resize(num_rows + length);
} else {
// Otherwise insert required src dense columns into sparse column.
auto [sparse_column_keys, sparse_column_values] =
get_sparse_data_paths_and_values();
@@ -1757,7 +1751,7 @@ Status ColumnObject::serialize_sparse_columns(
CHECK(is_finalized());
if (remaing_subcolumns.empty()) {
- serialized_sparse_column->insert_many_defaults(num_rows);
+ serialized_sparse_column->resize(num_rows);
return Status::OK();
}
serialized_sparse_column->reserve(num_rows);
@@ -2052,7 +2046,7 @@ void ColumnObject::create_root(const DataTypePtr& type,
MutableColumnPtr&& colum
}
add_sub_column({}, std::move(column), type);
if (serialized_sparse_column->empty()) {
- serialized_sparse_column->insert_many_defaults(num_rows);
+ serialized_sparse_column->resize(num_rows);
}
ENABLE_CHECK_CONSISTENCY(this);
}
diff --git a/be/src/vec/columns/column_object.h
b/be/src/vec/columns/column_object.h
index 8a76151bba6..2747cbc89d7 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -84,6 +84,12 @@ struct FieldInfo {
int precision = 0;
};
+#ifdef NDEBUG
+#define ENABLE_CHECK_CONSISTENCY (void)/* Nothing */
+#else
+#define ENABLE_CHECK_CONSISTENCY(this) (this)->check_consistency()
+#endif
+
/** A column that represents object with dynamic set of subcolumns.
* Subcolumns are identified by paths in document and are stored in
* a trie-like structure. ColumnObject is not suitable for writing into tables
diff --git a/be/src/vec/data_types/data_type_object.cpp
b/be/src/vec/data_types/data_type_object.cpp
index 0ea10460cf5..457d66adc64 100644
--- a/be/src/vec/data_types/data_type_object.cpp
+++ b/be/src/vec/data_types/data_type_object.cpp
@@ -207,7 +207,8 @@ const char* DataTypeObject::deserialize(const char* buf,
MutableColumnPtr* colum
be_exec_version);
column_object->set_sparse_column(std::move(sparse_column));
} else {
-
column_object->get_sparse_column()->assume_mutable()->insert_many_defaults(num_rows);
+ column_object->get_sparse_column()->assume_mutable()->resize(
+ column_object->get_sparse_column()->size() + num_rows);
}
if (!root_added && column_object->get_subcolumn({})) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]