This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 39c69c766e9 [Optimize](Variant) optimize schema update performance
(#45480) (#45731)
39c69c766e9 is described below
commit 39c69c766e9b17bf3bccd89410acb6be3931ecf3
Author: lihangyu <[email protected]>
AuthorDate: Sat Dec 21 23:41:03 2024 +0800
[Optimize](Variant) optimize schema update performance (#45480) (#45731)
(#45480)
---
be/src/olap/rowset_builder.cpp | 24 +++++++++++++-----------
be/src/olap/tablet_schema.cpp | 15 +++++++++++++++
be/src/olap/tablet_schema.h | 3 +++
be/src/vec/common/schema_util.cpp | 5 ++---
4 files changed, 33 insertions(+), 14 deletions(-)
diff --git a/be/src/olap/rowset_builder.cpp b/be/src/olap/rowset_builder.cpp
index 1929ffbb78e..c668df4bd33 100644
--- a/be/src/olap/rowset_builder.cpp
+++ b/be/src/olap/rowset_builder.cpp
@@ -327,21 +327,22 @@ Status RowsetBuilder::commit_txn() {
SCOPED_TIMER(_commit_txn_timer);
const RowsetWriterContext& rw_ctx = _rowset_writer->context();
- if (rw_ctx.tablet_schema->num_variant_columns() > 0) {
+ if (rw_ctx.tablet_schema->num_variant_columns() > 0 && _rowset->num_rows()
> 0) {
// Need to merge schema with `rw_ctx.merged_tablet_schema` in prior,
// merged schema keeps the newest merged schema for the rowset, which is updated and merged
// during flushing segments.
if (rw_ctx.merged_tablet_schema != nullptr) {
RETURN_IF_ERROR(tablet()->update_by_least_common_schema(rw_ctx.merged_tablet_schema));
+ } else {
+ // We should merge rowset schema further, in case that the merged_tablet_schema maybe null
+ // when enable_memtable_on_sink_node is true, the merged_tablet_schema will not be passed to
+ // the destination backend.
+ // update tablet schema when meet variant columns, before commit_txn
+ // Eg. rowset schema: A(int), B(float), C(int), D(int)
+ // _tabelt->tablet_schema: A(bigint), B(double)
+ // => update_schema: A(bigint), B(double), C(int), D(int)
+
RETURN_IF_ERROR(tablet()->update_by_least_common_schema(rw_ctx.tablet_schema));
}
- // We should merge rowset schema further, in case that the merged_tablet_schema maybe null
- // when enable_memtable_on_sink_node is true, the merged_tablet_schema will not be passed to
- // the destination backend.
- // update tablet schema when meet variant columns, before commit_txn
- // Eg. rowset schema: A(int), B(float), C(int), D(int)
- // _tabelt->tablet_schema: A(bigint), B(double)
- // => update_schema: A(bigint), B(double), C(int), D(int)
-
RETURN_IF_ERROR(tablet()->update_by_least_common_schema(rw_ctx.tablet_schema));
}
// Transfer ownership of `PendingRowsetGuard` to `TxnManager`
@@ -379,7 +380,6 @@ Status BaseRowsetBuilder::cancel() {
void BaseRowsetBuilder::_build_current_tablet_schema(int64_t index_id,
const
OlapTableSchemaParam* table_schema_param,
const TabletSchema&
ori_tablet_schema) {
- _tablet_schema->copy_from(ori_tablet_schema);
// find the right index id
int i = 0;
auto indexes = table_schema_param->indexes();
@@ -388,11 +388,13 @@ void
BaseRowsetBuilder::_build_current_tablet_schema(int64_t index_id,
break;
}
}
-
if (!indexes.empty() && !indexes[i]->columns.empty() &&
indexes[i]->columns[0]->unique_id() >= 0) {
+ _tablet_schema->shawdow_copy_without_columns(ori_tablet_schema);
_tablet_schema->build_current_tablet_schema(index_id,
table_schema_param->version(),
indexes[i],
ori_tablet_schema);
+ } else {
+ _tablet_schema->copy_from(ori_tablet_schema);
}
if (_tablet_schema->schema_version() > ori_tablet_schema.schema_version())
{
// After schema change, should include extracted column
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 7da0f99537a..488e9755b23 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -1047,6 +1047,21 @@ void TabletSchema::copy_from(const TabletSchema&
tablet_schema) {
_table_id = tablet_schema.table_id();
}
+void TabletSchema::shawdow_copy_without_columns(const TabletSchema&
tablet_schema) {
+ *this = tablet_schema;
+ _field_path_to_index.clear();
+ _field_name_to_index.clear();
+ _field_id_to_index.clear();
+ _num_columns = 0;
+ _num_variant_columns = 0;
+ _num_null_columns = 0;
+ _num_key_columns = 0;
+ _cols.clear();
+ _vl_field_mem_size = 0;
+ // notice : do not ref columns
+ _column_cache_handlers.clear();
+}
+
void TabletSchema::update_index_info_from(const TabletSchema& tablet_schema) {
for (auto& col : _cols) {
if (col->unique_id() < 0) {
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index c9a0d45bd9b..75b3a78e183 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -300,6 +300,8 @@ public:
// Must make sure the row column is always the last column
void add_row_column();
void copy_from(const TabletSchema& tablet_schema);
+ // lightweight copy, take care of lifecycle of TabletColumn
+ void shawdow_copy_without_columns(const TabletSchema& tablet_schema);
void update_index_info_from(const TabletSchema& tablet_schema);
std::string to_key() const;
// Don't use.
@@ -481,6 +483,7 @@ public:
private:
friend bool operator==(const TabletSchema& a, const TabletSchema& b);
friend bool operator!=(const TabletSchema& a, const TabletSchema& b);
+ TabletSchema(const TabletSchema&) = default;
void clear_column_cache_handlers();
diff --git a/be/src/vec/common/schema_util.cpp
b/be/src/vec/common/schema_util.cpp
index b373dbd1347..51a3ed8c317 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -431,9 +431,8 @@ Status get_least_common_schema(const
std::vector<TabletSchemaSPtr>& schemas,
// duplicated paths following the update_least_common_schema process.
auto build_schema_without_extracted_columns = [&](const TabletSchemaSPtr&
base_schema) {
output_schema = std::make_shared<TabletSchema>();
- output_schema->copy_from(*base_schema);
- // Merge columns from other schemas
- output_schema->clear_columns();
+ // not copy columns but only shadow copy other attributes
+ output_schema->shawdow_copy_without_columns(*base_schema);
// Get all columns without extracted columns and collect variant col unique id
for (const TabletColumnPtr& col : base_schema->columns()) {
if (col->is_variant_type()) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]