This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/variant-sparse by this push:
new 19e5a4b54f1 fix 1 (#45517)
19e5a4b54f1 is described below
commit 19e5a4b54f1ee6bb8e0673142b1839aecfb979ea
Author: lihangyu <[email protected]>
AuthorDate: Tue Dec 17 15:20:15 2024 +0800
fix 1 (#45517)
---
.../segment_v2/variant_column_writer_impl.cpp | 3 +--
be/src/vec/columns/column_object.cpp | 8 ++++++--
be/src/vec/common/schema_util.cpp | 24 ++++++++++------------
be/src/vec/common/schema_util.h | 2 +-
be/src/vec/functions/function_cast.h | 1 +
5 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
index 761cbec8c49..5fbb7433e10 100644
--- a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
+++ b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
@@ -214,8 +214,7 @@ Status VariantColumnWriterImpl::_process_sparse_column(
vectorized::ColumnObject* ptr, vectorized::OlapBlockDataConvertor* converter,
size_t num_rows, int& column_id) {
// create sparse column writer
- TabletColumn sparse_column =
- vectorized::schema_util::create_sparse_column(_tablet_column->unique_id());
+ TabletColumn sparse_column = vectorized::schema_util::create_sparse_column(*_tablet_column);
ColumnWriterOptions sparse_writer_opts;
sparse_writer_opts.meta = _opts.footer->add_columns();
diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index 568ed7f8bbc..2cb1e013f8c 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -2058,9 +2058,12 @@ Status ColumnObject::finalize(FinalizeMode mode) {
if (entry->data.is_root) {
continue;
}
+ if (mode != FinalizeMode::WRITE_MODE) {
+ new_subcolumns.add(entry->path, entry->data);
+ }
}
- // merge and encode sparse column
+ // caculate stats & merge and encode sparse column
if (mode == FinalizeMode::WRITE_MODE) {
// pick sparse columns
std::set<std::string_view> selected_path;
@@ -2108,6 +2111,7 @@ Status ColumnObject::finalize(FinalizeMode mode) {
remaing_subcolumns.emplace(entry->path.get_path(), entry->data);
}
}
+ serialized_sparse_column->clear();
RETURN_IF_ERROR(serialize_sparse_columns(std::move(remaing_subcolumns)));
}
@@ -2168,7 +2172,7 @@ ColumnPtr ColumnObject::filter(const Filter& filter, ssize_t count) const {
}
ColumnPtr ColumnObject::replicate(const IColumn::Offsets& offsets) const {
- column_match_offsets_size(num_rows, offsets.size());
+ // column_match_offsets_size(num_rows, offsets.size());
return apply_for_columns([&](const ColumnPtr column) { return column->replicate(offsets); });
}
diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp
index 77b3299c5b5..74298d2c838 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -606,20 +606,18 @@ bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* o
return new_schema_has_inverted_index != old_schema_has_inverted_index;
}
-TabletColumn create_sparse_column(int32_t parent_unique_id) {
- TColumn tcolumn;
- tcolumn.column_name = SPARSE_COLUMN_PATH;
- tcolumn.col_unique_id = parent_unique_id;
- tcolumn.column_type = TColumnType {};
- tcolumn.column_type.type = TPrimitiveType::MAP;
-
- TColumn child_tcolumn;
- tcolumn.column_type = TColumnType {};
- tcolumn.column_type.type = TPrimitiveType::STRING;
- tcolumn.children_column.push_back(child_tcolumn);
- tcolumn.children_column.push_back(child_tcolumn);
- auto res = TabletColumn {tcolumn};
+TabletColumn create_sparse_column(const TabletColumn& variant) {
+ TabletColumn res;
+ res.set_name(SPARSE_COLUMN_PATH);
+ res.set_unique_id(variant.unique_id());
+ res.set_type(FieldType::OLAP_FIELD_TYPE_MAP);
+ res.set_aggregation_method(variant.aggregation());
res.set_path_info(PathInData {SPARSE_COLUMN_PATH});
+
+ TabletColumn child_tcolumn;
+ child_tcolumn.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+ res.add_sub_column(child_tcolumn);
+ res.add_sub_column(child_tcolumn);
return res;
}
diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h
index fee6e778325..795c700e636 100644
--- a/be/src/vec/common/schema_util.h
+++ b/be/src/vec/common/schema_util.h
@@ -128,6 +128,6 @@ bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema* o
int32_t new_col_idx, int32_t old_col_idx);
// create ColumnMap<String, String>
-TabletColumn create_sparse_column(int32_t parent_unique_id);
+TabletColumn create_sparse_column(const TabletColumn& variant);
} // namespace doris::vectorized::schema_util
diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h
index 5de820dfa3a..0e7a8c495d3 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -1933,6 +1933,7 @@ private:
// set variant root column/type to from column/type
auto variant = ColumnObject::create(true /*always nullable*/);
variant->create_root(from_type, col_from->assume_mutable());
+ variant->get_sparse_column()->assume_mutable()->insert_many_defaults(input_rows_count);
block.replace_by_position(result, std::move(variant));
return Status::OK();
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]