This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/variant-sparse by this push:
new 989aa0f4280 fix 4 (#45601)
989aa0f4280 is described below
commit 989aa0f4280270bba78542844af491a07a1d67ca
Author: lihangyu <[email protected]>
AuthorDate: Wed Dec 18 19:18:28 2024 +0800
fix 4 (#45601)
---
be/src/olap/rowset/segment_v2/column_reader.cpp | 174 ++++++++++-----------
be/src/olap/rowset/segment_v2/column_reader.h | 72 ++++-----
.../rowset/segment_v2/hierarchical_data_reader.cpp | 31 ++--
.../rowset/segment_v2/hierarchical_data_reader.h | 1 +
be/src/olap/rowset/segment_v2/segment.cpp | 38 +++--
be/src/olap/rowset/segment_v2/segment.h | 5 +-
.../segment_v2/variant_column_writer_impl.cpp | 15 +-
be/src/vec/columns/column_object.cpp | 11 +-
be/src/vec/columns/column_object.h | 14 +-
be/src/vec/data_types/data_type_object.cpp | 17 ++
10 files changed, 194 insertions(+), 184 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 745ff3d93a3..2f303999aea 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -282,18 +282,6 @@ Status VariantColumnReader::init(const
ColumnReaderOptions& opts, const SegmentF
io::FileReaderSPtr file_reader) {
// init sub columns
_subcolumn_readers = std::make_unique<SubcolumnColumnReaders>();
- std::unordered_map<vectorized::PathInData, uint32_t,
vectorized::PathInData::Hash>
- column_path_to_footer_ordinal;
- for (uint32_t ordinal = 0; ordinal < footer.columns().size(); ++ordinal) {
- const auto& column_pb = footer.columns(ordinal);
- // column path for accessing subcolumns of variant
- if (column_pb.has_column_path_info()) {
- vectorized::PathInData path;
- path.from_protobuf(column_pb.column_path_info());
- column_path_to_footer_ordinal.emplace(path, ordinal);
- }
- }
-
const ColumnMetaPB& self_column_pb = footer.columns(column_id);
for (const ColumnMetaPB& column_pb : footer.columns()) {
if (column_pb.unique_id() != self_column_pb.unique_id()) {
@@ -311,23 +299,25 @@ Status VariantColumnReader::init(const
ColumnReaderOptions& opts, const SegmentF
&_sparse_column_reader));
continue;
}
- // init subcolumns
auto relative_path = path.copy_pop_front();
+ auto get_data_type_fn = [&]() {
+ if (relative_path.empty()) {
+ return
make_nullable(std::make_unique<vectorized::ColumnObject::MostCommonType>());
+ }
+ return
vectorized::DataTypeFactory::instance().create_data_type(column_pb);
+ };
+ // init subcolumns
if (_subcolumn_readers->get_root() == nullptr) {
_subcolumn_readers->create_root(SubcolumnReader {nullptr,
nullptr});
}
if (relative_path.empty()) {
// root column
-
_subcolumn_readers->get_mutable_root()->modify_to_scalar(SubcolumnReader {
- std::move(reader),
-
vectorized::DataTypeFactory::instance().create_data_type(column_pb)});
+ _subcolumn_readers->get_mutable_root()->modify_to_scalar(
+ SubcolumnReader {std::move(reader), get_data_type_fn()});
} else {
// check the root is already a leaf node
- _subcolumn_readers->add(
- relative_path,
- SubcolumnReader {
- std::move(reader),
-
vectorized::DataTypeFactory::instance().create_data_type(column_pb)});
+ _subcolumn_readers->add(relative_path,
+ SubcolumnReader {std::move(reader),
get_data_type_fn()});
}
}
@@ -876,7 +866,9 @@ Status ColumnReader::new_iterator(ColumnIterator**
iterator) {
return new_map_iterator(iterator);
}
case FieldType::OLAP_FIELD_TYPE_VARIANT: {
- *iterator = new VariantRootColumnIterator(new
FileColumnIterator(this));
+ // read from root data
+ // *iterator = new VariantRootColumnIterator(new
FileColumnIterator(this));
+ *iterator = new FileColumnIterator(this);
return Status::OK();
}
default:
@@ -1738,75 +1730,75 @@ void
DefaultValueColumnIterator::_insert_many_default(vectorized::MutableColumnP
}
}
-Status VariantRootColumnIterator::_process_root_column(
- vectorized::MutableColumnPtr& dst, vectorized::MutableColumnPtr&
root_column,
- const vectorized::DataTypePtr& most_common_type) {
- auto& obj =
- dst->is_nullable()
- ? assert_cast<vectorized::ColumnObject&>(
-
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
- : assert_cast<vectorized::ColumnObject&>(*dst);
-
- // fill nullmap
- if (root_column->is_nullable() && dst->is_nullable()) {
- vectorized::ColumnUInt8& dst_null_map =
-
assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column();
- vectorized::ColumnUInt8& src_null_map =
-
assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column();
- dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size());
- }
-
- // add root column to a tmp object column
- auto tmp = vectorized::ColumnObject::create(true, false);
- auto& tmp_obj = assert_cast<vectorized::ColumnObject&>(*tmp);
- tmp_obj.add_sub_column({}, std::move(root_column), most_common_type);
-
- // merge tmp object column to dst
- obj.insert_range_from(*tmp, 0, tmp->size());
-
- // finalize object if needed
- if (!obj.is_finalized()) {
- obj.finalize();
- }
-
-#ifndef NDEBUG
- obj.check_consistency();
-#endif
-
- return Status::OK();
-}
-
-Status VariantRootColumnIterator::next_batch(size_t* n,
vectorized::MutableColumnPtr& dst,
- bool* has_null) {
- // read root column
- auto& obj =
- dst->is_nullable()
- ? assert_cast<vectorized::ColumnObject&>(
-
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
- : assert_cast<vectorized::ColumnObject&>(*dst);
-
- auto most_common_type = obj.get_most_common_type();
- auto root_column = most_common_type->create_column();
- RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null));
-
- return _process_root_column(dst, root_column, most_common_type);
-}
-
-Status VariantRootColumnIterator::read_by_rowids(const rowid_t* rowids, const
size_t count,
- vectorized::MutableColumnPtr&
dst) {
- // read root column
- auto& obj =
- dst->is_nullable()
- ? assert_cast<vectorized::ColumnObject&>(
-
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
- : assert_cast<vectorized::ColumnObject&>(*dst);
-
- auto most_common_type = obj.get_most_common_type();
- auto root_column = most_common_type->create_column();
- RETURN_IF_ERROR(_inner_iter->read_by_rowids(rowids, count, root_column));
-
- return _process_root_column(dst, root_column, most_common_type);
-}
+// Status VariantRootColumnIterator::_process_root_column(
+// vectorized::MutableColumnPtr& dst, vectorized::MutableColumnPtr&
root_column,
+// const vectorized::DataTypePtr& most_common_type) {
+// auto& obj =
+// dst->is_nullable()
+// ? assert_cast<vectorized::ColumnObject&>(
+//
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
+// : assert_cast<vectorized::ColumnObject&>(*dst);
+//
+// // fill nullmap
+// if (root_column->is_nullable() && dst->is_nullable()) {
+// vectorized::ColumnUInt8& dst_null_map =
+//
assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column();
+// vectorized::ColumnUInt8& src_null_map =
+//
assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column();
+// dst_null_map.insert_range_from(src_null_map, 0,
src_null_map.size());
+// }
+//
+// // add root column to a tmp object column
+// auto tmp = vectorized::ColumnObject::create(true, false);
+// auto& tmp_obj = assert_cast<vectorized::ColumnObject&>(*tmp);
+// tmp_obj.add_sub_column({}, std::move(root_column), most_common_type);
+//
+// // merge tmp object column to dst
+// obj.insert_range_from(*tmp, 0, tmp_obj.rows());
+//
+// // finalize object if needed
+// if (!obj.is_finalized()) {
+// obj.finalize();
+// }
+//
+// #ifndef NDEBUG
+// obj.check_consistency();
+// #endif
+//
+// return Status::OK();
+// }
+//
+// Status VariantRootColumnIterator::next_batch(size_t* n,
vectorized::MutableColumnPtr& dst,
+// bool* has_null) {
+// // read root column
+// auto& obj =
+// dst->is_nullable()
+// ? assert_cast<vectorized::ColumnObject&>(
+//
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
+// : assert_cast<vectorized::ColumnObject&>(*dst);
+//
+// auto most_common_type = obj.get_most_common_type();
+// auto root_column = most_common_type->create_column();
+// RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null));
+//
+// return _process_root_column(dst, root_column, most_common_type);
+// }
+//
+// Status VariantRootColumnIterator::read_by_rowids(const rowid_t* rowids,
const size_t count,
+//
vectorized::MutableColumnPtr& dst) {
+// // read root column
+// auto& obj =
+// dst->is_nullable()
+// ? assert_cast<vectorized::ColumnObject&>(
+//
assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
+// : assert_cast<vectorized::ColumnObject&>(*dst);
+//
+// auto most_common_type = obj.get_most_common_type();
+// auto root_column = most_common_type->create_column();
+// RETURN_IF_ERROR(_inner_iter->read_by_rowids(rowids, count,
root_column));
+//
+// return _process_root_column(dst, root_column, most_common_type);
+// }
Status DefaultNestedColumnIterator::next_batch(size_t* n,
vectorized::MutableColumnPtr& dst) {
bool has_null = false;
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h
b/be/src/olap/rowset/segment_v2/column_reader.h
index d61393e820c..189435c2095 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -216,7 +216,7 @@ public:
void disable_index_meta_cache() { _use_index_page_cache = false; }
- FieldType get_meta_type() { return _meta_type; }
+ virtual FieldType get_meta_type() { return _meta_type; }
private:
ColumnReader(const ColumnReaderOptions& opts, const ColumnMetaPB& meta,
uint64_t num_rows,
@@ -309,6 +309,8 @@ public:
~VariantColumnReader() override = default;
+ FieldType get_meta_type() override { return
FieldType::OLAP_FIELD_TYPE_VARIANT; }
+
private:
std::unique_ptr<SubcolumnColumnReaders> _subcolumn_readers;
std::unique_ptr<ColumnReader> _sparse_column_reader;
@@ -661,40 +663,40 @@ private:
int32_t _segment_id = 0;
};
-class VariantRootColumnIterator : public ColumnIterator {
-public:
- VariantRootColumnIterator() = delete;
-
- explicit VariantRootColumnIterator(FileColumnIterator* iter) {
_inner_iter.reset(iter); }
-
- ~VariantRootColumnIterator() override = default;
-
- Status init(const ColumnIteratorOptions& opts) override { return
_inner_iter->init(opts); }
-
- Status seek_to_first() override { return _inner_iter->seek_to_first(); }
-
- Status seek_to_ordinal(ordinal_t ord_idx) override {
- return _inner_iter->seek_to_ordinal(ord_idx);
- }
-
- Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) {
- bool has_null;
- return next_batch(n, dst, &has_null);
- }
-
- Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool*
has_null) override;
-
- Status read_by_rowids(const rowid_t* rowids, const size_t count,
- vectorized::MutableColumnPtr& dst) override;
-
- ordinal_t get_current_ordinal() const override { return
_inner_iter->get_current_ordinal(); }
-
-private:
- Status _process_root_column(vectorized::MutableColumnPtr& dst,
- vectorized::MutableColumnPtr& root_column,
- const vectorized::DataTypePtr&
most_common_type);
- std::unique_ptr<FileColumnIterator> _inner_iter;
-};
+// class VariantRootColumnIterator : public ColumnIterator {
+// public:
+// VariantRootColumnIterator() = delete;
+//
+// explicit VariantRootColumnIterator(FileColumnIterator* iter) {
_inner_iter.reset(iter); }
+//
+// ~VariantRootColumnIterator() override = default;
+//
+// Status init(const ColumnIteratorOptions& opts) override { return
_inner_iter->init(opts); }
+//
+// Status seek_to_first() override { return _inner_iter->seek_to_first(); }
+//
+// Status seek_to_ordinal(ordinal_t ord_idx) override {
+// return _inner_iter->seek_to_ordinal(ord_idx);
+// }
+//
+// Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) {
+// bool has_null;
+// return next_batch(n, dst, &has_null);
+// }
+//
+// Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool*
has_null) override;
+//
+// Status read_by_rowids(const rowid_t* rowids, const size_t count,
+// vectorized::MutableColumnPtr& dst) override;
+//
+// ordinal_t get_current_ordinal() const override { return
_inner_iter->get_current_ordinal(); }
+//
+// private:
+// Status _process_root_column(vectorized::MutableColumnPtr& dst,
+// vectorized::MutableColumnPtr& root_column,
+// const vectorized::DataTypePtr&
most_common_type);
+// std::unique_ptr<FileColumnIterator> _inner_iter;
+// };
// This iterator is used to read default value column
class DefaultValueColumnIterator : public ColumnIterator {
diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
index 2b8e58d47f1..ca25b230bce 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
@@ -239,15 +239,17 @@ Status
HierarchicalDataReader::_init_container(vectorized::MutableColumnPtr& con
// add root first
if (_path.get_parts().empty() && _root_reader) {
- auto& root_var =
- _root_reader->column->is_nullable()
- ? assert_cast<vectorized::ColumnObject&>(
-
assert_cast<vectorized::ColumnNullable&>(*_root_reader->column)
- .get_nested_column())
- :
assert_cast<vectorized::ColumnObject&>(*_root_reader->column);
- auto column = root_var.get_root();
- auto type = root_var.get_root_type();
- container_variant.add_sub_column({}, std::move(column), type);
+ // auto& root_var =
+ // _root_reader->column->is_nullable()
+ // ? assert_cast<vectorized::ColumnObject&>(
+ //
assert_cast<vectorized::ColumnNullable&>(*_root_reader->column)
+ // .get_nested_column())
+ // :
assert_cast<vectorized::ColumnObject&>(*_root_reader->column);
+ // auto column = root_var.get_root();
+ // auto type = root_var.get_root_type();
+ MutableColumnPtr column = _root_reader->column->get_ptr();
+ container_variant.add_sub_column({}, std::move(column),
+ ColumnObject::get_most_common_type());
}
// parent path -> subcolumns
std::map<PathInData, PathsWithColumnAndType> nested_subcolumns;
@@ -361,7 +363,9 @@ Status
HierarchicalDataReader::_init_null_map_and_clear_columns(
return Status::OK();
}));
container->clear();
- _sparse_column_reader->column->clear();
+ if (_sparse_column_reader) {
+ _sparse_column_reader->column->clear();
+ }
if (_root_reader) {
if (_root_reader->column->is_nullable()) {
// fill nullmap
@@ -372,13 +376,8 @@ Status
HierarchicalDataReader::_init_null_map_and_clear_columns(
dst_null_map.insert_range_from(src_null_map, 0,
src_null_map.size());
// clear nullmap and inner data
src_null_map.clear();
- assert_cast<ColumnObject&>(
-
assert_cast<ColumnNullable&>(*_root_reader->column).get_nested_column())
- .clear_column_data();
- } else {
- auto& root_column =
assert_cast<ColumnObject&>(*_root_reader->column);
- root_column.clear_column_data();
}
+ _root_reader->column->clear();
} else {
if (dst->is_nullable()) {
// No nullable info exist in hirearchical data, fill nullmap with
all none null
diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
index 5d58f666f62..83dab269dfc 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
@@ -22,6 +22,7 @@
#include <unordered_map>
#include <utility>
+#include "common/exception.h"
#include "common/status.h"
#include "io/io_common.h"
#include "olap/field.h"
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index 441e839e6ef..ededa493018 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -201,22 +201,23 @@ Status Segment::_open() {
// 0.01 comes from PrimaryKeyIndexBuilder::init
_meta_mem_usage += BloomFilter::optimal_bit_num(_num_rows, 0.01) / 8;
- uint32_t ordinal = 0;
- for (const auto& column_meta : _footer_pb->columns()) {
- // unique_id < 0 means this column is extracted column from variant
- if (static_cast<int>(column_meta.unique_id()) >= 0) {
- _column_id_to_footer_ordinal[column_meta.unique_id()] = ordinal++;
+ // collect variant statistics
+ for (const auto& column_pb : _footer_pb->columns()) {
+ if (column_pb.has_variant_statistics()) {
+ _variant_column_stats.try_emplace(column_pb.unique_id(),
+ column_pb.variant_statistics());
}
}
+
return Status::OK();
}
-const ColumnMetaPB* Segment::get_column_meta(int32_t unique_id) const {
- auto it = _column_id_to_footer_ordinal.find(unique_id);
- if (it == _column_id_to_footer_ordinal.end()) {
+const VariantStatisticsPB* Segment::get_stats(int32_t unique_id) const {
+ auto it = _variant_column_stats.find(unique_id);
+ if (it == _variant_column_stats.end()) {
return nullptr;
}
- return &_footer_pb->columns(it->second);
+ return &it->second;
}
Status Segment::_open_inverted_index() {
@@ -570,8 +571,9 @@ Status Segment::healthy_status() {
vectorized::DataTypePtr Segment::get_data_type_of(const ColumnIdentifier&
identifier,
bool read_flat_leaves) const
{
// Path has higher priority
- if (identifier.path != nullptr && !identifier.path->empty()) {
- auto relative_path = identifier.path->copy_pop_front();
+ auto relative_path = identifier.path != nullptr ?
identifier.path->copy_pop_front()
+ : vectorized::PathInData();
+ if (!relative_path.empty()) {
int32_t unique_id =
identifier.unique_id > 0 ? identifier.unique_id :
identifier.parent_unique_id;
const auto* node = _column_readers.contains(unique_id)
@@ -605,11 +607,17 @@ Status Segment::_create_column_readers_once() {
}
Status Segment::_create_column_readers(const SegmentFooterPB& footer) {
+ // unique_id -> idx in footer.columns()
+ std::unordered_map<int32_t, uint32_t> column_id_to_footer_ordinal;
+ uint32_t ordinal = 0;
+ for (const auto& column_meta : _footer_pb->columns()) {
+ column_id_to_footer_ordinal.try_emplace(column_meta.unique_id(),
ordinal++);
+ }
// init by unique_id
for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns();
++ordinal) {
const auto& column = _tablet_schema->column(ordinal);
- auto iter = _column_id_to_footer_ordinal.find(column.unique_id());
- if (iter == _column_id_to_footer_ordinal.end()) {
+ auto iter = column_id_to_footer_ordinal.find(column.unique_id());
+ if (iter == column_id_to_footer_ordinal.end()) {
continue;
}
@@ -796,8 +804,8 @@ Status Segment::new_column_iterator(const TabletColumn&
tablet_column,
// }
// For compability reason unique_id may less than 0 for variant extracted
column
- int32_t unique_id = tablet_column.unique_id() > 0 ?
tablet_column.unique_id()
- :
tablet_column.parent_unique_id();
+ int32_t unique_id = tablet_column.unique_id() >= 0 ?
tablet_column.unique_id()
+ :
tablet_column.parent_unique_id();
// init default iterator
if (!_column_readers.contains(unique_id)) {
RETURN_IF_ERROR(new_default_iterator(tablet_column, iter));
diff --git a/be/src/olap/rowset/segment_v2/segment.h
b/be/src/olap/rowset/segment_v2/segment.h
index 877f74ae1c3..1c7b9427163 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -208,7 +208,7 @@ public:
const TabletSchemaSPtr& tablet_schema() { return _tablet_schema; }
- const ColumnMetaPB* get_column_meta(int32_t unique_id) const;
+ const VariantStatisticsPB* get_stats(int32_t unique_id) const;
private:
DISALLOW_COPY_AND_ASSIGN(Segment);
@@ -288,8 +288,7 @@ private:
int _be_exec_version = BeExecVersionManager::get_newest_version();
OlapReaderStatistics* _pk_index_load_stats = nullptr;
- // unique_id -> idx in footer.columns()
- std::unordered_map<int32_t, uint32_t> _column_id_to_footer_ordinal;
+ std::unordered_map<int32_t, VariantStatisticsPB> _variant_column_stats;
};
} // namespace segment_v2
diff --git a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
index 5fbb7433e10..a3671f3afd3 100644
--- a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
+++ b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp
@@ -68,22 +68,19 @@ Status
VariantColumnWriterImpl::_get_subcolumn_paths_from_stats(std::set<std::st
RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(
std::static_pointer_cast<BetaRowset>(reader->rowset()),
&segment_cache));
for (const auto& segment : segment_cache.get_segments()) {
- const auto* column_meta_pb =
segment->get_column_meta(_tablet_column->unique_id());
- if (!column_meta_pb) {
+ const VariantStatisticsPB* source_statistics =
+ segment->get_stats(_tablet_column->unique_id());
+ if (!source_statistics) {
continue;
}
- if (!column_meta_pb->has_variant_statistics()) {
- continue;
- }
- const VariantStatisticsPB& source_statistics =
column_meta_pb->variant_statistics();
- for (const auto& [path, size] :
source_statistics.subcolumn_non_null_size()) {
+ for (const auto& [path, size] :
source_statistics->subcolumn_non_null_size()) {
auto it = path_to_total_number_of_non_null_values.find(path);
if (it == path_to_total_number_of_non_null_values.end()) {
it = path_to_total_number_of_non_null_values.emplace(path,
0).first;
}
it->second += size;
}
- for (const auto& [path, size] :
source_statistics.sparse_column_non_null_size()) {
+ for (const auto& [path, size] :
source_statistics->sparse_column_non_null_size()) {
auto it = path_to_total_number_of_non_null_values.find(path);
if (it == path_to_total_number_of_non_null_values.end()) {
it = path_to_total_number_of_non_null_values.emplace(path,
0).first;
@@ -256,7 +253,7 @@ Status VariantColumnWriterImpl::_process_sparse_column(
}
void VariantStatistics::to_pb(VariantStatisticsPB* stats) const {
- for (const auto& [path, value] : _sparse_column_non_null_size) {
+ for (const auto& [path, value] : _subcolumns_non_null_size) {
stats->mutable_subcolumn_non_null_size()->emplace(path.to_string(),
value);
}
for (const auto& [path, value] : _sparse_column_non_null_size) {
diff --git a/be/src/vec/columns/column_object.cpp
b/be/src/vec/columns/column_object.cpp
index f234ba7bfa4..eb397e85a32 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -2231,12 +2231,6 @@ void ColumnObject::clear() {
_prev_positions.clear();
}
-void ColumnObject::create_root() {
- auto type = is_nullable ? make_nullable(std::make_shared<MostCommonType>())
- : std::make_shared<MostCommonType>();
- add_sub_column({}, type->create_column(), type);
-}
-
void ColumnObject::create_root(const DataTypePtr& type, MutableColumnPtr&&
column) {
if (num_rows == 0) {
num_rows = column->size();
@@ -2244,9 +2238,8 @@ void ColumnObject::create_root(const DataTypePtr& type,
MutableColumnPtr&& colum
add_sub_column({}, std::move(column), type);
}
-DataTypePtr ColumnObject::get_most_common_type() const {
- auto type = is_nullable ? make_nullable(std::make_shared<MostCommonType>())
- : std::make_shared<MostCommonType>();
+const DataTypePtr& ColumnObject::get_most_common_type() {
+ static auto type = make_nullable(std::make_shared<MostCommonType>());
return type;
}
diff --git a/be/src/vec/columns/column_object.h
b/be/src/vec/columns/column_object.h
index 86ba60fffce..647516f97cd 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -46,6 +46,7 @@
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_jsonb.h"
+#include "vec/data_types/data_type_map.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/serde/data_type_serde.h"
#include "vec/io/reader_buffer.h"
@@ -307,15 +308,10 @@ public:
// ensure root node is a certain type
void ensure_root_node_type(const DataTypePtr& type);
- // create jsonb root if missing
- // notice: should only using in VariantRootColumnIterator
- // since some datastructures(sparse columns are schema on read
- void create_root();
-
// create root with type and column if missing
void create_root(const DataTypePtr& type, MutableColumnPtr&& column);
- DataTypePtr get_most_common_type() const;
+ static const DataTypePtr& get_most_common_type();
// root is null or type nothing
bool is_null_root() const;
@@ -377,6 +373,12 @@ public:
vectorized::ColumnArray::ColumnOffsets::create());
}
+ static const DataTypePtr& get_sparse_column_type() {
+ static DataTypePtr type =
std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(),
+
std::make_shared<DataTypeString>());
+ return type;
+ }
+
void set_sparse_column(ColumnPtr column) { serialized_sparse_column =
column; }
Status finalize(FinalizeMode mode);
diff --git a/be/src/vec/data_types/data_type_object.cpp
b/be/src/vec/data_types/data_type_object.cpp
index 0c795e542b0..5829554d118 100644
--- a/be/src/vec/data_types/data_type_object.cpp
+++ b/be/src/vec/data_types/data_type_object.cpp
@@ -30,6 +30,7 @@
#include <vector>
#include "agent/be_exec_version_manager.h"
+#include "vec/columns/column.h"
#include "vec/columns/column_object.h"
#include "vec/common/assert_cast.h"
#include "vec/common/typeid_cast.h"
@@ -84,6 +85,11 @@ int64_t
DataTypeObject::get_uncompressed_serialized_bytes(const IColumn& column,
size += sizeof(uint32_t);
}
+ // sparse column
+ // TODO make compatibility with sparse column
+ size +=
ColumnObject::get_sparse_column_type()->get_uncompressed_serialized_bytes(
+ *column_object.get_sparse_column(), be_exec_version);
+
return size;
}
@@ -134,6 +140,11 @@ char* DataTypeObject::serialize(const IColumn& column,
char* buf, int be_exec_ve
buf += sizeof(uint32_t);
}
+ // serialize sparse column
+ // TODO make compatibility with sparse column
+ buf =
ColumnObject::get_sparse_column_type()->serialize(*column_object.get_sparse_column(),
buf,
+ be_exec_version);
+
return buf;
}
@@ -175,6 +186,12 @@ const char* DataTypeObject::deserialize(const char* buf,
MutableColumnPtr* colum
buf += sizeof(uint32_t);
}
+ // deserialize sparse column
+ // TODO make compatibility with sparse column
+ MutableColumnPtr sparse_column =
ColumnObject::get_sparse_column_type()->create_column();
+ buf = ColumnObject::get_sparse_column_type()->deserialize(buf,
&sparse_column, be_exec_version);
+ column_object->set_sparse_column(std::move(sparse_column));
+
column_object->finalize();
#ifndef NDEBUG
// DCHECK size
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]