This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 49c49a4a014 branch-4.0: [fix](variant) fix read from variant sparse column (#58302) (#58653)
49c49a4a014 is described below
commit 49c49a4a0147e22b4d2b4b0ca92e6fee997a7f2b
Author: Sun Chenyang <[email protected]>
AuthorDate: Wed Dec 3 15:06:33 2025 +0800
branch-4.0: [fix](variant) fix read from variant sparse column (#58302) (#58653)
pick from master #58302
---
.../variant/hierarchical_data_iterator.cpp | 13 ++++-
be/src/vec/functions/function_variant_element.cpp | 13 +++--
.../vec/function/function_variant_element_test.cpp | 60 ++++++++++++++++++++++
.../data/variant_p0/variant_hirachinal.out | 6 +++
.../suites/variant_p0/variant_hirachinal.groovy | 16 +++++-
5 files changed, 102 insertions(+), 6 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp b/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp
index b44aa627ff2..dd375cb1dcc 100644
--- a/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp
@@ -18,6 +18,7 @@
#include "olap/rowset/segment_v2/variant/hierarchical_data_iterator.h"
#include <memory>
+#include <optional>
#include "common/status.h"
#include "io/io_common.h"
@@ -305,7 +306,11 @@ Status HierarchicalDataIterator::_init_container(vectorized::MutableColumnPtr& c
// Return sub-path by specified prefix.
// For example, for prefix a.b:
// a.b.c.d -> c.d, a.b.c -> c
-static std::string_view get_sub_path(const std::string_view& path, const std::string_view& prefix) {
+static std::optional<std::string_view> get_sub_path(const std::string_view& path,
+ const std::string_view& prefix) {
+ if (path.size() <= prefix.size() || path[prefix.size()] != '.') {
+ return std::nullopt;
+ }
return path.substr(prefix.size() + 1);
}
@@ -377,7 +382,11 @@ Status HierarchicalDataIterator::_process_sparse_column(
}
// Don't include path that is equal to the prefix.
if (path.size() != path_prefix.size()) {
- auto sub_path = get_sub_path(path, path_prefix);
+ auto sub_path_optional = get_sub_path(path, path_prefix);
+ if (!sub_path_optional.has_value()) {
+ continue;
+ }
+ std::string_view sub_path = *sub_path_optional;
// Case 1: subcolumn already created, append this row's value into it.
if (auto it = subcolumns_from_sparse_column.find(sub_path);
it != subcolumns_from_sparse_column.end()) {
diff --git a/be/src/vec/functions/function_variant_element.cpp b/be/src/vec/functions/function_variant_element.cpp
index aa2d082d156..9e37deefc2f 100644
--- a/be/src/vec/functions/function_variant_element.cpp
+++ b/be/src/vec/functions/function_variant_element.cpp
@@ -127,8 +127,11 @@ private:
// Return sub-path by specified prefix.
// For example, for prefix a.b:
// a.b.c.d -> c.d, a.b.c -> c
- static std::string_view get_sub_path(const std::string_view& path,
- const std::string_view& prefix) {
+ static std::optional<std::string_view> get_sub_path(const std::string_view& path,
+ const std::string_view& prefix) {
+ if (path.size() <= prefix.size() || path[prefix.size()] != '.') {
+ return std::nullopt;
+ }
return path.substr(prefix.size() + 1);
}
static Status get_element_column(const ColumnVariant& src, const ColumnPtr& index_column,
@@ -206,7 +209,11 @@ private:
}
// Don't include path that is equal to the prefix.
if (path.size() != path_prefix.size()) {
- auto sub_path = get_sub_path(path, path_prefix);
+ auto sub_path_optional = get_sub_path(path, path_prefix);
+ if (!sub_path_optional.has_value()) {
+ continue;
+ }
+ std::string_view sub_path = *sub_path_optional;
sparse_data_paths->insert_data(sub_path.data(), sub_path.size());
sparse_data_values->insert_from(src_sparse_data_values, lower_bound_index);
diff --git a/be/test/vec/function/function_variant_element_test.cpp b/be/test/vec/function/function_variant_element_test.cpp
new file mode 100644
index 00000000000..f22735a0555
--- /dev/null
+++ b/be/test/vec/function/function_variant_element_test.cpp
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/functions/function_variant_element.cpp"
+
+#include <gtest/gtest.h>
+
+namespace doris::vectorized {
+
+TEST(function_variant_element_test, extract_from_sparse_column) {
+ auto variant_column = ColumnVariant::create(1 /*max_subcolumns_count*/);
+ auto* variant_ptr = assert_cast<ColumnVariant*>(variant_column.get());
+
+ ColumnVariant::Subcolumn subcolumn(0, true, false);
+ Field field = Field::create_field<TYPE_STRING>("John");
+ subcolumn.insert(field);
+
+ auto [sparse_column_keys, sparse_column_values] =
+ variant_ptr->get_sparse_data_paths_and_values();
+ auto& sparse_column_offsets = variant_ptr->serialized_sparse_column_offsets();
+ subcolumn.serialize_to_sparse_column(sparse_column_keys, "profile.age", sparse_column_values,
+ 0);
+ subcolumn.serialize_to_sparse_column(sparse_column_keys, "profile.name", sparse_column_values,
+ 0);
+ subcolumn.serialize_to_sparse_column(sparse_column_keys, "profile_id", sparse_column_values, 0);
+ sparse_column_offsets.push_back(sparse_column_keys->size());
+ variant_ptr->get_subcolumn({})->insert_default();
+ variant_ptr->set_num_rows(1);
+
+ ColumnPtr result;
+ ColumnPtr index_column_ptr = ColumnString::create();
+ auto* index_column_ptr_mutable =
+ assert_cast<ColumnString*>(index_column_ptr->assume_mutable().get());
+ index_column_ptr_mutable->insert_data("profile", 7);
+ ColumnPtr index_column = ColumnConst::create(index_column_ptr, 1);
+ auto status =
+ FunctionVariantElement::get_element_column(*variant_column, index_column, &result);
+ EXPECT_TRUE(status.ok());
+
+ auto result_ptr = assert_cast<const ColumnVariant&>(*result.get());
+ std::string result_string;
+ result_ptr.serialize_one_row_to_string(0, &result_string);
+ EXPECT_EQ(result_string, "{\"age\":\"John\",\"name\":\"John\"}");
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/regression-test/data/variant_p0/variant_hirachinal.out b/regression-test/data/variant_p0/variant_hirachinal.out
index c23fce09221..24179544e53 100644
--- a/regression-test/data/variant_p0/variant_hirachinal.out
+++ b/regression-test/data/variant_p0/variant_hirachinal.out
@@ -56,3 +56,9 @@
-- !sql --
3 \N {"a":1,"b":2,"c":3,"d":4}
+-- !sql --
+{}
+{"age":30,"name":"John"}
+{}
+{"age":30,"name":"John"}
+
diff --git a/regression-test/suites/variant_p0/variant_hirachinal.groovy b/regression-test/suites/variant_p0/variant_hirachinal.groovy
index cfc660255ce..8ec3fde007c 100644
--- a/regression-test/suites/variant_p0/variant_hirachinal.groovy
+++ b/regression-test/suites/variant_p0/variant_hirachinal.groovy
@@ -96,5 +96,19 @@ suite("regression_test_variant_hirachinal", "variant_type"){
qt_sql """select * from t order by a;"""
qt_sql """select * from t where v is null;"""
-
+ sql "DROP TABLE IF EXISTS ${table_name}"
+ sql """
+ CREATE TABLE ${table_name} (
+ `k` bigint NULL,
+ `v` variant<PROPERTIES ("variant_max_subcolumns_count" = "1")> NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`k`)
+ DISTRIBUTED BY HASH(`k`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ sql """insert into ${table_name} values (1, '{"a": 1}'), (2, '{"a" : 1, "profile" : {"name" : "John", "age" : 30}, "profile_id" : 123}');"""
+ sql """insert into ${table_name} values (3, '{"a": 1}'), (4, '{"a" : 1, "profile" : {"name" : "John", "age" : 30}, "profile2" : 123}'); """
+ qt_sql """select v['profile'] from ${table_name} order by k;"""
}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]