This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 49c49a4a014 branch-4.0: [fix](variant) fix read from variant sparse column (#58302) (#58653)
49c49a4a014 is described below

commit 49c49a4a0147e22b4d2b4b0ca92e6fee997a7f2b
Author: Sun Chenyang <[email protected]>
AuthorDate: Wed Dec 3 15:06:33 2025 +0800

    branch-4.0: [fix](variant) fix read from variant sparse column (#58302) (#58653)
    
    pick from master #58302
---
 .../variant/hierarchical_data_iterator.cpp         | 13 ++++-
 be/src/vec/functions/function_variant_element.cpp  | 13 +++--
 .../vec/function/function_variant_element_test.cpp | 60 ++++++++++++++++++++++
 .../data/variant_p0/variant_hirachinal.out         |  6 +++
 .../suites/variant_p0/variant_hirachinal.groovy    | 16 +++++-
 5 files changed, 102 insertions(+), 6 deletions(-)

diff --git 
a/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp 
b/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp
index b44aa627ff2..dd375cb1dcc 100644
--- a/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/variant/hierarchical_data_iterator.cpp
@@ -18,6 +18,7 @@
 #include "olap/rowset/segment_v2/variant/hierarchical_data_iterator.h"
 
 #include <memory>
+#include <optional>
 
 #include "common/status.h"
 #include "io/io_common.h"
@@ -305,7 +306,11 @@ Status 
HierarchicalDataIterator::_init_container(vectorized::MutableColumnPtr& c
 // Return sub-path by specified prefix.
 // For example, for prefix a.b:
 // a.b.c.d -> c.d, a.b.c -> c
-static std::string_view get_sub_path(const std::string_view& path, const 
std::string_view& prefix) {
+static std::optional<std::string_view> get_sub_path(const std::string_view& 
path,
+                                                    const std::string_view& 
prefix) {
+    if (path.size() <= prefix.size() || path[prefix.size()] != '.') {
+        return std::nullopt;
+    }
     return path.substr(prefix.size() + 1);
 }
 
@@ -377,7 +382,11 @@ Status HierarchicalDataIterator::_process_sparse_column(
                     }
                     // Don't include path that is equal to the prefix.
                     if (path.size() != path_prefix.size()) {
-                        auto sub_path = get_sub_path(path, path_prefix);
+                        auto sub_path_optional = get_sub_path(path, 
path_prefix);
+                        if (!sub_path_optional.has_value()) {
+                            continue;
+                        }
+                        std::string_view sub_path = *sub_path_optional;
                         // Case 1: subcolumn already created, append this 
row's value into it.
                         if (auto it = 
subcolumns_from_sparse_column.find(sub_path);
                             it != subcolumns_from_sparse_column.end()) {
diff --git a/be/src/vec/functions/function_variant_element.cpp 
b/be/src/vec/functions/function_variant_element.cpp
index aa2d082d156..9e37deefc2f 100644
--- a/be/src/vec/functions/function_variant_element.cpp
+++ b/be/src/vec/functions/function_variant_element.cpp
@@ -127,8 +127,11 @@ private:
     // Return sub-path by specified prefix.
     // For example, for prefix a.b:
     // a.b.c.d -> c.d, a.b.c -> c
-    static std::string_view get_sub_path(const std::string_view& path,
-                                         const std::string_view& prefix) {
+    static std::optional<std::string_view> get_sub_path(const 
std::string_view& path,
+                                                        const 
std::string_view& prefix) {
+        if (path.size() <= prefix.size() || path[prefix.size()] != '.') {
+            return std::nullopt;
+        }
         return path.substr(prefix.size() + 1);
     }
     static Status get_element_column(const ColumnVariant& src, const 
ColumnPtr& index_column,
@@ -206,7 +209,11 @@ private:
                         }
                         // Don't include path that is equal to the prefix.
                         if (path.size() != path_prefix.size()) {
-                            auto sub_path = get_sub_path(path, path_prefix);
+                            auto sub_path_optional = get_sub_path(path, 
path_prefix);
+                            if (!sub_path_optional.has_value()) {
+                                continue;
+                            }
+                            std::string_view sub_path = *sub_path_optional;
                             sparse_data_paths->insert_data(sub_path.data(), 
sub_path.size());
                             
sparse_data_values->insert_from(src_sparse_data_values,
                                                             lower_bound_index);
diff --git a/be/test/vec/function/function_variant_element_test.cpp 
b/be/test/vec/function/function_variant_element_test.cpp
new file mode 100644
index 00000000000..f22735a0555
--- /dev/null
+++ b/be/test/vec/function/function_variant_element_test.cpp
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/functions/function_variant_element.cpp"
+
+#include <gtest/gtest.h>
+
+namespace doris::vectorized {
+
+TEST(function_variant_element_test, extract_from_sparse_column) {
+    auto variant_column = ColumnVariant::create(1 /*max_subcolumns_count*/);
+    auto* variant_ptr = assert_cast<ColumnVariant*>(variant_column.get());
+
+    ColumnVariant::Subcolumn subcolumn(0, true, false);
+    Field field = Field::create_field<TYPE_STRING>("John");
+    subcolumn.insert(field);
+
+    auto [sparse_column_keys, sparse_column_values] =
+            variant_ptr->get_sparse_data_paths_and_values();
+    auto& sparse_column_offsets = 
variant_ptr->serialized_sparse_column_offsets();
+    subcolumn.serialize_to_sparse_column(sparse_column_keys, "profile.age", 
sparse_column_values,
+                                         0);
+    subcolumn.serialize_to_sparse_column(sparse_column_keys, "profile.name", 
sparse_column_values,
+                                         0);
+    subcolumn.serialize_to_sparse_column(sparse_column_keys, "profile_id", 
sparse_column_values, 0);
+    sparse_column_offsets.push_back(sparse_column_keys->size());
+    variant_ptr->get_subcolumn({})->insert_default();
+    variant_ptr->set_num_rows(1);
+
+    ColumnPtr result;
+    ColumnPtr index_column_ptr = ColumnString::create();
+    auto* index_column_ptr_mutable =
+            
assert_cast<ColumnString*>(index_column_ptr->assume_mutable().get());
+    index_column_ptr_mutable->insert_data("profile", 7);
+    ColumnPtr index_column = ColumnConst::create(index_column_ptr, 1);
+    auto status =
+            FunctionVariantElement::get_element_column(*variant_column, 
index_column, &result);
+    EXPECT_TRUE(status.ok());
+
+    auto result_ptr = assert_cast<const ColumnVariant&>(*result.get());
+    std::string result_string;
+    result_ptr.serialize_one_row_to_string(0, &result_string);
+    EXPECT_EQ(result_string, "{\"age\":\"John\",\"name\":\"John\"}");
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/regression-test/data/variant_p0/variant_hirachinal.out 
b/regression-test/data/variant_p0/variant_hirachinal.out
index c23fce09221..24179544e53 100644
--- a/regression-test/data/variant_p0/variant_hirachinal.out
+++ b/regression-test/data/variant_p0/variant_hirachinal.out
@@ -56,3 +56,9 @@
 -- !sql --
 3      \N      {"a":1,"b":2,"c":3,"d":4}
 
+-- !sql --
+{}
+{"age":30,"name":"John"}
+{}
+{"age":30,"name":"John"}
+
diff --git a/regression-test/suites/variant_p0/variant_hirachinal.groovy 
b/regression-test/suites/variant_p0/variant_hirachinal.groovy
index cfc660255ce..8ec3fde007c 100644
--- a/regression-test/suites/variant_p0/variant_hirachinal.groovy
+++ b/regression-test/suites/variant_p0/variant_hirachinal.groovy
@@ -96,5 +96,19 @@ suite("regression_test_variant_hirachinal", "variant_type"){
     qt_sql """select * from t order by a;"""
     qt_sql """select * from t where v is null;"""
 
-
+    sql "DROP TABLE IF EXISTS ${table_name}"
+    sql """
+        CREATE TABLE ${table_name} (
+            `k` bigint NULL,
+            `v` variant<PROPERTIES ("variant_max_subcolumns_count" = "1")> NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`k`)
+        DISTRIBUTED BY HASH(`k`) BUCKETS 1
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+    sql """insert into ${table_name} values (1, '{"a": 1}'), (2, '{"a" : 1, 
"profile" : {"name" : "John", "age" : 30}, "profile_id" : 123}');"""
+    sql """insert into ${table_name} values (3, '{"a": 1}'), (4, '{"a" : 1, 
"profile" : {"name" : "John", "age" : 30}, "profile2" : 123}'); """
+    qt_sql """select v['profile'] from ${table_name} order by k;"""
 }
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to