This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new 4f6a7338379 [branch-4.1](variant) return raw string for element_at on
scalar-string variant (#64103) (#64424)
4f6a7338379 is described below
commit 4f6a733837920638ad8b9fa542ff324a5043b690
Author: Chenyang Sun <[email protected]>
AuthorDate: Tue Jun 16 15:03:04 2026 +0800
[branch-4.1](variant) return raw string for element_at on scalar-string
variant (#64103) (#64424)
cherry picked from #64103
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #xxx
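Problem Summary: element_at on a scalar-string variant (e.g. `cast(text as variant)['key']`) returned a string value as its JSON token, including the surrounding double quotes. It now returns the raw, unescaped string, consistent with the structured-subcolumn path.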
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---
be/src/exprs/function/function_variant_element.cpp | 9 +++++
.../function/function_variant_element_test.cpp | 40 +++++++++++++++++++++-
.../data/variant_p0/sql/select_from_value.out | 2 +-
.../suites/variant_p0/element_function.groovy | 28 +++++++++++++++
4 files changed, 77 insertions(+), 2 deletions(-)
diff --git a/be/src/exprs/function/function_variant_element.cpp b/be/src/exprs/function/function_variant_element.cpp
index ad6240a97e3..88d9aecbe0b 100644
--- a/be/src/exprs/function/function_variant_element.cpp
+++ b/be/src/exprs/function/function_variant_element.cpp
@@ -389,6 +389,15 @@ private:
}
break;
}
+ case simdjson::ondemand::json_type::string: {
+ // Extract the raw (unescaped) string value rather than its JSON
+ // representation. simdjson::to_json_string would keep the surrounding
+ // double quotes (e.g. "2026-05-20"), which leaks into the result and
+ // makes scalar-string variants inconsistent with structured ones.
+ std::string_view value_str = value.get_string().value();
+ column->insert_data(value_str.data(), value_str.length());
+ break;
+ }
default: {
auto value_str = simdjson::to_json_string(value).value();
column->insert_data(value_str.data(), value_str.length());
diff --git a/be/test/exprs/function/function_variant_element_test.cpp b/be/test/exprs/function/function_variant_element_test.cpp
index c16e2844ad9..3da210c0039 100644
--- a/be/test/exprs/function/function_variant_element_test.cpp
+++ b/be/test/exprs/function/function_variant_element_test.cpp
@@ -61,4 +61,42 @@ TEST(function_variant_element_test, extract_from_sparse_column) {
EXPECT_EQ(result_string, "{\"age\":\"John\",\"name\":\"John\"}");
}
-} // namespace doris
\ No newline at end of file
+// CIR-20498: extracting a string property from a scalar-string-root variant
+// (the shape produced by `cast(text as variant)`) must return the raw string,
+// not its JSON token with surrounding double quotes.
+TEST(function_variant_element_test, extract_string_from_scalar_root) {
+ auto variant_column = ColumnVariant::create(0 /*max_subcolumns_count*/, false);
+ auto root_column = ColumnString::create();
+ std::string doc = R"({"wsn":"SRFSPXFDVY","uploadTimeValue":"2026-05-20 18:40:02","n":49.98})";
+ root_column->insert_data(doc.data(), doc.size());
+ variant_column->create_root(std::make_shared<DataTypeString>(), std::move(root_column));
+ variant_column->set_num_rows(1);
+ ASSERT_TRUE(variant_column->is_scalar_variant());
+
+ DataTypeSerDe::FormatOptions options;
+ auto tz = cctz::utc_time_zone();
+ options.timezone = &tz;
+
+ auto extract = [&](const std::string& key) {
+ ColumnPtr index_inner = ColumnString::create();
+ assert_cast<ColumnString*>(index_inner->assume_mutable().get())
+ ->insert_data(key.data(), key.size());
+ ColumnPtr index_column = ColumnConst::create(index_inner, 1);
+ ColumnPtr result;
+ auto status =
+ FunctionVariantElement::get_element_column(*variant_column, index_column, &result);
+ EXPECT_TRUE(status.ok());
+ std::string out;
+ assert_cast<const ColumnVariant&>(*result.get())
+ .serialize_one_row_to_string(0, &out, options);
+ return out;
+ };
+
+ // string values: no surrounding quotes
+ EXPECT_EQ(extract("wsn"), "SRFSPXFDVY");
+ EXPECT_EQ(extract("uploadTimeValue"), "2026-05-20 18:40:02");
+ // non-string scalars keep their JSON representation
+ EXPECT_EQ(extract("n"), "49.98");
+}
+
+} // namespace doris
diff --git a/regression-test/data/variant_p0/sql/select_from_value.out b/regression-test/data/variant_p0/sql/select_from_value.out
index ef562a658e9..1fe3c49651b 100644
--- a/regression-test/data/variant_p0/sql/select_from_value.out
+++ b/regression-test/data/variant_p0/sql/select_from_value.out
@@ -1,4 +1,4 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_from_value --
-"b"
+b
diff --git a/regression-test/suites/variant_p0/element_function.groovy b/regression-test/suites/variant_p0/element_function.groovy
index 7b5e55ea53b..fb183db9690 100644
--- a/regression-test/suites/variant_p0/element_function.groovy
+++ b/regression-test/suites/variant_p0/element_function.groovy
@@ -29,4 +29,32 @@ suite("regression_test_variant_element_at", "p0") {
sql """insert into element_fn_test values (1, '{"arr1" : [1, 2, 3]}', '{"arr2" : [4, 5, 6]}')"""
qt_sql """select array_first((x,y) -> (x - y) < 0, cast(v['arr1'] as array<int>), cast(v1['arr2'] as array<int>)) from element_fn_test"""
+
+ // CIR-20498: extracting a string property from a scalar-string variant
+ // (e.g. `cast(text as variant)['key']`) must not leak the surrounding JSON
+ // double quotes. The root of such a variant is a raw JSON string, so the
+ // extraction goes through the simdjson document path; a string value must be
+ // returned unescaped, consistently with the structured-subcolumn path.
+ def scalar = sql """select cast('{"wsn":"SRFSPXFDVY","uploadTimeValue":"2026-05-20 18:40:02"}' as variant)['wsn']"""
+ assertEquals("SRFSPXFDVY", scalar[0][0])
+
+ def sub = sql """select substring(cast('{"uploadTimeValue":"2026-05-20 18:40:02"}' as variant)['uploadTimeValue'], 1, 10)"""
+ assertEquals("2026-05-20", sub[0][0])
+
+ // values containing escaped characters must be unescaped, not kept as raw JSON tokens
+ def escaped = sql """select cast('{"k":"a\\\\"b"}' as variant)['k']"""
+ assertEquals("a\"b", escaped[0][0])
+
+ // non-string scalars keep their existing JSON representation
+ def num = sql """select cast('{"n":49.98}' as variant)['n']"""
+ assertEquals("49.98", num[0][0])
+
+ // array / object values must keep their JSON text representation (no unquoting):
+ // only the top-level string scalar is unquoted; quotes nested inside JSON are
+ // part of the value and must be preserved.
+ def arr = sql """select cast('{"a":[1,2,3]}' as variant)['a']"""
+ assertEquals("[1,2,3]", arr[0][0])
+
+ def obj = sql """select cast('{"o":{"name":"john"}}' as variant)['o']"""
+ assertEquals('{"name":"john"}', obj[0][0])
}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]