This is an automated email from the ASF dual-hosted git repository.

mrhhsg pushed a commit to branch fix-json-strict-extract
in repository https://gitbox.apache.org/repos/asf/doris.git

commit ab4c47c9aa8ca22563434743b7dbd9d2a194fd88
Author: Hu Shenggang <[email protected]>
AuthorDate: Sat May 16 22:41:45 2026 +0800

    [fix](json) Address strict JSON review comments
    
    ### What problem does this PR solve?
    
    Issue Number: None
    
    Related PR: #63309
    
    Problem Summary: Avoid the extra raw_json pre-pass in strict JSONB parsing, 
and keep const JSON plus const path extract results sized to the input block.
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test: Unit Test
        - ./run-be-ut.sh --run 
--filter=JsonBinaryValueTest.TestValidation:FunctionJsonbTEST.JsonbParseTest:FunctionJsonbTEST.JsonbParseErrorToNullTest:FunctionJsonbTEST.JsonValidStrictTest:FunctionJsonbTEST.JsonExtractConstConstMultiRow
        - build-support/check-format.sh
        - build-support/run-clang-tidy.sh --build-dir be/ut_build_ASAN (failed 
due to existing complexity/NOLINT diagnostics and local toolchain stddef.h lookup)
    - Behavior changed: Yes. With a const JSON column and a const path, non-string 
JSONB extract now returns a const result sized to the input row count.
    - Does this need documentation: No
---
 be/src/exprs/function/function_jsonb.cpp       | 14 +++++++
 be/src/util/jsonb_parser_simd.h                | 23 -----------
 be/test/exprs/function/function_jsonb_test.cpp | 55 +++++++++++++++++++++++++-
 3 files changed, 68 insertions(+), 24 deletions(-)

diff --git a/be/src/exprs/function/function_jsonb.cpp 
b/be/src/exprs/function/function_jsonb.cpp
index 40c10cb4e61..ce1cbfc3c37 100644
--- a/be/src/exprs/function/function_jsonb.cpp
+++ b/be/src/exprs/function/function_jsonb.cpp
@@ -450,6 +450,20 @@ public:
                     return create_all_null_result();
                 }
 
+                if (path_const[0]) {
+                    auto const_null_map = ColumnUInt8::create(1, 0);
+                    auto const_res = Impl::ColumnType::create();
+                    RETURN_IF_ERROR(Impl::scalar_vector(
+                            context, jsonb_data_column->get_data_at(0), rdata, 
roffsets,
+                            path_null_maps[0], const_res->get_data(), 
const_null_map->get_data()));
+                    DCHECK_EQ(const_res->size(), 1);
+                    auto nullable_column =
+                            ColumnNullable::create(std::move(const_res), 
std::move(const_null_map));
+                    block.get_by_position(result).column =
+                            ColumnConst::create(std::move(nullable_column), 
input_rows_count);
+                    return Status::OK();
+                }
+
                 RETURN_IF_ERROR(Impl::scalar_vector(context, 
jsonb_data_column->get_data_at(0),
                                                     rdata, roffsets, 
path_null_maps[0],
                                                     res->get_data(), 
null_map->get_data()));
diff --git a/be/src/util/jsonb_parser_simd.h b/be/src/util/jsonb_parser_simd.h
index 7632eb45fba..202ecb8d8b0 100644
--- a/be/src/util/jsonb_parser_simd.h
+++ b/be/src/util/jsonb_parser_simd.h
@@ -93,29 +93,6 @@ struct JsonbParser {
             simdjson::padded_string json_str {pch, len};
             simdjson::ondemand::document doc = 
simdjson_parser.iterate(json_str);
 
-            auto is_json_whitespace = [](char c) {
-                return c == ' ' || c == '\t' || c == '\n' || c == '\r';
-            };
-            const char* json_begin = json_str.data();
-            const char* json_end = json_str.data() + len;
-            while (json_begin < json_end && is_json_whitespace(*json_begin)) {
-                ++json_begin;
-            }
-            while (json_end > json_begin && is_json_whitespace(*(json_end - 
1))) {
-                --json_end;
-            }
-
-            std::string_view raw_json;
-            simdjson::error_code raw_res = doc.raw_json().get(raw_json);
-            if (raw_res != simdjson::SUCCESS) {
-                return Status::InvalidArgument(fmt::format("simdjson raw_json 
failed: {}",
-                                                           
simdjson::error_message(raw_res)));
-            }
-            if (raw_json.data() != json_begin || raw_json.data() + 
raw_json.size() != json_end) {
-                return Status::InvalidArgument("simdjson parse exception: 
trailing content");
-            }
-            doc.rewind();
-
             // simdjson process top level primitive types specially
             // so some repeated code here
             switch (doc.type()) {
diff --git a/be/test/exprs/function/function_jsonb_test.cpp 
b/be/test/exprs/function/function_jsonb_test.cpp
index 7db6b9febbd..9d7b96f02e4 100644
--- a/be/test/exprs/function/function_jsonb_test.cpp
+++ b/be/test/exprs/function/function_jsonb_test.cpp
@@ -16,8 +16,8 @@
 // under the License.
 
 #include <gtest/gtest.h>
-#include <stdint.h>
 
+#include <cstdint>
 #include <memory>
 #include <string>
 
@@ -384,6 +384,59 @@ TEST(FunctionJsonbTEST, JsonExtractCheckArg) {
     ASSERT_EQ(st.code(), ErrorCode::INVALID_ARGUMENT);
 }
 
+TEST(FunctionJsonbTEST, JsonExtractConstConstMultiRow) {
+    constexpr size_t input_rows_count = 3;
+    auto json_data_type = std::make_shared<DataTypeJsonb>();
+    auto path_data_type = std::make_shared<DataTypeString>();
+    auto return_type = make_nullable(std::make_shared<DataTypeUInt8>());
+
+    JsonbWriter writer;
+    ASSERT_TRUE(writer.writeStartObject());
+    ASSERT_TRUE(writer.writeKey("a"));
+    ASSERT_TRUE(writer.writeNull());
+    ASSERT_TRUE(writer.writeEndObject());
+
+    auto json_column = json_data_type->create_column();
+    json_column->insert_data(writer.getOutput()->getBuffer(), 
writer.getOutput()->getSize());
+
+    auto path_column = path_data_type->create_column();
+    path_column->insert_data("$.a", 3);
+
+    Block block;
+    block.insert({ColumnConst::create(std::move(json_column), 
input_rows_count), json_data_type,
+                  "json_col"});
+    block.insert({ColumnConst::create(std::move(path_column), 
input_rows_count), path_data_type,
+                  "path_col"});
+
+    FunctionBasePtr func = SimpleFunctionFactory::instance().get_function(
+            "json_extract_isnull", block.get_columns_with_type_and_name(), 
return_type);
+    ASSERT_TRUE(func != nullptr);
+
+    FunctionUtils fn_utils(return_type, {json_data_type, path_data_type}, 0);
+    auto* fn_ctx = fn_utils.get_fn_ctx();
+    auto st = func->open(fn_ctx, FunctionContext::FRAGMENT_LOCAL);
+    ASSERT_TRUE(st.ok()) << "open failed: " << st.to_string();
+    st = func->open(fn_ctx, FunctionContext::THREAD_LOCAL);
+    ASSERT_TRUE(st.ok()) << "open failed: " << st.to_string();
+
+    block.insert({nullptr, return_type, "result"});
+    auto result = block.columns() - 1;
+    st = func->execute(fn_ctx, block, {0, 1}, result, input_rows_count);
+    ASSERT_TRUE(st.ok()) << "execute failed: " << st.to_string();
+
+    auto result_column = 
block.get_by_position(result).column->convert_to_full_column_if_const();
+    ASSERT_EQ(result_column->size(), input_rows_count);
+    const auto& result_nullable = assert_cast<const 
ColumnNullable&>(*result_column);
+    const auto& result_data = assert_cast<const 
ColumnUInt8&>(result_nullable.get_nested_column());
+    for (size_t i = 0; i < input_rows_count; ++i) {
+        EXPECT_FALSE(result_nullable.is_null_at(i));
+        EXPECT_EQ(result_data.get_data()[i], 1);
+    }
+
+    static_cast<void>(func->close(fn_ctx, FunctionContext::THREAD_LOCAL));
+    static_cast<void>(func->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL));
+}
+
 TEST(FunctionJsonbTEST, JsonParseCheckArg) {
     ColumnsWithTypeAndName args;
     args.emplace_back(


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to