This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 8cb5a4210f7 [fix](variant) Disable strict mode for variant internal 
cast to fix INSERT INTO SELECT returning all NULLs (#60881)
8cb5a4210f7 is described below

commit 8cb5a4210f794abca49b9aa0355120877aba2f32
Author: lihangyu <[email protected]>
AuthorDate: Sat Feb 28 10:02:43 2026 +0800

    [fix](variant) Disable strict mode for variant internal cast to fix INSERT 
INTO SELECT returning all NULLs (#60881)
    
    ### Bug
    INSERT INTO SELECT with LEFT JOIN on CTE that casts variant subcolumns
    (e.g., cast(content['val'] as decimal)) returns all NULLs for the
    right-side
    CTE fields, while a direct SELECT produces correct results.
    
    ### Root cause
    In INSERT context, strict mode is enabled. When cast_from_variant_impl
    clones the FunctionContext, it inherits strict mode. The variant root
    column may
    contain null/empty JSONB entries for rows where the subcolumn doesn't
    exist
    (mixed-schema variant data). In strict mode, these null entries cause
    the ENTIRE
    cast to fail and return all NULLs.
    
    ### Fix
    Explicitly disable strict mode in the cloned FunctionContext inside
    cast_from_variant_impl, since this is an internal type conversion within
    variant
    processing, not user-provided INSERT data validation.
    
    ### Tests:
    - BE unit test: CastFromVariantStrictModeRegression (variant→Int32,
    string→Int32 with strict mode enabled)
    - Regression test: test_variant_cast_strict_mode (INSERT INTO SELECT
    with LEFT JOIN on CTE with variant subcolumn casts, verifying results
    match SELECT)
---
 be/src/vec/functions/cast/cast_to_variant.h        |   8 ++
 .../function/cast/function_variant_cast_test.cpp   | 148 +++++++++++++++++++++
 .../variant_p0/test_variant_cast_strict_mode.out   |  19 +++
 .../test_variant_cast_strict_mode.groovy           | 131 ++++++++++++++++++
 4 files changed, 306 insertions(+)

diff --git a/be/src/vec/functions/cast/cast_to_variant.h 
b/be/src/vec/functions/cast/cast_to_variant.h
index 92139e86e28..491a3f1a92b 100644
--- a/be/src/vec/functions/cast/cast_to_variant.h
+++ b/be/src/vec/functions/cast/cast_to_variant.h
@@ -57,6 +57,14 @@ inline Status cast_from_variant_impl(FunctionContext* 
context, Block& block,
         auto new_context = context == nullptr ? nullptr : context->clone();
         if (new_context != nullptr) {
             new_context->set_jsonb_string_as_string(true);
+            // Disable strict mode for the inner JSONB→target conversion.
+            // The variant root column may contain null/empty JSONB entries 
for rows
+            // where the subcolumn doesn't exist (e.g., mixed-schema variant 
data).
+            // In strict mode (INSERT context), these null entries cause the 
ENTIRE
+            // cast to fail and return all NULLs. Since this is an internal 
type
+            // conversion within variant, not user-provided INSERT data 
validation,
+            // strict mode should not apply here.
+            new_context->set_enable_strict_mode(false);
         }
         // dst type nullable has been removed, so we should remove the inner 
nullable of root column
         auto wrapper =
diff --git a/be/test/vec/function/cast/function_variant_cast_test.cpp 
b/be/test/vec/function/cast/function_variant_cast_test.cpp
index 203606c236d..acfcb9e255e 100644
--- a/be/test/vec/function/cast/function_variant_cast_test.cpp
+++ b/be/test/vec/function/cast/function_variant_cast_test.cpp
@@ -24,9 +24,11 @@
 #include "runtime/primitive_type.h"
 #include "runtime/runtime_state.h"
 #include "vec/columns/column_array.h"
+#include "vec/columns/column_decimal.h"
 #include "vec/columns/column_variant.h"
 #include "vec/core/field.h"
 #include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_decimal.h"
 #include "vec/data_types/data_type_nullable.h"
 #include "vec/data_types/data_type_number.h"
 #include "vec/data_types/data_type_string.h"
@@ -465,4 +467,150 @@ TEST(FunctionVariantCast, CastFromVariantWithEmptyRoot) {
     }
 }
 
+// Regression test for JIRA-233:
+// INSERT INTO SELECT with variant→target cast returns all NULLs in strict 
mode.
+// The bug was that cast_from_variant_impl inherited strict mode from the 
INSERT context,
+// causing internal JSONB→target conversion to fail for null entries, which 
made the
+// entire cast return all NULLs.
+TEST(FunctionVariantCast, CastFromVariantStrictModeRegression) {
+    // Test: variant with nullable root → Int32 with strict mode enabled
+    // Before fix: strict mode causes all NULLs
+    // After fix: non-null entries produce correct values, null entries stay 
NULL
+    {
+        auto variant_type = std::make_shared<DataTypeVariant>();
+        auto int32_type = std::make_shared<DataTypeInt32>();
+        auto nullable_int32_type = 
std::make_shared<DataTypeNullable>(int32_type);
+
+        // Create variant column with nullable integer root (some null, some 
not)
+        auto variant_col = ColumnVariant::create(0);
+        variant_col->create_root(
+                nullable_int32_type,
+                ColumnNullable::create(ColumnInt32::create(), 
ColumnUInt8::create()));
+        MutableColumnPtr data = variant_col->get_root();
+
+        // Row 0: value 42
+        data->insert(Field::create_field<TYPE_INT>(42));
+        // Row 1: NULL (simulating a row where the variant subcolumn doesn't 
exist)
+        data->insert(Field::create_field<TYPE_NULL>(Null()));
+        // Row 2: value 100
+        data->insert(Field::create_field<TYPE_INT>(100));
+        // Row 3: NULL
+        data->insert(Field::create_field<TYPE_NULL>(Null()));
+        // Row 4: value -5
+        data->insert(Field::create_field<TYPE_INT>(-5));
+
+        variant_col->finalize();
+
+        ColumnsWithTypeAndName arguments {{variant_col->get_ptr(), 
variant_type, "variant_col"},
+                                          {nullptr, int32_type, "int32_type"}};
+
+        auto function =
+                SimpleFunctionFactory::instance().get_function("CAST", 
arguments, int32_type);
+        ASSERT_NE(function, nullptr);
+
+        Block block {arguments};
+        size_t result_column = block.columns();
+        block.insert({nullptr, int32_type, "result"});
+
+        RuntimeState state;
+        auto ctx = FunctionContext::create_context(&state, {}, {});
+
+        // Enable strict mode to simulate INSERT context (this is the key!)
+        ctx->set_enable_strict_mode(true);
+
+        ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 
5).ok());
+
+        auto result_col = block.get_by_position(result_column).column;
+        ASSERT_NE(result_col.get(), nullptr);
+
+        // The result should be a nullable column
+        const auto* nullable_result = assert_cast<const 
ColumnNullable*>(result_col.get());
+        ASSERT_EQ(nullable_result->size(), 5);
+
+        const auto& result_data =
+                assert_cast<const 
ColumnInt32&>(nullable_result->get_nested_column());
+        const auto& null_map = nullable_result->get_null_map_data();
+
+        // Row 0: value 42, not null
+        ASSERT_EQ(null_map[0], 0);
+        ASSERT_EQ(result_data.get_element(0), 42);
+
+        // Row 1: NULL
+        ASSERT_EQ(null_map[1], 1);
+
+        // Row 2: value 100, not null
+        ASSERT_EQ(null_map[2], 0);
+        ASSERT_EQ(result_data.get_element(2), 100);
+
+        // Row 3: NULL
+        ASSERT_EQ(null_map[3], 1);
+
+        // Row 4: value -5, not null
+        ASSERT_EQ(null_map[4], 0);
+        ASSERT_EQ(result_data.get_element(4), -5);
+    }
+
+    // Test 2: variant with string root → Int32 with strict mode
+    // Simulates casting variant['field'] that stored as string "123" to Int32
+    {
+        auto variant_type = std::make_shared<DataTypeVariant>();
+        auto int32_type = std::make_shared<DataTypeInt32>();
+        auto nullable_string_type =
+                
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+
+        auto variant_col = ColumnVariant::create(0);
+        variant_col->create_root(
+                nullable_string_type,
+                ColumnNullable::create(ColumnString::create(), 
ColumnUInt8::create()));
+        MutableColumnPtr data = variant_col->get_root();
+
+        // Row 0: "42"
+        data->insert(Field::create_field<TYPE_STRING>(String("42")));
+        // Row 1: NULL
+        data->insert(Field::create_field<TYPE_NULL>(Null()));
+        // Row 2: "100"
+        data->insert(Field::create_field<TYPE_STRING>(String("100")));
+
+        variant_col->finalize();
+
+        ColumnsWithTypeAndName arguments {{variant_col->get_ptr(), 
variant_type, "variant_col"},
+                                          {nullptr, int32_type, "int32_type"}};
+
+        auto function =
+                SimpleFunctionFactory::instance().get_function("CAST", 
arguments, int32_type);
+        ASSERT_NE(function, nullptr);
+
+        Block block {arguments};
+        size_t result_column = block.columns();
+        block.insert({nullptr, int32_type, "result"});
+
+        RuntimeState state;
+        auto ctx = FunctionContext::create_context(&state, {}, {});
+
+        // Enable strict mode (INSERT context)
+        ctx->set_enable_strict_mode(true);
+
+        ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column, 
3).ok());
+
+        auto result_col = block.get_by_position(result_column).column;
+        ASSERT_NE(result_col.get(), nullptr);
+
+        const auto* nullable_result = assert_cast<const 
ColumnNullable*>(result_col.get());
+        ASSERT_EQ(nullable_result->size(), 3);
+
+        const auto& result_data =
+                assert_cast<const 
ColumnInt32&>(nullable_result->get_nested_column());
+        const auto& null_map = nullable_result->get_null_map_data();
+
+        // After fix: non-null entries should produce correct values
+        ASSERT_EQ(null_map[0], 0);
+        ASSERT_EQ(result_data.get_element(0), 42);
+
+        ASSERT_EQ(null_map[1], 1); // NULL stays NULL
+
+        ASSERT_EQ(null_map[2], 0);
+        ASSERT_EQ(result_data.get_element(2), 100);
+    }
+}
+
 } // namespace doris::vectorized
\ No newline at end of file
diff --git a/regression-test/data/variant_p0/test_variant_cast_strict_mode.out 
b/regression-test/data/variant_p0/test_variant_cast_strict_mode.out
new file mode 100644
index 00000000000..ddd52dd911a
--- /dev/null
+++ b/regression-test/data/variant_p0/test_variant_cast_strict_mode.out
@@ -0,0 +1,19 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select --
+1      100.50  a
+2      200.75  b
+3      \N      c
+4      300.25  d
+
+-- !insert --
+1      100.50  a
+2      200.75  b
+3      \N      c
+4      300.25  d
+
+-- !direct_insert --
+1      100.50  a
+2      200.75  b
+3      \N      c
+4      300.25  d
+
diff --git 
a/regression-test/suites/variant_p0/test_variant_cast_strict_mode.groovy 
b/regression-test/suites/variant_p0/test_variant_cast_strict_mode.groovy
new file mode 100644
index 00000000000..4e5d88738e8
--- /dev/null
+++ b/regression-test/suites/variant_p0/test_variant_cast_strict_mode.groovy
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Regression test for JIRA-233:
+// INSERT INTO SELECT with LEFT JOIN on CTE that casts variant subcolumns
+// returns all NULLs for the right-side CTE fields, while SELECT returns 
correct results.
+//
+// Root cause: In INSERT context, strict mode is enabled. When 
cast_from_variant_impl
+// clones the FunctionContext, it inherits strict mode. The variant root 
column may have
+// null/empty JSONB entries for rows where the subcolumn doesn't exist. In 
strict mode,
+// these cause the entire cast to fail and return all NULLs.
+
+suite("test_variant_cast_strict_mode", "variant_type") {
+
+    sql """ set default_variant_enable_doc_mode = false """
+
+    def variant_src = "test_variant_cast_strict_mode_src"
+    def target_tbl = "test_variant_cast_strict_mode_target"
+
+    sql "DROP TABLE IF EXISTS ${variant_src}"
+    sql "DROP TABLE IF EXISTS ${target_tbl}"
+
+    sql """
+        CREATE TABLE ${variant_src} (
+            `id` int NOT NULL,
+            `content` variant NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        )
+    """
+
+    // Insert rows: some have "val" field, some don't (mixed schema)
+    sql """
+        INSERT INTO ${variant_src} VALUES
+        (1, '{"val":"100.50", "name":"a"}'),
+        (2, '{"val":"200.75", "name":"b"}'),
+        (3, '{"name":"c"}'),
+        (4, '{"val":"300.25", "name":"d"}')
+    """
+
+    sql """
+        CREATE TABLE ${target_tbl} (
+            `id` int NULL,
+            `val_decimal` decimal(20,2) NULL,
+            `name_str` varchar(100) NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        )
+    """
+
+    // Step 1: Verify SELECT with variant cast returns correct results
+    qt_select """
+        WITH cte AS (
+            SELECT
+                id,
+                cast(content['val'] AS decimal(20,2)) AS val_decimal,
+                cast(content['name'] AS varchar) AS name_str
+            FROM ${variant_src}
+        )
+        SELECT * FROM cte ORDER BY id
+    """
+
+    // Step 2: INSERT INTO SELECT with LEFT JOIN pattern (mirrors the original 
issue)
+    // The LEFT JOIN pattern is key: v1 LEFT JOIN v3
+    // v3 contains the variant->decimal cast
+    sql """
+        INSERT INTO ${target_tbl}
+        WITH
+            v1 AS (
+                SELECT id FROM ${variant_src}
+            ),
+            v3 AS (
+                SELECT
+                    id,
+                    cast(content['val'] AS decimal(20,2)) AS val_decimal,
+                    cast(content['name'] AS varchar) AS name_str
+                FROM ${variant_src}
+            )
+        SELECT
+            v1.id,
+            v3.val_decimal,
+            v3.name_str
+        FROM v1
+        LEFT JOIN v3 ON v3.id = v1.id
+    """
+
+    // Step 3: Verify inserted results match SELECT results
+    // Before fix: val_decimal and name_str were all NULL
+    // After fix: should match the SELECT results
+    qt_insert """
+        SELECT * FROM ${target_tbl} ORDER BY id
+    """
+
+    // Step 4: Test direct INSERT INTO SELECT without LEFT JOIN (simpler case)
+    sql "TRUNCATE TABLE ${target_tbl}"
+    sql """
+        INSERT INTO ${target_tbl}
+        SELECT
+            id,
+            cast(content['val'] AS decimal(20,2)) AS val_decimal,
+            cast(content['name'] AS varchar) AS name_str
+        FROM ${variant_src}
+    """
+    qt_direct_insert """
+        SELECT * FROM ${target_tbl} ORDER BY id
+    """
+
+    // Cleanup
+    sql "DROP TABLE IF EXISTS ${variant_src}"
+    sql "DROP TABLE IF EXISTS ${target_tbl}"
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to