This is an automated email from the ASF dual-hosted git repository.

morrySnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new fa64a2eb0f0 [fix](nereids) Guard LogicalView.computeOutput() against 
schema drift (IndexOutOfBoundsException) (#64007)
fa64a2eb0f0 is described below

commit fa64a2eb0f0c05598ef82358a514764eb84bdaae
Author: zhaorongsheng <[email protected]>
AuthorDate: Tue Jun 9 13:55:17 2026 +0800

    [fix](nereids) Guard LogicalView.computeOutput() against schema drift 
(IndexOutOfBoundsException) (#64007)
    
    ## Problem
    
    When querying a Doris view built with `SELECT *` over an
    external-catalog table (e.g. Hive), after the underlying table gains new
    columns via `ALTER TABLE ADD COLUMNS` + `REFRESH TABLE <base_table>`,
    the query crashes with:
    
    ```
    errCode = 2, detailMessage = Index 3 out of bounds for length 3
    ```
    
    **Root cause** (`LogicalView.computeOutput()`):
    
    The view body is re-analyzed against the refreshed base-table schema (4
    columns), producing `childOutput` with 4 slots. But
    `view.getFullSchema()` still returns 3 columns (the schema stored at
    view-creation time in the Hive metastore – not yet refreshed). The loop
    runs `i = 0..3` and calls `view.getFullSchema().get(3)`, crashing.
    
    The `CollectionUtils.isEmpty()` guard added in #40715 handles the
    `null`/empty case but not the under-sized case.
    
    ## Fix
    
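    Promote `view.getFullSchema()` to a local variable to avoid repeated
    calls, then bound the loop by the smaller of the child output size and
    the stored schema size whenever a stored schema is present:
    
    ```java
    List<Column> fullSchema = view.getFullSchema();
    boolean hasSchema = !CollectionUtils.isEmpty(fullSchema);
    int limit = hasSchema ? Math.min(childOutput.size(), fullSchema.size()) : childOutput.size();
    for (int i = 0; i < limit; i++) {
        Slot originSlot = childOutput.get(i);
        Slot qualified;
        if (!hasSchema) {
            qualified = originSlot.withQualifier(fullQualifiers);   // existing null/empty-guard path
        } else {
            qualified = originSlot
                    .withOneLevelTableAndColumnAndQualifier(view, fullSchema.get(i), fullQualifiers);
        }
        currentOutput.add(qualified);
    }
    ```
    
    Extra slots (columns added after view creation) are truncated, so the
    view keeps exposing only the columns recorded in its stored schema; the
    new columns are not visible until the view itself is refreshed. If the
    child produces fewer slots than the stored schema, the loop stops at the
    child size. No behavioral change for non-drifted views.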
    
    ## Files Changed
    
    | File | Change |
    |------|--------|
    | `fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java` | Fix + import `Column` |
    | `fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/logical/LogicalViewTest.java` | New unit test |
    | `regression-test/suites/external_table_p0/hive/test_hive_view_schema_drift.groovy` | New regression test |
    
    ## Issue
    
    Fixes: #64006
    
    ---------
    
    Signed-off-by: zhaorongsheng <[email protected]>
    Co-authored-by: zhaorongsheng <[email protected]>
---
 .../nereids/trees/plans/logical/LogicalView.java   |  19 +-
 .../trees/plans/logical/LogicalViewTest.java       | 245 +++++++++++++++++++++
 .../hive/test_hive_view_schema_drift.groovy        | 115 ++++++++++
 3 files changed, 374 insertions(+), 5 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java
index a7dd046cb36..a42d75dc26d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.nereids.trees.plans.logical;
 
+import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.ViewIf;
 import org.apache.doris.nereids.exceptions.AnalysisException;
 import org.apache.doris.nereids.memo.GroupExpression;
@@ -121,16 +122,24 @@ public class LogicalView<BODY extends Plan> extends 
LogicalUnary<BODY> {
         List<Slot> childOutput = child().getOutput();
         ImmutableList.Builder<Slot> currentOutput = ImmutableList.builder();
         List<String> fullQualifiers = this.view.getFullQualifiers();
-        for (int i = 0; i < childOutput.size(); i++) {
+        List<Column> fullSchema = view.getFullSchema();
+        // ATTN: because bug intro by #40715, after replace view, full schema 
will be empty or null.
+        //   So, we must guard here to avoid NPE or out of bound exception.
+        // When fullSchema is present it defines the view's output contract 
(built from
+        //   CreateViewInfo.finalCols). Use Math.min() as the loop bound to 
handle both
+        //   directions of schema drift without leaking undeclared columns:
+        //   - child has MORE slots (base table gained columns after view 
creation): truncate
+        //   - child has FEWER slots (base table lost columns): stop at child 
size
+        boolean hasSchema = !CollectionUtils.isEmpty(fullSchema);
+        int limit = hasSchema ? Math.min(childOutput.size(), 
fullSchema.size()) : childOutput.size();
+        for (int i = 0; i < limit; i++) {
             Slot originSlot = childOutput.get(i);
             Slot qualified;
-            // ATTN: because bug intro by #40715, after replace view, full 
schema will be empty or null.
-            //   So, we must just here to avoid NPE or out of bound exception.
-            if (CollectionUtils.isEmpty(view.getFullSchema())) {
+            if (!hasSchema) {
                 qualified = originSlot.withQualifier(fullQualifiers);
             } else {
                 qualified = originSlot
-                        .withOneLevelTableAndColumnAndQualifier(view, 
view.getFullSchema().get(i), fullQualifiers);
+                        .withOneLevelTableAndColumnAndQualifier(view, 
fullSchema.get(i), fullQualifiers);
             }
             currentOutput.add(qualified);
         }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/logical/LogicalViewTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/logical/LogicalViewTest.java
new file mode 100644
index 00000000000..210a34be65b
--- /dev/null
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/logical/LogicalViewTest.java
@@ -0,0 +1,245 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.plans.logical;
+
+import org.apache.doris.catalog.AggregateType;
+import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.Type;
+import org.apache.doris.catalog.ViewIf;
+import org.apache.doris.nereids.trees.expressions.Slot;
+
+import com.google.common.collect.ImmutableList;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.mockito.Mockito;
+
+import java.lang.reflect.Proxy;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Unit tests for {@link LogicalView#computeOutput()}.
+ *
+ * <p>Focuses on the schema-drift scenario: when the underlying external table 
(e.g. Hive)
+ * gains new columns after the view was created, {@code view.getFullSchema()} 
is smaller
+ * than {@code child().getOutput()} and the loop used to crash with
+ * {@code IndexOutOfBoundsException}.
+ */
+public class LogicalViewTest {
+
+    // -----------------------------------------------------------------------
+    // Helpers
+    // -----------------------------------------------------------------------
+
+    private static Slot slot(String name) {
+        Slot slot = Mockito.mock(Slot.class);
+        Mockito.when(slot.getName()).thenReturn(name);
+        Mockito.when(slot.withQualifier(Mockito.anyList())).thenReturn(slot);
+        Mockito.when(slot.withOneLevelTableAndColumnAndQualifier(
+                Mockito.any(), Mockito.any(), 
Mockito.anyList())).thenReturn(slot);
+        return slot;
+    }
+
+    private static Column col(String name) {
+        return new Column(name, Type.STRING, false, AggregateType.NONE, "", 
"");
+    }
+
+    /**
+     * Build a minimal ViewIf stub via JDK Proxy (avoids ByteBuddy/Mockito
+     * which triggers TableIf → ColumnStatistic class-loading issues in unit 
tests).
+     */
+    @SuppressWarnings("unchecked")
+    private static ViewIf mockView(List<Column> schema, List<String> 
qualifiers) {
+        return (ViewIf) Proxy.newProxyInstance(
+                ViewIf.class.getClassLoader(),
+                new Class<?>[] {ViewIf.class},
+                (proxy, method, args) -> {
+                    switch (method.getName()) {
+                        case "getFullSchema": return schema;
+                        case "getFullQualifiers": return qualifiers;
+                        case "getViewText": return "";
+                        default: return null;
+                    }
+                });
+    }
+
+    /**
+     * Build a mock LogicalPlan child whose {@code getOutput()} returns the 
given slots.
+     * Also stubs {@code getAllChildrenTypes()} so AbstractTreeNode's 
constructor does not
+     * crash with NPE when calling {@code containsTypes.or(childTypes)}.
+     */
+    private static LogicalPlan mockChild(List<Slot> slots) {
+        LogicalPlan child = Mockito.mock(LogicalPlan.class);
+        Mockito.when(child.getOutput()).thenReturn(slots);
+        Mockito.when(child.getAllChildrenTypes()).thenReturn(new BitSet());
+        return child;
+    }
+
+    // -----------------------------------------------------------------------
+    // Tests
+    // -----------------------------------------------------------------------
+
+    /**
+     * Normal case: child output size == fullSchema size.
+     * All slots should be decorated with their corresponding stored column.
+     */
+    @Test
+    public void testComputeOutputNormalCase() {
+        List<Column> schema = ImmutableList.of(col("id"), col("name"), 
col("age"));
+        List<Slot> childSlots = ImmutableList.of(slot("id"), slot("name"), 
slot("age"));
+
+        ViewIf view = mockView(schema, ImmutableList.of("hive", "test", "v"));
+        LogicalView<LogicalPlan> logicalView = new LogicalView<>(view, 
mockChild(childSlots));
+
+        List<Slot> output = logicalView.computeOutput();
+
+        Assertions.assertEquals(3, output.size(), "Output size should match 
schema size");
+        Assertions.assertEquals("id", output.get(0).getName());
+        Assertions.assertEquals("name", output.get(1).getName());
+        Assertions.assertEquals("age", output.get(2).getName());
+    }
+
+    /**
+     * Schema-drift case: child output has MORE slots than fullSchema.
+     *
+     * <p>This is the regression test for the IndexOutOfBoundsException bug:
+     * after {@code ALTER TABLE ADD COLUMNS} + {@code REFRESH TABLE} on the 
base table,
+     * {@code childOutput.size()} grows beyond {@code 
view.getFullSchema().size()}.
+     *
+     * <p>The fix truncates output to the view's declared schema width so that:
+     * (1) no IndexOutOfBoundsException is thrown, and
+     * (2) the view's output contract (as seen in {@code DESC view}) is 
preserved.
+     */
+    @Test
+    public void testComputeOutputSchemaDrift_moreColumnsThanSchema() {
+        // View was created with 3-column base table → fullSchema has 3 entries
+        List<Column> schema = ImmutableList.of(col("id"), col("name"), 
col("age"));
+        // After ADD COLUMN + REFRESH TABLE, the child produces 4 slots
+        List<Slot> childSlots = ImmutableList.of(slot("id"), slot("name"), 
slot("age"), slot("score"));
+
+        ViewIf view = mockView(schema, ImmutableList.of("hive", "test", "v"));
+        LogicalView<LogicalPlan> logicalView = new LogicalView<>(view, 
mockChild(childSlots));
+
+        // Must NOT throw IndexOutOfBoundsException
+        List<Slot> output = 
Assertions.assertDoesNotThrow(logicalView::computeOutput,
+                "computeOutput() must not throw when fullSchema is shorter 
than childOutput");
+
+        // Output is truncated to the view's declared schema width (3), not 
the child width (4).
+        // The new 'score' column is not visible until the view itself is 
refreshed.
+        Assertions.assertEquals(3, output.size(),
+                "Output size should be truncated to fullSchema size to 
preserve view contract");
+        Assertions.assertEquals("id", output.get(0).getName());
+        Assertions.assertEquals("name", output.get(1).getName());
+        Assertions.assertEquals("age", output.get(2).getName());
+    }
+
+    /**
+     * Schema-drift case: child output has FEWER slots than fullSchema.
+     *
+     * <p>Defensive check: if a column was somehow removed from the base table
+     * while the view schema is wider, we must not access a nonexistent child 
slot.
+     * The output should simply reflect however many columns the child returns.
+     */
+    @Test
+    public void testComputeOutputSchemaDrift_fewerColumnsThanSchema() {
+        // fullSchema wider than actual child (defensive scenario)
+        List<Column> schema = ImmutableList.of(col("id"), col("name"), 
col("age"), col("score"));
+        List<Slot> childSlots = ImmutableList.of(slot("id"), slot("name"));
+
+        ViewIf view = mockView(schema, ImmutableList.of("hive", "test", "v"));
+        LogicalView<LogicalPlan> logicalView = new LogicalView<>(view, 
mockChild(childSlots));
+
+        List<Slot> output = logicalView.computeOutput();
+
+        Assertions.assertEquals(2, output.size(),
+                "Output size should equal child output size (loop bound)");
+        Assertions.assertEquals("id", output.get(0).getName());
+        Assertions.assertEquals("name", output.get(1).getName());
+    }
+
+    /**
+     * Empty schema case (guard introduced by #40715): fullSchema is empty.
+     * All slots must fall back to withQualifier().
+     */
+    @Test
+    public void testComputeOutputEmptySchema() {
+        List<Slot> childSlots = ImmutableList.of(slot("id"), slot("name"));
+
+        ViewIf view = mockView(Collections.emptyList(), 
ImmutableList.of("hive", "test", "v"));
+        LogicalView<LogicalPlan> logicalView = new LogicalView<>(view, 
mockChild(childSlots));
+
+        List<Slot> output = 
Assertions.assertDoesNotThrow(logicalView::computeOutput,
+                "computeOutput() must not throw for empty fullSchema");
+
+        Assertions.assertEquals(2, output.size());
+        Assertions.assertEquals("id", output.get(0).getName());
+        Assertions.assertEquals("name", output.get(1).getName());
+    }
+
+    /**
+     * Null schema case (guard introduced by #40715): getFullSchema() returns 
null.
+     * All slots must fall back to withQualifier().
+     */
+    @Test
+    public void testComputeOutputNullSchema() {
+        List<Slot> childSlots = ImmutableList.of(slot("id"), slot("name"));
+
+        ViewIf view = mockView(null, ImmutableList.of("hive", "test", "v"));
+        LogicalView<LogicalPlan> logicalView = new LogicalView<>(view, 
mockChild(childSlots));
+
+        List<Slot> output = 
Assertions.assertDoesNotThrow(logicalView::computeOutput,
+                "computeOutput() must not throw for null fullSchema");
+
+        Assertions.assertEquals(2, output.size());
+    }
+
+    /**
+     * Single new column added (minimal drift). Verifies the exact boundary 
condition
+     * at index {@code fullSchema.size()}: output is truncated to schema width 
(1).
+     */
+    @Test
+    public void testComputeOutputSchemaDrift_singleColumnAdded() {
+        // 1-column schema, child returns 2 slots after schema drift
+        List<Column> schema = ImmutableList.of(col("id"));
+        List<Slot> childSlots = ImmutableList.of(slot("id"), slot("extra"));
+
+        ViewIf view = mockView(schema, ImmutableList.of("hive", "test", "v"));
+        LogicalView<LogicalPlan> logicalView = new LogicalView<>(view, 
mockChild(childSlots));
+
+        List<Slot> output = 
Assertions.assertDoesNotThrow(logicalView::computeOutput);
+
+        // Truncated to schema width; the new 'extra' column is not exposed.
+        Assertions.assertEquals(1, output.size());
+        Assertions.assertEquals("id", output.get(0).getName());
+    }
+
+    /**
+     * Child returns no output (empty relation). Must return an empty list and 
not crash.
+     */
+    @Test
+    public void testComputeOutputEmptyChild() {
+        List<Column> schema = ImmutableList.of(col("id"), col("name"));
+        ViewIf view = mockView(schema, ImmutableList.of("hive", "test", "v"));
+        LogicalView<LogicalPlan> logicalView = new LogicalView<>(view, 
mockChild(ImmutableList.of()));
+
+        List<Slot> output = logicalView.computeOutput();
+
+        Assertions.assertTrue(output.isEmpty());
+    }
+}
diff --git 
a/regression-test/suites/external_table_p0/hive/test_hive_view_schema_drift.groovy
 
b/regression-test/suites/external_table_p0/hive/test_hive_view_schema_drift.groovy
new file mode 100644
index 00000000000..2232143b245
--- /dev/null
+++ 
b/regression-test/suites/external_table_p0/hive/test_hive_view_schema_drift.groovy
@@ -0,0 +1,115 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Regression test for: LogicalView.computeOutput() IndexOutOfBoundsException 
when
+// an underlying Hive table gains new columns (schema drift) after the Hive 
view was created.
+//
+// Repro:
+//   1. Create a Hive base table (3 cols) and a native Hive VIEW on it.
+//   2. In Doris, register the Hive catalog and query the external view — OK 
(3 cols).
+//   3. ADD COLUMN to the Hive base table via hive_docker.
+//   4. REFRESH TABLE <base_table> in Doris (view HMS schema NOT refreshed).
+//   5. Query the external view again — used to crash:
+//        errCode = 2, detailMessage = Index 3 out of bounds for length 3
+//      because LogicalView.computeOutput() iterated childOutput (4 slots from 
the
+//      re-analyzed view body) but called view.getFullSchema().get(i) on a 
3-element
+//      list (the Hive view's HMS schema at creation time).
+//
+// The fix: use Math.min(childOutput.size(), fullSchema.size()) as the loop 
bound,
+// preserving the view's declared output contract while preventing the crash.
+
+suite("test_hive_view_schema_drift", "p0,external,hive_docker") {
+
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("disable Hive test.")
+        return;
+    }
+
+    for (String hivePrefix : ["hive2", "hive3"]) {
+        setHivePrefix(hivePrefix)
+        String hms_port = context.config.otherConfigs.get(hivePrefix + 
"HmsPort")
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        String catalog_name = "test_${hivePrefix}_view_schema_drift"
+        String db = "test_view_schema_drift_db"
+        String base_table = "test_view_schema_drift_base"
+        String hive_view = "test_view_schema_drift_view"
+
+        try {
+            // ---- Register Hive catalog in Doris ----
+            sql """drop catalog if exists ${catalog_name}"""
+            sql """CREATE CATALOG ${catalog_name} PROPERTIES (
+                'type'='hms',
+                'hive.metastore.uris' = 
'thrift://${externalEnvIp}:${hms_port}',
+                'hadoop.username' = 'hive'
+            )"""
+
+            // ---- Create Hive database, base table (3 cols), and a native 
Hive VIEW ----
+            // The view is created through hive_docker so it is a native Hive 
view
+            // (ExternalView in Doris). Its HMS schema records exactly 3 
columns.
+            hive_docker """drop database if exists ${db} cascade"""
+            hive_docker """create database ${db}"""
+            hive_docker """
+                create table ${db}.${base_table} (
+                    id     bigint,
+                    name   string,
+                    age    string
+                )
+                partitioned by (dt string)
+                stored as parquet
+            """
+            hive_docker """
+                create view ${db}.${hive_view} as
+                    select id, name, age from ${db}.${base_table}
+            """
+
+            sql """switch ${catalog_name}"""
+            sql """use ${db}"""
+
+            // ---- Baseline: query the Hive view (3 columns) ----
+            def beforeDrift = sql """select * from ${hive_view} where 1=0"""
+            assertTrue(beforeDrift.isEmpty(), "Expected empty result before 
schema drift")
+
+            // ---- Schema drift: add a column to the Hive base table ----
+            hive_docker """alter table ${db}.${base_table} add columns (score 
string comment 'new col')"""
+
+            // ---- Refresh only the base table (view HMS schema is NOT 
refreshed) ----
+            // After this, Doris re-analyzes the view body against the 
4-column base table,
+            // producing childOutput with 4 slots, while 
ExternalView.getFullSchema() still
+            // returns 3 columns from the Hive metastore → 
IndexOutOfBoundsException before fix.
+            sql """refresh table ${base_table}"""
+
+            // ---- Base table now exposes 5 columns (id, name, age, score + 
partition col dt) ----
+            def descBase = sql """desc ${base_table}"""
+            assertEquals(5, descBase.size(),
+                    "Base table should have 5 columns after ADD COLUMN + 
REFRESH (including partition column)")
+
+            // ---- Querying the external Hive view must NOT throw 
IndexOutOfBoundsException ----
+            // The view's HMS schema still has 3 cols (view not refreshed), so 
the output
+            // is truncated to the view's declared width (3 cols) — the new 
'score' column
+            // is not visible until the view itself is refreshed.
+            def afterDrift = sql """select * from ${hive_view} where 1=0"""
+            assertTrue(afterDrift.isEmpty(), "Expected empty result after 
schema drift (WHERE 1=0)")
+
+        } finally {
+            try {
+                hive_docker """drop database if exists ${db} cascade"""
+            } catch (Exception ignored) {}
+            sql """drop catalog if exists ${catalog_name}"""
+        }
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to