This is an automated email from the ASF dual-hosted git repository.
morrySnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new fa64a2eb0f0 [fix](nereids) Guard LogicalView.computeOutput() against
schema drift (IndexOutOfBoundsException) (#64007)
fa64a2eb0f0 is described below
commit fa64a2eb0f0c05598ef82358a514764eb84bdaae
Author: zhaorongsheng <[email protected]>
AuthorDate: Tue Jun 9 13:55:17 2026 +0800
[fix](nereids) Guard LogicalView.computeOutput() against schema drift
(IndexOutOfBoundsException) (#64007)
## Problem
When querying a Doris view built with `SELECT *` over an
external-catalog table (e.g. Hive), after the underlying table gains new
columns via `ALTER TABLE ADD COLUMNS` + `REFRESH TABLE <base_table>`,
the query crashes with:
```
errCode = 2, detailMessage = Index 3 out of bounds for length 3
```
**Root cause** (`LogicalView.computeOutput()`):
The view body is re-analyzed against the refreshed base-table schema (4
columns), producing `childOutput` with 4 slots. But
`view.getFullSchema()` still returns 3 columns (the schema stored at
view-creation time in the Hive metastore – not yet refreshed). The loop
runs `i = 0..3` and calls `view.getFullSchema().get(3)`, crashing.
The `CollectionUtils.isEmpty()` guard added in #40715 handles the
`null`/empty case but not the under-sized case.
## Fix
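Promote `view.getFullSchema()` to a local variable to avoid repeated
calls, then, whenever a stored schema is present, bound the loop by the
smaller of the child output size and the stored schema size:
```java
List<Column> fullSchema = view.getFullSchema();
boolean hasSchema = !CollectionUtils.isEmpty(fullSchema);
int limit = hasSchema
        ? Math.min(childOutput.size(), fullSchema.size())
        : childOutput.size();
for (int i = 0; i < limit; i++) {
    Slot originSlot = childOutput.get(i);
    Slot qualified;
    if (!hasSchema) {
        // same as existing null-guard path
        qualified = originSlot.withQualifier(fullQualifiers);
    } else {
        qualified = originSlot.withOneLevelTableAndColumnAndQualifier(
                view, fullSchema.get(i), fullQualifiers);
    }
    currentOutput.add(qualified);
}
```
When the stored schema is non-empty it defines the view's output
contract, so schema drift is handled in both directions: extra child
slots (columns added to the base table after view creation) are
truncated instead of being exposed, and if the child has fewer slots
than the stored schema the loop stops at the child size. When the stored
schema is `null` or empty, every child slot falls back to
`withQualifier()` exactly as before. No behavioral change for
non-drifted views.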
## Files Changed
| File | Change |
|------|--------|
| `fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java` | Fix + import `Column` |
| `fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/logical/LogicalViewTest.java` | New unit test |
| `regression-test/suites/external_table_p0/hive/test_hive_view_schema_drift.groovy` | New regression test |
## Issue
Fixes: #64006
---------
Signed-off-by: zhaorongsheng <[email protected]>
Co-authored-by: zhaorongsheng <[email protected]>
---
.../nereids/trees/plans/logical/LogicalView.java | 19 +-
.../trees/plans/logical/LogicalViewTest.java | 245 +++++++++++++++++++++
.../hive/test_hive_view_schema_drift.groovy | 115 ++++++++++
3 files changed, 374 insertions(+), 5 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java
index a7dd046cb36..a42d75dc26d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java
@@ -17,6 +17,7 @@
package org.apache.doris.nereids.trees.plans.logical;
+import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.ViewIf;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.memo.GroupExpression;
@@ -121,16 +122,24 @@ public class LogicalView<BODY extends Plan> extends
LogicalUnary<BODY> {
List<Slot> childOutput = child().getOutput();
ImmutableList.Builder<Slot> currentOutput = ImmutableList.builder();
List<String> fullQualifiers = this.view.getFullQualifiers();
- for (int i = 0; i < childOutput.size(); i++) {
+ List<Column> fullSchema = view.getFullSchema();
+ // ATTN: because bug intro by #40715, after replace view, full schema
will be empty or null.
+ // So, we must guard here to avoid NPE or out of bound exception.
+ // When fullSchema is present it defines the view's output contract
(built from
+ // CreateViewInfo.finalCols). Use Math.min() as the loop bound to
handle both
+ // directions of schema drift without leaking undeclared columns:
+ // - child has MORE slots (base table gained columns after view
creation): truncate
+ // - child has FEWER slots (base table lost columns): stop at child
size
+ boolean hasSchema = !CollectionUtils.isEmpty(fullSchema);
+ int limit = hasSchema ? Math.min(childOutput.size(),
fullSchema.size()) : childOutput.size();
+ for (int i = 0; i < limit; i++) {
Slot originSlot = childOutput.get(i);
Slot qualified;
- // ATTN: because bug intro by #40715, after replace view, full
schema will be empty or null.
- // So, we must just here to avoid NPE or out of bound exception.
- if (CollectionUtils.isEmpty(view.getFullSchema())) {
+ if (!hasSchema) {
qualified = originSlot.withQualifier(fullQualifiers);
} else {
qualified = originSlot
- .withOneLevelTableAndColumnAndQualifier(view,
view.getFullSchema().get(i), fullQualifiers);
+ .withOneLevelTableAndColumnAndQualifier(view,
fullSchema.get(i), fullQualifiers);
}
currentOutput.add(qualified);
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/logical/LogicalViewTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/logical/LogicalViewTest.java
new file mode 100644
index 00000000000..210a34be65b
--- /dev/null
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/logical/LogicalViewTest.java
@@ -0,0 +1,245 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.plans.logical;
+
+import org.apache.doris.catalog.AggregateType;
+import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.Type;
+import org.apache.doris.catalog.ViewIf;
+import org.apache.doris.nereids.trees.expressions.Slot;
+
+import com.google.common.collect.ImmutableList;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.mockito.Mockito;
+
+import java.lang.reflect.Proxy;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Unit tests for {@link LogicalView#computeOutput()}.
+ *
+ * <p>Focuses on the schema-drift scenario: when the underlying external table
(e.g. Hive)
+ * gains new columns after the view was created, {@code view.getFullSchema()}
is smaller
+ * than {@code child().getOutput()} and the loop used to crash with
+ * {@code IndexOutOfBoundsException}.
+ */
+public class LogicalViewTest {
+
+ // -----------------------------------------------------------------------
+ // Helpers
+ // -----------------------------------------------------------------------
+
+ private static Slot slot(String name) {
+ Slot slot = Mockito.mock(Slot.class);
+ Mockito.when(slot.getName()).thenReturn(name);
+ Mockito.when(slot.withQualifier(Mockito.anyList())).thenReturn(slot);
+ Mockito.when(slot.withOneLevelTableAndColumnAndQualifier(
+ Mockito.any(), Mockito.any(),
Mockito.anyList())).thenReturn(slot);
+ return slot;
+ }
+
+ private static Column col(String name) {
+ return new Column(name, Type.STRING, false, AggregateType.NONE, "",
"");
+ }
+
+ /**
+ * Build a minimal ViewIf stub via JDK Proxy (avoids ByteBuddy/Mockito
+ * which triggers TableIf → ColumnStatistic class-loading issues in unit
tests).
+ */
+ @SuppressWarnings("unchecked")
+ private static ViewIf mockView(List<Column> schema, List<String>
qualifiers) {
+ return (ViewIf) Proxy.newProxyInstance(
+ ViewIf.class.getClassLoader(),
+ new Class<?>[] {ViewIf.class},
+ (proxy, method, args) -> {
+ switch (method.getName()) {
+ case "getFullSchema": return schema;
+ case "getFullQualifiers": return qualifiers;
+ case "getViewText": return "";
+ default: return null;
+ }
+ });
+ }
+
+ /**
+ * Build a mock LogicalPlan child whose {@code getOutput()} returns the
given slots.
+ * Also stubs {@code getAllChildrenTypes()} so AbstractTreeNode's
constructor does not
+ * crash with NPE when calling {@code containsTypes.or(childTypes)}.
+ */
+ private static LogicalPlan mockChild(List<Slot> slots) {
+ LogicalPlan child = Mockito.mock(LogicalPlan.class);
+ Mockito.when(child.getOutput()).thenReturn(slots);
+ Mockito.when(child.getAllChildrenTypes()).thenReturn(new BitSet());
+ return child;
+ }
+
+ // -----------------------------------------------------------------------
+ // Tests
+ // -----------------------------------------------------------------------
+
+ /**
+ * Normal case: child output size == fullSchema size.
+ * All slots should be decorated with their corresponding stored column.
+ */
+ @Test
+ public void testComputeOutputNormalCase() {
+ List<Column> schema = ImmutableList.of(col("id"), col("name"),
col("age"));
+ List<Slot> childSlots = ImmutableList.of(slot("id"), slot("name"),
slot("age"));
+
+ ViewIf view = mockView(schema, ImmutableList.of("hive", "test", "v"));
+ LogicalView<LogicalPlan> logicalView = new LogicalView<>(view,
mockChild(childSlots));
+
+ List<Slot> output = logicalView.computeOutput();
+
+ Assertions.assertEquals(3, output.size(), "Output size should match
schema size");
+ Assertions.assertEquals("id", output.get(0).getName());
+ Assertions.assertEquals("name", output.get(1).getName());
+ Assertions.assertEquals("age", output.get(2).getName());
+ }
+
+ /**
+ * Schema-drift case: child output has MORE slots than fullSchema.
+ *
+ * <p>This is the regression test for the IndexOutOfBoundsException bug:
+ * after {@code ALTER TABLE ADD COLUMNS} + {@code REFRESH TABLE} on the
base table,
+ * {@code childOutput.size()} grows beyond {@code
view.getFullSchema().size()}.
+ *
+ * <p>The fix truncates output to the view's declared schema width so that:
+ * (1) no IndexOutOfBoundsException is thrown, and
+ * (2) the view's output contract (as seen in {@code DESC view}) is
preserved.
+ */
+ @Test
+ public void testComputeOutputSchemaDrift_moreColumnsThanSchema() {
+ // View was created with 3-column base table → fullSchema has 3 entries
+ List<Column> schema = ImmutableList.of(col("id"), col("name"),
col("age"));
+ // After ADD COLUMN + REFRESH TABLE, the child produces 4 slots
+ List<Slot> childSlots = ImmutableList.of(slot("id"), slot("name"),
slot("age"), slot("score"));
+
+ ViewIf view = mockView(schema, ImmutableList.of("hive", "test", "v"));
+ LogicalView<LogicalPlan> logicalView = new LogicalView<>(view,
mockChild(childSlots));
+
+ // Must NOT throw IndexOutOfBoundsException
+ List<Slot> output =
Assertions.assertDoesNotThrow(logicalView::computeOutput,
+ "computeOutput() must not throw when fullSchema is shorter
than childOutput");
+
+ // Output is truncated to the view's declared schema width (3), not
the child width (4).
+ // The new 'score' column is not visible until the view itself is
refreshed.
+ Assertions.assertEquals(3, output.size(),
+ "Output size should be truncated to fullSchema size to
preserve view contract");
+ Assertions.assertEquals("id", output.get(0).getName());
+ Assertions.assertEquals("name", output.get(1).getName());
+ Assertions.assertEquals("age", output.get(2).getName());
+ }
+
+ /**
+ * Schema-drift case: child output has FEWER slots than fullSchema.
+ *
+ * <p>Defensive check: if a column was somehow removed from the base table
+ * while the view schema is wider, we must not access a nonexistent child
slot.
+ * The output should simply reflect however many columns the child returns.
+ */
+ @Test
+ public void testComputeOutputSchemaDrift_fewerColumnsThanSchema() {
+ // fullSchema wider than actual child (defensive scenario)
+ List<Column> schema = ImmutableList.of(col("id"), col("name"),
col("age"), col("score"));
+ List<Slot> childSlots = ImmutableList.of(slot("id"), slot("name"));
+
+ ViewIf view = mockView(schema, ImmutableList.of("hive", "test", "v"));
+ LogicalView<LogicalPlan> logicalView = new LogicalView<>(view,
mockChild(childSlots));
+
+ List<Slot> output = logicalView.computeOutput();
+
+ Assertions.assertEquals(2, output.size(),
+ "Output size should equal child output size (loop bound)");
+ Assertions.assertEquals("id", output.get(0).getName());
+ Assertions.assertEquals("name", output.get(1).getName());
+ }
+
+ /**
+ * Empty schema case (guard introduced by #40715): fullSchema is empty.
+ * All slots must fall back to withQualifier().
+ */
+ @Test
+ public void testComputeOutputEmptySchema() {
+ List<Slot> childSlots = ImmutableList.of(slot("id"), slot("name"));
+
+ ViewIf view = mockView(Collections.emptyList(),
ImmutableList.of("hive", "test", "v"));
+ LogicalView<LogicalPlan> logicalView = new LogicalView<>(view,
mockChild(childSlots));
+
+ List<Slot> output =
Assertions.assertDoesNotThrow(logicalView::computeOutput,
+ "computeOutput() must not throw for empty fullSchema");
+
+ Assertions.assertEquals(2, output.size());
+ Assertions.assertEquals("id", output.get(0).getName());
+ Assertions.assertEquals("name", output.get(1).getName());
+ }
+
+ /**
+ * Null schema case (guard introduced by #40715): getFullSchema() returns
null.
+ * All slots must fall back to withQualifier().
+ */
+ @Test
+ public void testComputeOutputNullSchema() {
+ List<Slot> childSlots = ImmutableList.of(slot("id"), slot("name"));
+
+ ViewIf view = mockView(null, ImmutableList.of("hive", "test", "v"));
+ LogicalView<LogicalPlan> logicalView = new LogicalView<>(view,
mockChild(childSlots));
+
+ List<Slot> output =
Assertions.assertDoesNotThrow(logicalView::computeOutput,
+ "computeOutput() must not throw for null fullSchema");
+
+ Assertions.assertEquals(2, output.size());
+ }
+
+ /**
+ * Single new column added (minimal drift). Verifies the exact boundary
condition
+ * at index {@code fullSchema.size()}: output is truncated to schema width
(1).
+ */
+ @Test
+ public void testComputeOutputSchemaDrift_singleColumnAdded() {
+ // 1-column schema, child returns 2 slots after schema drift
+ List<Column> schema = ImmutableList.of(col("id"));
+ List<Slot> childSlots = ImmutableList.of(slot("id"), slot("extra"));
+
+ ViewIf view = mockView(schema, ImmutableList.of("hive", "test", "v"));
+ LogicalView<LogicalPlan> logicalView = new LogicalView<>(view,
mockChild(childSlots));
+
+ List<Slot> output =
Assertions.assertDoesNotThrow(logicalView::computeOutput);
+
+ // Truncated to schema width; the new 'extra' column is not exposed.
+ Assertions.assertEquals(1, output.size());
+ Assertions.assertEquals("id", output.get(0).getName());
+ }
+
+ /**
+ * Child returns no output (empty relation). Must return an empty list and
not crash.
+ */
+ @Test
+ public void testComputeOutputEmptyChild() {
+ List<Column> schema = ImmutableList.of(col("id"), col("name"));
+ ViewIf view = mockView(schema, ImmutableList.of("hive", "test", "v"));
+ LogicalView<LogicalPlan> logicalView = new LogicalView<>(view,
mockChild(ImmutableList.of()));
+
+ List<Slot> output = logicalView.computeOutput();
+
+ Assertions.assertTrue(output.isEmpty());
+ }
+}
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_view_schema_drift.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_view_schema_drift.groovy
new file mode 100644
index 00000000000..2232143b245
--- /dev/null
+++
b/regression-test/suites/external_table_p0/hive/test_hive_view_schema_drift.groovy
@@ -0,0 +1,115 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Regression test for: LogicalView.computeOutput() IndexOutOfBoundsException
when
+// an underlying Hive table gains new columns (schema drift) after the Hive
view was created.
+//
+// Repro:
+// 1. Create a Hive base table (3 cols) and a native Hive VIEW on it.
+// 2. In Doris, register the Hive catalog and query the external view — OK
(3 cols).
+// 3. ADD COLUMN to the Hive base table via hive_docker.
+// 4. REFRESH TABLE <base_table> in Doris (view HMS schema NOT refreshed).
+// 5. Query the external view again — used to crash:
+// errCode = 2, detailMessage = Index 3 out of bounds for length 3
+// because LogicalView.computeOutput() iterated childOutput (4 slots from
the
+// re-analyzed view body) but called view.getFullSchema().get(i) on a
3-element
+// list (the Hive view's HMS schema at creation time).
+//
+// The fix: use Math.min(childOutput.size(), fullSchema.size()) as the loop
bound,
+// preserving the view's declared output contract while preventing the crash.
+
+suite("test_hive_view_schema_drift", "p0,external,hive_docker") {
+
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("disable Hive test.")
+ return;
+ }
+
+ for (String hivePrefix : ["hive2", "hive3"]) {
+ setHivePrefix(hivePrefix)
+ String hms_port = context.config.otherConfigs.get(hivePrefix +
"HmsPort")
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+ String catalog_name = "test_${hivePrefix}_view_schema_drift"
+ String db = "test_view_schema_drift_db"
+ String base_table = "test_view_schema_drift_base"
+ String hive_view = "test_view_schema_drift_view"
+
+ try {
+ // ---- Register Hive catalog in Doris ----
+ sql """drop catalog if exists ${catalog_name}"""
+ sql """CREATE CATALOG ${catalog_name} PROPERTIES (
+ 'type'='hms',
+ 'hive.metastore.uris' =
'thrift://${externalEnvIp}:${hms_port}',
+ 'hadoop.username' = 'hive'
+ )"""
+
+ // ---- Create Hive database, base table (3 cols), and a native
Hive VIEW ----
+ // The view is created through hive_docker so it is a native Hive
view
+ // (ExternalView in Doris). Its HMS schema records exactly 3
columns.
+ hive_docker """drop database if exists ${db} cascade"""
+ hive_docker """create database ${db}"""
+ hive_docker """
+ create table ${db}.${base_table} (
+ id bigint,
+ name string,
+ age string
+ )
+ partitioned by (dt string)
+ stored as parquet
+ """
+ hive_docker """
+ create view ${db}.${hive_view} as
+ select id, name, age from ${db}.${base_table}
+ """
+
+ sql """switch ${catalog_name}"""
+ sql """use ${db}"""
+
+ // ---- Baseline: query the Hive view (3 columns) ----
+ def beforeDrift = sql """select * from ${hive_view} where 1=0"""
+ assertTrue(beforeDrift.isEmpty(), "Expected empty result before
schema drift")
+
+ // ---- Schema drift: add a column to the Hive base table ----
+ hive_docker """alter table ${db}.${base_table} add columns (score
string comment 'new col')"""
+
+ // ---- Refresh only the base table (view HMS schema is NOT
refreshed) ----
+ // After this, Doris re-analyzes the view body against the
4-column base table,
+ // producing childOutput with 4 slots, while
ExternalView.getFullSchema() still
+ // returns 3 columns from the Hive metastore →
IndexOutOfBoundsException before fix.
+ sql """refresh table ${base_table}"""
+
+ // ---- Base table now exposes 5 columns (id, name, age, score +
partition col dt) ----
+ def descBase = sql """desc ${base_table}"""
+ assertEquals(5, descBase.size(),
+ "Base table should have 5 columns after ADD COLUMN +
REFRESH (including partition column)")
+
+ // ---- Querying the external Hive view must NOT throw
IndexOutOfBoundsException ----
+ // The view's HMS schema still has 3 cols (view not refreshed), so
the output
+ // is truncated to the view's declared width (3 cols) — the new
'score' column
+ // is not visible until the view itself is refreshed.
+ def afterDrift = sql """select * from ${hive_view} where 1=0"""
+ assertTrue(afterDrift.isEmpty(), "Expected empty result after
schema drift (WHERE 1=0)")
+
+ } finally {
+ try {
+ hive_docker """drop database if exists ${db} cascade"""
+ } catch (Exception ignored) {}
+ sql """drop catalog if exists ${catalog_name}"""
+ }
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]