cloud-fan commented on code in PR #55427:
URL: https://github.com/apache/spark/pull/55427#discussion_r3143833722
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala:
##########
@@ -407,25 +413,36 @@ object TableOutputResolver extends SQLConfHelper with
Logging {
}
}
- inputCols.zip(actualExpectedCols).flatMap { case (inputCol, expectedCol) =>
+ val matched = inputCols.zip(actualExpectedCols).flatMap { case (inputCol,
expectedCol) =>
val newColPath = colPath :+ expectedCol.name
(inputCol.dataType, expectedCol.dataType) match {
case (inputType: StructType, expectedType: StructType) =>
resolveStructType(
tableName, inputCol, inputType, expectedCol, expectedType,
- byName = false, conf, addError, newColPath, fillDefaultValue =
false)
+ byName = false, conf, addError, newColPath, fillDefaultValue)
case (inputType: ArrayType, expectedType: ArrayType) =>
resolveArrayType(
tableName, inputCol, inputType, expectedCol, expectedType,
- byName = false, conf, addError, newColPath, fillDefaultValue =
false)
+ byName = false, conf, addError, newColPath, fillDefaultValue)
case (inputType: MapType, expectedType: MapType) =>
resolveMapType(
tableName, inputCol, inputType, expectedCol, expectedType,
- byName = false, conf, addError, newColPath, fillDefaultValue =
false)
+ byName = false, conf, addError, newColPath, fillDefaultValue)
case _ =>
checkField(tableName, expectedCol, inputCol, byName = false, conf,
addError, newColPath)
}
}
+
+ val defaults = if (fillDefaultValue) {
+ actualExpectedCols.drop(inputCols.size).flatMap { expectedCol =>
+ getDefaultValueExprOrNullLit(expectedCol,
conf.useNullsForMissingDefaultColumnValues)
+ .map(expr => applyColumnMetadata(expr, expectedCol))
+ }
+ } else {
+ Nil
+ }
Review Comment:
When a trailing target column has no usable default — non-nullable with no
`DEFAULT`, or `useNullsForMissingDefaultColumnValues=false` with no explicit
`DEFAULT` — `getDefaultValueExprOrNullLit` returns `None` and the surrounding
`.flatMap { _.map(...) }` silently drops that column. The result `matched ++
defaults` is shorter than `expected`. Two failure modes:
- Top-level: `resolveOutputColumns` builds an undersized `Project` and the
V2 plan never becomes `outputResolved` (`V2WriteCommand.areCompatible` requires
`inAttrs.size == outAttrs.size`) — producing an obscure analyzer error instead
of the clean `INCOMPATIBLE_DATA_FOR_TABLE.CANNOT_FIND_DATA` the by-name path
emits.
- Nested: `resolveStructType` line 490 returns `None` on length mismatch,
propagating a cascading silent drop of the enclosing struct field.
The by-name path at lines 317-328 throws
`incompatibleDataToTableCannotFindDataError` in the same situation; suggest
mirroring:
```suggestion
val defaults = if (fillDefaultValue) {
actualExpectedCols.drop(inputCols.size).map { expectedCol =>
val defaultExpr = getDefaultValueExprOrNullLit(
expectedCol, conf.useNullsForMissingDefaultColumnValues)
if (defaultExpr.isEmpty) {
throw
QueryCompilationErrors.incompatibleDataToTableCannotFindDataError(
tableName, (colPath :+ expectedCol.name).quoted)
}
applyColumnMetadata(defaultExpr.get, expectedCol)
}
} else {
Nil
}
```
Repro (mirroring the existing by-name negative test at line 1839): same V2
table `(id int, salary int, dep string)`, set
`INSERT_INTO_NESTED_TYPE_COERCION_ENABLED=true` AND
`USE_NULLS_FOR_MISSING_DEFAULT_COLUMN_VALUES=false`, then
`doInsertWithSchemaEvolution(t, Seq((1, 200)).toDF("id", "salary"))`
(by-position, source missing `dep`). Today this silently produces a 2-column
`Project`; with the suggested fix it errors symmetrically with the by-name
negative test. Worth adding a by-position counterpart there.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]