Github user hvanhovell commented on a diff in the pull request: https://github.com/apache/spark/pull/20757#discussion_r173429606 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala --- @@ -1421,13 +1421,36 @@ case class ValidateExternalType(child: Expression, expected: DataType) override def nullable: Boolean = child.nullable - override def dataType: DataType = RowEncoder.externalDataTypeForInput(expected) - - override def eval(input: InternalRow): Any = - throw new UnsupportedOperationException("Only code-generated evaluation is supported") + override val dataType: DataType = RowEncoder.externalDataTypeForInput(expected) private val errMsg = s" is not a valid external type for schema of ${expected.simpleString}" + private lazy val checkType: (Any) => Boolean = expected match { + case _: DecimalType => + (value: Any) => { + value.isInstanceOf[java.math.BigDecimal] || value.isInstanceOf[scala.math.BigDecimal] || + value.isInstanceOf[Decimal] + } + case _: ArrayType => + (value: Any) => { + value.getClass.isArray || value.isInstanceOf[Seq[_]] + } + case _ => + val dataTypeClazz = RowEncoder.getClassFromExternalType(dataType) --- End diff -- Does this always return the same result as `CodeGenerator.boxedType(dataType)` in terms of functionality? I don't think it does, since it misses support for `DateType`, `TimestampType`, `DecimalType`, `StringType`, `StructType`, `MapType`, `ArrayType` and `UserDefinedType`. The thing is that it does not really matter how this expression is currently used (for datasets); what matters is how the code-generated version is implemented.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org