Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/22814#discussion_r228065259 --- Diff: external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala --- @@ -21,16 +21,31 @@ import org.apache.avro.Schema import org.apache.avro.generic.GenericDatumReader import org.apache.avro.io.{BinaryDecoder, DecoderFactory} -import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, UnaryExpression} +import org.apache.spark.SparkException +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, SpecificInternalRow, UnaryExpression} import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode} -import org.apache.spark.sql.types.{AbstractDataType, BinaryType, DataType} +import org.apache.spark.sql.catalyst.util.{FailFastMode, ParseMode, PermissiveMode} +import org.apache.spark.sql.types._ -case class AvroDataToCatalyst(child: Expression, jsonFormatSchema: String) +case class AvroDataToCatalyst( + child: Expression, + jsonFormatSchema: String, + options: Map[String, String]) extends UnaryExpression with ExpectsInputTypes { override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType) - override lazy val dataType: DataType = SchemaConverters.toSqlType(avroSchema).dataType + override lazy val dataType: DataType = { + val dt = SchemaConverters.toSqlType(avroSchema).dataType + parseMode match { + // With PermissiveMode, the output Catalyst row might contain columns of null values for + // corrupt records, even if some of the columns are not nullable in the user-provided schema. + // Therefore we force the schema to be all nullable here. + case PermissiveMode => dt.asNullable --- End diff -- This looks going to be an external behaviour change to users from 2.4.0. Please update migration guide as well.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org