Github user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22814#discussion_r228065259
  
    --- Diff: 
external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala 
---
    @@ -21,16 +21,31 @@ import org.apache.avro.Schema
     import org.apache.avro.generic.GenericDatumReader
     import org.apache.avro.io.{BinaryDecoder, DecoderFactory}
     
    -import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, 
Expression, UnaryExpression}
    +import org.apache.spark.SparkException
    +import org.apache.spark.sql.AnalysisException
    +import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, 
Expression, SpecificInternalRow, UnaryExpression}
     import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, 
CodeGenerator, ExprCode}
    -import org.apache.spark.sql.types.{AbstractDataType, BinaryType, DataType}
    +import org.apache.spark.sql.catalyst.util.{FailFastMode, ParseMode, 
PermissiveMode}
    +import org.apache.spark.sql.types._
     
    -case class AvroDataToCatalyst(child: Expression, jsonFormatSchema: String)
    +case class AvroDataToCatalyst(
    +    child: Expression,
    +    jsonFormatSchema: String,
    +    options: Map[String, String])
       extends UnaryExpression with ExpectsInputTypes {
     
       override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType)
     
    -  override lazy val dataType: DataType = 
SchemaConverters.toSqlType(avroSchema).dataType
    +  override lazy val dataType: DataType = {
    +    val dt = SchemaConverters.toSqlType(avroSchema).dataType
    +    parseMode match {
    +      // With PermissiveMode, the output Catalyst row might contain 
columns of null values for
    +      // corrupt records, even if some of the columns are not nullable in 
the user-provided schema.
    +      // Therefore we force the schema to be all nullable here.
    +      case PermissiveMode => dt.asNullable
    --- End diff --
    
    This looks like it is going to be an external behaviour change for users from 2.4.0. 
Please update the migration guide as well.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to