Github user viirya commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19492#discussion_r144753534
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala ---
    @@ -536,26 +536,31 @@ case class JsonToStructs(
           timeZoneId = None)
     
       override def checkInputDataTypes(): TypeCheckResult = schema match {
    -    case _: StructType | ArrayType(_: StructType, _) =>
    +    case _: StructType | ArrayType(_: StructType | _: AtomicType, _) =>
           super.checkInputDataTypes()
         case _ => TypeCheckResult.TypeCheckFailure(
    -      s"Input schema ${schema.simpleString} must be a struct or an array 
of structs.")
    +      s"Input schema ${schema.simpleString} must be a struct or " +
    +        s"an array of structs or primitive types.")
       }
     
       @transient
    -  lazy val rowSchema = schema match {
    +  lazy val rowSchema: DataType = schema match {
         case st: StructType => st
         case ArrayType(st: StructType, _) => st
    +    case ArrayType(at: AtomicType, _) => ArrayType(at)
       }
     
       // This converts parsed rows to the desired output by the given schema.
       @transient
    -  lazy val converter = schema match {
    -    case _: StructType =>
    -      (rows: Seq[InternalRow]) => if (rows.length == 1) rows.head else null
    -    case ArrayType(_: StructType, _) =>
    -      (rows: Seq[InternalRow]) => new GenericArrayData(rows)
    -  }
    +  lazy val converter = (rows: Seq[Any]) =>
    --- End diff ---
    
    This adds extra pattern-matching cost at runtime: the match on `schema` now runs on every call to `converter` instead of once. Can we move the matching outside the closure?
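    For example, something along these lines would keep the per-row path match-free (a rough sketch against the surrounding diff, not tested; the exact result types may differ):
    
        // Sketch only: assumes the surrounding JsonToStructs context
        // (schema, ArrayType, AtomicType, GenericArrayData in scope).
        @transient
        lazy val converter: Seq[Any] => Any = schema match {
          case _: StructType =>
            // The match on schema runs once, when the lazy val initializes;
            // the returned closures do no matching per invocation.
            (rows: Seq[Any]) => if (rows.length == 1) rows.head else null
          case ArrayType(_: StructType | _: AtomicType, _) =>
            (rows: Seq[Any]) => new GenericArrayData(rows)
        }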

