Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/19492#discussion_r144753534 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala --- @@ -536,26 +536,31 @@ case class JsonToStructs( timeZoneId = None) override def checkInputDataTypes(): TypeCheckResult = schema match { - case _: StructType | ArrayType(_: StructType, _) => + case _: StructType | ArrayType(_: StructType | _: AtomicType, _) => super.checkInputDataTypes() case _ => TypeCheckResult.TypeCheckFailure( - s"Input schema ${schema.simpleString} must be a struct or an array of structs.") + s"Input schema ${schema.simpleString} must be a struct or " + + s"an array of structs or primitive types.") } @transient - lazy val rowSchema = schema match { + lazy val rowSchema: DataType = schema match { case st: StructType => st case ArrayType(st: StructType, _) => st + case ArrayType(at: AtomicType, _) => ArrayType(at) } // This converts parsed rows to the desired output by the given schema. @transient - lazy val converter = schema match { - case _: StructType => - (rows: Seq[InternalRow]) => if (rows.length == 1) rows.head else null - case ArrayType(_: StructType, _) => - (rows: Seq[InternalRow]) => new GenericArrayData(rows) - } + lazy val converter = (rows: Seq[Any]) => --- End diff -- This brings extra matching cost at runtime. Can we move matching outside?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org