Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20024#discussion_r159628763
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
 ---
    @@ -199,13 +199,60 @@ case class Cast(child: Expression, dataType: 
DataType, timeZoneId: Option[String
     
       // [[func]] assumes the input is no longer null because eval already 
does the null check.
       @inline private[this] def buildCast[T](a: Any, func: T => Any): Any = 
func(a.asInstanceOf[T])
    +  @inline private[this] def buildWriter[T](
    +      a: Any, buffer: UTF8StringBuilder, writer: (T, UTF8StringBuilder) => 
Unit): Unit = {
    +    writer(a.asInstanceOf[T], buffer)
    +  }
    +
    +  private[this] def buildElemWriter(
    +      from: DataType): (Any, UTF8StringBuilder) => Unit = from match {
    +    case BinaryType => buildWriter[Array[Byte]](_, _, (b, buf) => 
buf.append(b))
    +    case StringType => buildWriter[UTF8String](_, _, (b, buf) => 
buf.append(b))
    +    case DateType => buildWriter[Int](_, _,
    +      (d, buf) => buf.append(DateTimeUtils.dateToString(d)))
    +    case TimestampType => buildWriter[Long](_, _,
    +      (t, buf) => buf.append(DateTimeUtils.timestampToString(t)))
    +    case ar: ArrayType =>
    +      buildWriter[ArrayData](_, _, (array, buf) => {
    +         buf.append("[")
    +        if (array.numElements > 0) {
    +          val writeElemToBuffer = buildElemWriter(ar.elementType)
    +          writeElemToBuffer(array.get(0, ar.elementType), buf)
    +          var i = 1
    +          while (i < array.numElements) {
    +            buf.append(", ")
    +            writeElemToBuffer(array.get(i, ar.elementType), buf)
    +            i += 1
    +          }
    +        }
    +        buf.append("]")
    +      })
    +    case _ => buildWriter[Any](_, _, (o, buf) => 
buf.append(String.valueOf(o)))
    +  }
     
       // UDFToString
       private[this] def castToString(from: DataType): Any => Any = from match {
         case BinaryType => buildCast[Array[Byte]](_, UTF8String.fromBytes)
         case DateType => buildCast[Int](_, d => 
UTF8String.fromString(DateTimeUtils.dateToString(d)))
         case TimestampType => buildCast[Long](_,
           t => UTF8String.fromString(DateTimeUtils.timestampToString(t, 
timeZone)))
    +    case ar: ArrayType =>
    +      buildCast[ArrayData](_, array => {
    +        val res = new UTF8StringBuilder
    +        res.append("[")
    +        if (array.numElements > 0) {
    +          val writeElemToBuffer = buildElemWriter(ar.elementType)
    +          writeElemToBuffer(array.get(0, ar.elementType), res)
    --- End diff --
    
    I don't get it, why do we need the `buildElemWriter`? I think it's as 
simple as
    ```
    val toUTF8String = castToString(et)
    builder.append(toUTF8String(array.get(0, et)).asInstanceOf[UTF8String])
    var i = 1
    while ...
    ```


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to