[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

kiszk Tue, 10 Apr 2018 04:59:52 -0700

Github user kiszk commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21021#discussion_r180392603
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
    @@ -190,28 +161,118 @@ case class SortArray(base: Expression, 
ascendingOrder: Expression)
             if (o1 == null && o2 == null) {
               0
             } else if (o1 == null) {
    -          1
    +          1 * placeNullAtEnd
             } else if (o2 == null) {
    -          -1
    +          -1 * placeNullAtEnd
             } else {
               -ordering.compare(o1, o2)
             }
           }
         }
       }
     
    -  override def nullSafeEval(array: Any, ascending: Any): Any = {
    -    val elementType = base.dataType.asInstanceOf[ArrayType].elementType
    +  def sortEval(array: Any, ascending: Boolean): Any = {
    +    val elementType = 
arrayExpression.dataType.asInstanceOf[ArrayType].elementType
         val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
         if (elementType != NullType) {
    -      java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt 
else gt)
    +      java.util.Arrays.sort(data, if (ascending) lt else gt)
         }
         new GenericArrayData(data.asInstanceOf[Array[Any]])
       }
    +}
    +
    +/**
    + * Sorts the input array in ascending / descending order according to the 
natural ordering of
    + * the array elements and returns it.
    + */
    +// scalastyle:off line.size.limit
    +@ExpressionDescription(
    +  usage = "_FUNC_(array[, ascendingOrder]) - Sorts the input array in 
ascending or descending order according to the natural ordering of the array 
elements.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(array('b', 'd', 'c', 'a'), true);
    +       ["a","b","c","d"]
    +  """)
    +// scalastyle:on line.size.limit
    +case class SortArray(base: Expression, ascendingOrder: Expression)
    +  extends BinaryExpression with ArraySortUtil {
    +
    +  def this(e: Expression) = this(e, Literal(true))
    +
    +  override def left: Expression = base
    +  override def right: Expression = ascendingOrder
    +  override def dataType: DataType = base.dataType
    +  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
BooleanType)
    +
    +  override def arrayExpression: Expression = base
    +  override def placeNullAtEnd: Int = 1
    +
    +  override def checkInputDataTypes(): TypeCheckResult = base.dataType 
match {
    +    case ArrayType(dt, _) if RowOrdering.isOrderable(dt) =>
    +      ascendingOrder match {
    +        case Literal(_: Boolean, BooleanType) =>
    +          TypeCheckResult.TypeCheckSuccess
    +        case _ =>
    +          TypeCheckResult.TypeCheckFailure(
    +            "Sort order in second argument requires a boolean literal.")
    +      }
    +    case ArrayType(dt, _) =>
    +      val dtSimple = dt.simpleString
    +      TypeCheckResult.TypeCheckFailure(
    +        s"$prettyName does not support sorting array of type $dtSimple 
which is not orderable")
    +    case _ =>
    +      TypeCheckResult.TypeCheckFailure(s"$prettyName only supports array 
input.")
    +  }
    +
    +  override def nullSafeEval(array: Any, ascending: Any): Any = {
    +    sortEval(array, ascending.asInstanceOf[Boolean])
    +  }
     
       override def prettyName: String = "sort_array"
     }
     
    +/**
    + * Sorts the input array in ascending order according to the natural 
ordering of
    + * the array elements and returns it.
    + */
    +// scalastyle:off line.size.limit
    +@ExpressionDescription(
    +  usage = """
    +    _FUNC_(array) - Sorts the input array in ascending order. The elements 
of the input array must
    +      be orderable. Null elements will be placed at the end of the 
returned array.""",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(array('b', 'd', null, 'c', 'a'));
    +       ["a","b","c","d",null]
    +  """,
    +  since = "2.4.0")
    +// scalastyle:on line.size.limit
    +case class ArraySort(child: Expression) extends UnaryExpression with 
ArraySortUtil {
    --- End diff --
    
    As you can see from the results in the unit tests, `null` handling is different. As you 
suggested, to reuse as much existing code as possible, I refactored by using 
`ArraySortUtil`.



---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #21021: [SPARK-23921][SQL] Add array_sort function

Reply via email to