Github user viirya commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20935#discussion_r178425406
  
    --- Diff: 
sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala
 ---
    @@ -323,18 +324,31 @@ private[columnar] final class 
DecimalColumnStats(precision: Int, scale: Int) ext
     }
     
     private[columnar] final class ObjectColumnStats(dataType: DataType) 
extends ColumnStats {
    +  protected var upper: Any = null
    +  protected var lower: Any = null
    +
       val columnType = ColumnType(dataType)
    +  val ordering = dataType match {
    +    case x if RowOrdering.isOrderable(dataType) && x != NullType =>
    +      Option(TypeUtils.getInterpretedOrdering(x))
    +    case _ => None
    +  }
     
       override def gatherStats(row: InternalRow, ordinal: Int): Unit = {
         if (!row.isNullAt(ordinal)) {
           val size = columnType.actualSize(row, ordinal)
           sizeInBytes += size
           count += 1
    +      ordering.foreach { order =>
    +        val value = row.get(ordinal, dataType)
    +        if (upper == null || order.gt(value, upper)) upper = value
    +        if (lower == null || order.lt(value, lower)) lower = value
    --- End diff --
    
For unsafe row and array, don't we need to copy the value? In the added
test this can't be tested because the random rows are all individual instances;
however, it can be the same instance of unsafe row or array during query
evaluation.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org

Reply via email to