Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/20935#discussion_r178425406 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala --- @@ -323,18 +324,31 @@ private[columnar] final class DecimalColumnStats(precision: Int, scale: Int) ext } private[columnar] final class ObjectColumnStats(dataType: DataType) extends ColumnStats { + protected var upper: Any = null + protected var lower: Any = null + val columnType = ColumnType(dataType) + val ordering = dataType match { + case x if RowOrdering.isOrderable(dataType) && x != NullType => + Option(TypeUtils.getInterpretedOrdering(x)) + case _ => None + } override def gatherStats(row: InternalRow, ordinal: Int): Unit = { if (!row.isNullAt(ordinal)) { val size = columnType.actualSize(row, ordinal) sizeInBytes += size count += 1 + ordering.foreach { order => + val value = row.get(ordinal, dataType) + if (upper == null || order.gt(value, upper)) upper = value + if (lower == null || order.lt(value, lower)) lower = value --- End diff -- For unsafe row and array, don't we need to copy the value? In the added test this can't be tested because the random rows are all individual instances; however, it can be the same instance of unsafe row or array during query evaluation.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org