Github user pwoody commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20935#discussion_r178442841

    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala ---
    @@ -323,18 +324,31 @@ private[columnar] final class DecimalColumnStats(precision: Int, scale: Int) ext
     }

     private[columnar] final class ObjectColumnStats(dataType: DataType) extends ColumnStats {
    +  protected var upper: Any = null
    +  protected var lower: Any = null
    +
       val columnType = ColumnType(dataType)
    +  val ordering = dataType match {
    +    case x if RowOrdering.isOrderable(dataType) && x != NullType =>
    +      Option(TypeUtils.getInterpretedOrdering(x))
    +    case _ => None
    +  }

       override def gatherStats(row: InternalRow, ordinal: Int): Unit = {
         if (!row.isNullAt(ordinal)) {
           val size = columnType.actualSize(row, ordinal)
           sizeInBytes += size
           count += 1
    +      ordering.foreach { order =>
    +        val value = row.get(ordinal, dataType)
    +        if (upper == null || order.gt(value, upper)) upper = value
    +        if (lower == null || order.lt(value, lower)) lower = value
    --- End diff --

    Yes, thanks for catching this.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org