Github user viirya commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20935#discussion_r178446592

    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala ---
    @@ -103,4 +140,90 @@ class ColumnStatsSuite extends SparkFunSuite {
           }
         }
       }
    +
    +  def testOrderableColumnStats[T](
    +      dataType: DataType,
    +      statsSupplier: () => OrderableSafeColumnStats[T],
    +      columnType: ColumnType[T],
    +      orderable: Boolean,
    +      initialStatistics: Array[Any]): Unit = {
    +
    +    test(s"${dataType.typeName}, $orderable: empty") {
    +      val objectStats = statsSupplier()
    +      objectStats.collectedStatistics.zip(initialStatistics).foreach {
    +        case (actual, expected) => assert(actual === expected)
    +      }
    +    }
    +
    +    test(s"${dataType.typeName}, $orderable: non-empty") {
    +      import org.apache.spark.sql.execution.columnar.ColumnarTestUtils._
    +      val objectStats = statsSupplier()
    +      val rows = Seq.fill(10)(makeRandomRow(columnType)) ++ Seq.fill(10)(makeNullRow(1))
    +      rows.foreach(objectStats.gatherStats(_, 0))
    +
    +      val stats = objectStats.collectedStatistics
    +      if (orderable) {
    +        val values = rows.take(10).map(_.get(0, columnType.dataType))
    +        val ordering = TypeUtils.getInterpretedOrdering(dataType)
    +
    +        assertResult(values.min(ordering), "Wrong lower bound")(stats(0))
    +        assertResult(values.max(ordering), "Wrong upper bound")(stats(1))
    +      } else {
    +        assertResult(null, "Wrong lower bound")(stats(0))
    +        assertResult(null, "Wrong upper bound")(stats(1))
    +      }
    +      assertResult(10, "Wrong null count")(stats(2))
    +      assertResult(20, "Wrong row count")(stats(3))
    +      assertResult(stats(4), "Wrong size in bytes") {
    +        rows.map { row =>
    +          if (row.isNullAt(0)) 4 else columnType.actualSize(row, 0)
    +        }.sum
    +      }
    +    }
    +  }
    +
    +  def testMapColumnStats(dataType: DataType, initialStatistics: Array[Any]): Unit = {
    +    val columnType = ColumnType(dataType)
    +
    +    test(s"${dataType.typeName}: empty") {
    +      val objectStats = new MapColumnStats(dataType)
    +      objectStats.collectedStatistics.zip(initialStatistics).foreach {
    +        case (actual, expected) => assert(actual === expected)
    +      }
    +    }
    +
    +    test(s"${dataType.typeName}: non-empty") {
    +      import org.apache.spark.sql.execution.columnar.ColumnarTestUtils._
    +      val objectStats = new MapColumnStats(dataType)
    +      val rows = Seq.fill(10)(makeRandomRow(columnType)) ++ Seq.fill(10)(makeNullRow(1))
    +      rows.foreach(objectStats.gatherStats(_, 0))
    +
    +      val stats = objectStats.collectedStatistics
    +      assertResult(null, "Wrong lower bound")(stats(0))
    +      assertResult(null, "Wrong upper bound")(stats(1))
    +      assertResult(10, "Wrong null count")(stats(2))
    +      assertResult(20, "Wrong row count")(stats(3))
    +      assertResult(stats(4), "Wrong size in bytes") {
    +        rows.map { row =>
    +          if (row.isNullAt(0)) 4 else columnType.actualSize(row, 0)
    +        }.sum
    +      }
    +    }
    +  }
    +
    +  test("Reuse UnsafeArrayData for stats") {
    --- End diff --

    We should also test against UnsafeRow.
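    For concreteness, a rough sketch of how an UnsafeRow variant could be folded into the testOrderableColumnStats helper above. The UnsafeProjection-based conversion and the extra test name are my suggestion, not code from this diff:

        // Sketch only: assumes this sits inside testOrderableColumnStats, so
        // dataType, statsSupplier, columnType and orderable are all in scope.
        test(s"${dataType.typeName}, $orderable: non-empty (UnsafeRow)") {
          import org.apache.spark.sql.catalyst.expressions.UnsafeProjection
          import org.apache.spark.sql.execution.columnar.ColumnarTestUtils._

          val objectStats = statsSupplier()
          val toUnsafe = UnsafeProjection.create(Array[DataType](dataType))
          val rows = Seq.fill(10)(makeRandomRow(columnType)) ++ Seq.fill(10)(makeNullRow(1))
          // UnsafeProjection reuses its output buffer, so copy each projected
          // row before handing it to gatherStats.
          rows.map(row => toUnsafe(row).copy()).foreach(objectStats.gatherStats(_, 0))

          val stats = objectStats.collectedStatistics
          assertResult(10, "Wrong null count")(stats(2))
          assertResult(20, "Wrong row count")(stats(3))
        }

    I've left out the lower/upper bound assertions here, since the expected values would first need converting to their unsafe representation before comparing; the point is just that gatherStats sees UnsafeRow-backed values (UnsafeArrayData, UnsafeRow, UnsafeMapData) rather than the generic ones.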