Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/20265#discussion_r161421703 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala --- @@ -483,6 +484,64 @@ object OrcReadBenchmark { } } + def filterPushDownBenchmark(values: Int, width: Int): Unit = { + val benchmark = new Benchmark(s"Filter Pushdown", values) + + withTempPath { dir => + withTempTable("t1", "nativeOrcTable", "hiveOrcTable") { + import spark.implicits._ + val selectExpr = (1 to width).map(i => s"CAST(value AS STRING) c$i") + val whereExpr = (1 to width).map(i => s"NOT c$i LIKE '%not%exist%'").mkString(" AND ") --- End diff -- This is kind of the best case for PPD, as the data is sorted. I'm fine with it, but let's add some more cases, at least `==` and `>`. We should follow the other benchmarks in this file to make it complete.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org