Github user xuanyuanking commented on a diff in the pull request: https://github.com/apache/spark/pull/16135#discussion_r156254083 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala --- @@ -352,4 +353,34 @@ class PartitionedTablePerfStatsSuite } } } + + test("SPARK-18700: table loaded only once even when resolved concurrently") { + withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "false") { + withTable("test") { + withTempDir { dir => + HiveCatalogMetrics.reset() + setupPartitionedHiveTable("test", dir, 50) + // select the table in multi-threads + val executorPool = Executors.newFixedThreadPool(10) + (1 to 10).map(threadId => { + val runnable = new Runnable { + override def run(): Unit = { + spark.sql("select * from test where partCol1 = 999").count() + } + } + executorPool.execute(runnable) + None + }) + executorPool.shutdown() + executorPool.awaitTermination(30, TimeUnit.SECONDS) + // check the cache hit, we use the metric of METRIC_FILES_DISCOVERED and + // METRIC_PARALLEL_LISTING_JOB_COUNT to check this, while the lock take effect, + // only one thread can really do the build, so the listing job count is 2, the other + // one is cache.load func. Also METRIC_FILES_DISCOVERED is $partition_num * 2 --- End diff -- @gatorsmile Xiao fixed this in https://github.com/apache/spark/pull/16481
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org