Github user xuanyuanking commented on a diff in the pull request:

    https://github.com/apache/spark/pull/16135#discussion_r156254083
  
    --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala ---
    @@ -352,4 +353,34 @@ class PartitionedTablePerfStatsSuite
           }
         }
       }
    +
    +  test("SPARK-18700: table loaded only once even when resolved 
concurrently") {
    +    withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "false") {
    +      withTable("test") {
    +        withTempDir { dir =>
    +          HiveCatalogMetrics.reset()
    +          setupPartitionedHiveTable("test", dir, 50)
    +          // select the table in multi-threads
    +          val executorPool = Executors.newFixedThreadPool(10)
    +          (1 to 10).map(threadId => {
    +            val runnable = new Runnable {
    +              override def run(): Unit = {
    +                spark.sql("select * from test where partCol1 = 
999").count()
    +              }
    +            }
    +            executorPool.execute(runnable)
    +            None
    +          })
    +          executorPool.shutdown()
    +          executorPool.awaitTermination(30, TimeUnit.SECONDS)
    +          // check the cache hit, we use the metric of METRIC_FILES_DISCOVERED and
    +          // METRIC_PARALLEL_LISTING_JOB_COUNT to check this, while the lock take effect,
    +          // only one thread can really do the build, so the listing job count is 2, the other
    +          // one is cache.load func. Also METRIC_FILES_DISCOVERED is $partition_num * 2
    --- End diff --
    
    @gatorsmile Xiao fixed this in https://github.com/apache/spark/pull/16481


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to