Github user dongjoon-hyun commented on a diff in the pull request: https://github.com/apache/spark/pull/22313#discussion_r214743988 --- Diff: sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala --- @@ -55,19 +59,52 @@ import org.apache.spark.sql.types._ * known to be convertible. */ private[orc] object OrcFilters extends Logging { + case class FilterWithTypeMap(filter: Filter, typeMap: Map[String, DataType]) + + private lazy val cacheExpireTimeout = + org.apache.spark.sql.execution.datasources.orc.OrcFilters.cacheExpireTimeout + + private lazy val searchArgumentCache = CacheBuilder.newBuilder() + .expireAfterAccess(cacheExpireTimeout, TimeUnit.SECONDS) + .build( + new CacheLoader[FilterWithTypeMap, Option[Builder]]() { + override def load(typeMapAndFilter: FilterWithTypeMap): Option[Builder] = { + buildSearchArgument( + typeMapAndFilter.typeMap, typeMapAndFilter.filter, SearchArgumentFactory.newBuilder()) + } + }) + + private def getOrBuildSearchArgumentWithNewBuilder( + dataTypeMap: Map[String, DataType], + expression: Filter): Option[Builder] = { + // When `spark.sql.orc.cache.sarg.timeout` is 0, cache is disabled. + if (cacheExpireTimeout > 0) { + searchArgumentCache.get(FilterWithTypeMap(expression, dataTypeMap)) + } else { + buildSearchArgument(dataTypeMap, expression, SearchArgumentFactory.newBuilder()) --- End diff -- Ya. It's possible. But, if we create a Guava loading cache and pass through all the cache management logic in Guava, it means more overhead than this PR. In this PR, `spark.sql.orc.cache.sarg.timeout=0` means not creating the loading cache at all.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org