Github user viirya commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22313#discussion_r214556040

    --- Diff: sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala ---
    @@ -55,19 +59,52 @@ import org.apache.spark.sql.types._
      * known to be convertible.
      */
     private[orc] object OrcFilters extends Logging {
    +  case class FilterWithTypeMap(filter: Filter, typeMap: Map[String, DataType])
    +
    +  private lazy val cacheExpireTimeout =
    +    org.apache.spark.sql.execution.datasources.orc.OrcFilters.cacheExpireTimeout
    +
    +  private lazy val searchArgumentCache = CacheBuilder.newBuilder()
    +    .expireAfterAccess(cacheExpireTimeout, TimeUnit.SECONDS)
    +    .build(
    +      new CacheLoader[FilterWithTypeMap, Option[Builder]]() {
    +        override def load(typeMapAndFilter: FilterWithTypeMap): Option[Builder] = {
    +          buildSearchArgument(
    +            typeMapAndFilter.typeMap, typeMapAndFilter.filter, SearchArgumentFactory.newBuilder())
    +        }
    +      })
    +
    +  private def getOrBuildSearchArgumentWithNewBuilder(
    +      dataTypeMap: Map[String, DataType],
    +      expression: Filter): Option[Builder] = {
    +    // When `spark.sql.orc.cache.sarg.timeout` is 0, cache is disabled.
    +    if (cacheExpireTimeout > 0) {
    +      searchArgumentCache.get(FilterWithTypeMap(expression, dataTypeMap))
    +    } else {
    +      buildSearchArgument(dataTypeMap, expression, SearchArgumentFactory.newBuilder())
    --- End diff --

    When we set the timeout to zero on the cache, a loaded entry expires immediately, so every access falls through to the loader anyway. Maybe we don't need this explicit timeout check and can simplify the code.
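For context on the suggestion, here is a minimal, self-contained Scala sketch of the Guava behavior the comment relies on (the object name and counter are made up for illustration, not part of the PR): a `LoadingCache` built with a zero `expireAfterAccess` duration treats every entry as already expired when it is read back, so each `get` re-invokes the loader and the zero-timeout case already behaves as if caching were disabled.

```scala
import java.util.concurrent.TimeUnit
import java.util.concurrent.atomic.AtomicInteger

import com.google.common.cache.{CacheBuilder, CacheLoader}

// Hypothetical demo object, not from the PR.
object ZeroTimeoutCacheDemo {
  def main(args: Array[String]): Unit = {
    val loadCount = new AtomicInteger(0)

    // With expireAfterAccess(0, ...), an entry is considered expired as soon
    // as it is written, so the cache never serves a hit.
    val cache = CacheBuilder.newBuilder()
      .expireAfterAccess(0, TimeUnit.SECONDS)
      .build(new CacheLoader[String, Integer]() {
        override def load(key: String): Integer = loadCount.incrementAndGet()
      })

    cache.get("k")
    cache.get("k")

    // Prints 2: the loader ran on both accesses. With a positive timeout the
    // second get would have been a cache hit and this would print 1.
    println(loadCount.get())
  }
}
```

If that reload-on-every-access behavior is acceptable when the timeout is 0, the `if (cacheExpireTimeout > 0)` branch could indeed be dropped, at the cost of still paying the cache's bookkeeping overhead (hashing the key, checking expiry) in the disabled case.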