Github user jerryshao commented on a diff in the pull request: https://github.com/apache/spark/pull/21895#discussion_r206726059 --- Diff: core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala --- @@ -973,6 +978,42 @@ private[history] object FsHistoryProvider { private[history] val CURRENT_LISTING_VERSION = 1L } +private[history] trait CachedFileSystemHelper extends Logging { + protected def fs: FileSystem + protected def expireTimeInSeconds: Long + + /** + * LRU cache containing the result for the already checked files. + */ + // Visible for testing. + private[history] val cache = CacheBuilder.newBuilder() + .expireAfterAccess(expireTimeInSeconds, TimeUnit.SECONDS) + .build[String, java.lang.Boolean]() --- End diff -- In the real world, there will be many event logs under the folder; this will cause memory usage to increase indefinitely and could potentially lead to OOM. We have seen a customer with more than 100K event logs in this folder.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org