Github user caneGuy commented on a diff in the pull request: https://github.com/apache/spark/pull/20667#discussion_r170516775

--- Diff: core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala ---
@@ -132,10 +133,15 @@ private[spark] object BlockManagerId {
     getCachedBlockManagerId(obj)
   }
 
-  val blockManagerIdCache = new ConcurrentHashMap[BlockManagerId, BlockManagerId]()
+  val blockManagerIdCache = new TimeStampedHashMap[BlockManagerId, BlockManagerId](true)
 
-  def getCachedBlockManagerId(id: BlockManagerId): BlockManagerId = {
+  def getCachedBlockManagerId(id: BlockManagerId, clearOldValues: Boolean = false): BlockManagerId =
+  {
     blockManagerIdCache.putIfAbsent(id, id)
-    blockManagerIdCache.get(id)
+    val blockManagerId = blockManagerIdCache.get(id)
+    if (clearOldValues) {
+      blockManagerIdCache.clearOldValues(System.currentTimeMillis - Utils.timeStringAsMs("10d"))
--- End diff --

@Ngone51 Thanks. I also thought about removing the entry when we delete a block. In this case, though, it is history replaying that triggers the problem, and we do not actually delete any block. Maybe using a `WeakReference` would be better, as @jiangxb1987 mentioned? WDYT? Thanks again!
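For the record, here is a rough sketch of what the weak-reference approach could look like, e.g. via Guava's weak interner (Guava is already a Spark dependency). This is just an illustration of the idea, not the actual change; the object name `WeakBlockManagerIdCache` is hypothetical:

```scala
// Sketch only (hypothetical, not the merged fix): intern BlockManagerId
// instances through Guava's weak interner instead of a ConcurrentHashMap /
// TimeStampedHashMap, so entries are reclaimed by the GC once nothing else
// references them. Placed in this package because BlockManagerId is
// private[spark].
package org.apache.spark.storage

import com.google.common.collect.{Interner, Interners}

object WeakBlockManagerIdCache {
  // newWeakInterner() holds entries on weak references with equals()-based
  // lookup, so equal ids still resolve to one canonical instance.
  private val interner: Interner[BlockManagerId] = Interners.newWeakInterner()

  // Returns the canonical instance for `id`. Unreferenced entries are
  // dropped automatically by the GC, so the cache cannot grow without
  // bound while the history server replays event logs.
  def getCachedBlockManagerId(id: BlockManagerId): BlockManagerId =
    interner.intern(id)
}
```

This would avoid both the unbounded growth of the current `ConcurrentHashMap` and the manual timestamp-based eviction in the diff above, at the cost of the small overhead of weak-reference bookkeeping.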