Github user vanzin commented on a diff in the pull request: https://github.com/apache/spark/pull/22504#discussion_r226458986 --- Diff: core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala --- @@ -800,14 +817,33 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) stale.foreach { log => if (log.appId.isEmpty) { logInfo(s"Deleting invalid / corrupt event log ${log.logPath}") - deleteLog(new Path(log.logPath)) + deleteLog(fs, new Path(log.logPath)) listing.delete(classOf[LogInfo], log.logPath) } } // Clean the blacklist from the expired entries. clearBlacklist(CLEAN_INTERVAL_S) } + /** + * Delete driver logs from the configured spark dfs dir that exceed the configured max age + */ + private[history] def cleanDriverLogs(): Unit = Utils.tryLog { + val driverLogDir = conf.get(DRIVER_LOG_DFS_DIR) + driverLogDir.foreach { dl => + val maxTime = clock.getTimeMillis() - + conf.get(MAX_DRIVER_LOG_AGE_S) * 1000 + val appDirs = driverLogFs.get.listLocatedStatus(new Path(dl)) + while (appDirs.hasNext()) { + val appDirStatus = appDirs.next() + if (appDirStatus.getModificationTime() < maxTime) { + logInfo(s"Deleting expired driver log for: ${appDirStatus.getPath().getName()}") + deleteLog(driverLogFs.get, appDirStatus.getPath()) --- End diff -- How are you differentiating logs that are being actively written to? There's this comment at the top of this class: "some filesystems do not appear to update the `modtime` value whenever data is flushed to an open file output stream" That was added because event logs actively being written to don't have their mtime updated. There's a lot of code in this class to deal with that. In that situation you may run into issues here; either deleting a log for an active app, which would then cause lots of errors in that app's logs, or spamming the SHS logs with errors that this log cannot be deleted (not sure what's HDFS's behavior, but I believe it's the former).
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org