gianm commented on code in PR #12901: URL: https://github.com/apache/druid/pull/12901#discussion_r945351663
########## indexing-service/src/main/java/org/apache/druid/indexing/overlord/TaskQueue.java: ########## @@ -568,62 +591,65 @@ private void notifyStatus(final Task task, final TaskStatus taskStatus, String r taskStatus.getId() ); - // Inform taskRunner that this task can be shut down - TaskLocation taskLocation = TaskLocation.unknown(); - try { - taskLocation = taskRunner.getTaskLocation(task.getId()); - taskRunner.shutdown(task.getId(), reasonFormat, args); + if (!taskStatus.isComplete()) { + // Nothing to do for incomplete statuses. + return; } - catch (Exception e) { - log.warn(e, "TaskRunner failed to cleanup task after completion: %s", task.getId()); - } - - int removed = 0; - - ///////// critical section + // Critical section: add this task to recentlyCompletedTasks, so it isn't managed while being cleaned up. giant.lock(); try { - // Remove from running tasks - for (int i = tasks.size() - 1; i >= 0; i--) { - if (tasks.get(i).getId().equals(task.getId())) { - removed++; - removeTaskInternal(tasks.get(i)); - break; - } - } - - // Remove from futures list - taskFutures.remove(task.getId()); + recentlyCompletedTasks.add(task.getId()); } finally { giant.unlock(); } - ///////// end critical + final TaskLocation taskLocation = taskRunner.getTaskLocation(task.getId()); - if (removed == 0) { - log.warn("Unknown task completed: %s", task.getId()); + // Save status to metadata store first, so if we crash while doing the rest of the shutdown, our successor Review Comment: The order switch solves the original race but creates a new one, which you have listed as case (2). It would create a situation where two threads are trying to clean up the same task at the same time. This may be fine, but I don't think it's prudent to rely on it being fine. It would be cleaner to ensure that only one thread tries to clean up the task. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@druid.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@druid.apache.org For additional commands, e-mail: commits-h...@druid.apache.org