sascha-coenen commented on a change in pull request #9350: Overlord to support
autoscalers per indexer/middlemanager category
URL: https://github.com/apache/druid/pull/9350#discussion_r393606276
##########
File path:
indexing-service/src/main/java/org/apache/druid/indexing/overlord/autoscaling/SimpleWorkerProvisioningStrategy.java
##########
@@ -102,69 +102,170 @@ public Provisioner makeProvisioner(WorkerTaskRunner
runner)
private final WorkerTaskRunner runner;
private final ScalingStats scalingStats = new
ScalingStats(config.getNumEventsToTrack());
- private final Set<String> currentlyProvisioning = new HashSet<>();
- private final Set<String> currentlyTerminating = new HashSet<>();
+ private final Map<String, Set<String>> currentlyProvisioningMap = new
HashMap<>();
+ private final Map<String, Set<String>> currentlyTerminatingMap = new
HashMap<>();
- private int targetWorkerCount = -1;
- private DateTime lastProvisionTime = DateTimes.nowUtc();
- private DateTime lastTerminateTime = lastProvisionTime;
+ private final Map<String, Integer> targetWorkerCountMap = new HashMap<>();
+ private final Map<String, DateTime> lastProvisionTimeMap = new HashMap<>();
+ private final Map<String, DateTime> lastTerminateTimeMap = new HashMap<>();
SimpleProvisioner(WorkerTaskRunner runner)
{
this.runner = runner;
}
+ private Map<String, List<TaskRunnerWorkItem>> groupTasksByCategories(
+ Collection<? extends TaskRunnerWorkItem> pendingTasks,
+ WorkerTaskRunner runner,
+ WorkerCategorySpec workerCategorySpec
+ )
+ {
+ Collection<Task> pendingTasksPayload = runner.getPendingTaskPayloads();
+ Map<String, List<Task>> taskPayloadsById = pendingTasksPayload.stream()
+
.collect(Collectors.groupingBy(Task::getId));
+
+ return pendingTasks.stream().collect(Collectors.groupingBy(task -> {
+ List<Task> taskPayloads = taskPayloadsById.get(task.getTaskId());
+ if (taskPayloads == null || taskPayloads.isEmpty()) {
+ return DefaultWorkerBehaviorConfig.DEFAULT_AUTOSCALER_CATEGORY;
+ }
+ return WorkerSelectUtils.getTaskCategory(
+ taskPayloads.get(0),
+ workerCategorySpec,
+ DefaultWorkerBehaviorConfig.DEFAULT_AUTOSCALER_CATEGORY
+ );
+ }));
+ }
+
@Override
public synchronized boolean doProvision()
{
Collection<? extends TaskRunnerWorkItem> pendingTasks =
runner.getPendingTasks();
+ log.debug("Pending tasks: %d %s", pendingTasks.size(), pendingTasks);
Collection<ImmutableWorkerInfo> workers = runner.getWorkers();
+ log.debug("Workers: %d %s", workers.size(), workers);
boolean didProvision = false;
- final DefaultWorkerBehaviorConfig workerConfig =
-
PendingTaskBasedWorkerProvisioningStrategy.getDefaultWorkerBehaviorConfig(workerConfigRef,
"provision", log);
+ final DefaultWorkerBehaviorConfig workerConfig =
ProvisioningUtil.getDefaultWorkerBehaviorConfig(
+ workerConfigRef,
+ "provision"
+ );
if (workerConfig == null) {
+ log.info("No worker config found. Skip provisioning.");
return false;
}
+ WorkerCategorySpec workerCategorySpec =
ProvisioningUtil.getWorkerCategorySpec(workerConfig);
+
+ // Group tasks by categories
+ Map<String, List<TaskRunnerWorkItem>> tasksByCategories =
groupTasksByCategories(
+ pendingTasks,
+ runner,
+ workerCategorySpec
+ );
+
+ Map<String, List<ImmutableWorkerInfo>> workersByCategories =
ProvisioningUtil.getWorkersByCategories(workers);
+
+ // Merge categories of tasks and workers
+ Set<String> allCategories = new HashSet<>(tasksByCategories.keySet());
+ allCategories.addAll(workersByCategories.keySet());
+
+ log.debug(
+ "Pending Tasks of %d categories (%s), Workers of %d categories (%s).
%d common categories: %s",
+ tasksByCategories.size(),
+ tasksByCategories.keySet(),
+ workersByCategories.size(),
+ workersByCategories.keySet(),
+ allCategories.size(),
+ allCategories
+ );
+
+ if (allCategories.isEmpty()) {
+ // Likely empty categories means initialization.
+ // Just try to spinup required amount of workers of each non empty
autoscalers
+ return initAutoscalers(workerConfig);
+ }
+
+ Map<String, AutoScaler> autoscalersByCategory =
ProvisioningUtil.mapAutoscalerByCategory(workerConfig.getAutoScalers());
+
+ for (String category : allCategories) {
+ List<? extends TaskRunnerWorkItem> categoryTasks =
tasksByCategories.getOrDefault(
+ category,
+ Collections.emptyList()
+ );
+ AutoScaler categoryAutoscaler =
ProvisioningUtil.getAutoscalerByCategory(category, autoscalersByCategory);
+
+ if (categoryAutoscaler == null) {
+ log.error("No autoScaler available, cannot execute doProvision for
workers of category %s", category);
+ continue;
+ }
+ // Correct category name by selected autoscaler
+ category = ProvisioningUtil.getAutoscalerCategory(categoryAutoscaler);
+
+ List<ImmutableWorkerInfo> categoryWorkers =
workersByCategories.getOrDefault(category, Collections.emptyList());
+ currentlyProvisioningMap.putIfAbsent(category, new HashSet<>());
+ Set<String> currentlyProvisioning =
this.currentlyProvisioningMap.get(category);
+ currentlyTerminatingMap.putIfAbsent(category, new HashSet<>());
+ Set<String> currentlyTerminating =
this.currentlyTerminatingMap.get(category);
+
+ didProvision = doProvision(
Review comment:
You are perfectly right about the repeated code logic. However, I'm not able
to refactor anything. For one thing, the way that responsibilities are spread
across classes seems to make it impossible to refactor just the affected
methods; doing so would require a larger rewrite.
On a pragmatic level, I'm also facing the issue that executing "mvn test" on
just the druid-indexing module takes 15 minutes. If I try to execute the
respective test suite alone (SimpleProvisioningStrategyTest) in IntelliJ, I get
an ANTLR error:
```
/opt/repos/druid/core/src/main/java/org/apache/druid/math/expr/ExprListenerImpl.java
Error:(28, 40) java: package org.apache.druid.math.expr.antlr does not exist
```
Can you advise?
Also, one innocent question: I see these "resolve conversation" buttons and
wonder whether I am supposed to mark a conversation as resolved if I believe I
have addressed a review comment, or whether that is rather meant to be the
reviewer's privilege.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]