keith-turner commented on code in PR #3262: URL: https://github.com/apache/accumulo/pull/3262#discussion_r1412550666
########## server/manager/src/main/java/org/apache/accumulo/manager/MultipleManagerUtil.java: ########## @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.accumulo.manager; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import org.apache.accumulo.core.data.TableId; +import org.apache.accumulo.core.util.Pair; +import org.apache.accumulo.server.ServerContext; + +public class MultipleManagerUtil { + + /** + * Each Manager will be responsible for a range(s) of metadata tablets, but we don't want to split + * up a table's metadata tablets between managers as it will throw off the tablet balancing. If + * there are three managers, then we want to split up the metadata tablets roughly into thirds and + * have each manager responsible for one third, for example. 
+ * + * @param context server context + * @param tables set of table ids + * @param numManagers number of managers + * @return list of num manager size, each element containing a set of tables for the manager to + * manage + */ + public static List<Set<TableId>> getTablesForManagers(ServerContext context, Set<TableId> tables, + int numManagers) { + + if (numManagers == 0) { + throw new IllegalStateException("No managers, one or more expected"); + } + + if (numManagers == 1) { + return List.of(tables); + } + + SortedSet<Pair<TableId,Long>> tableTabletCounts = new TreeSet<>(new Comparator<>() { + @Override + public int compare(Pair<TableId,Long> table1, Pair<TableId,Long> table2) { + // sort descending by number of tablets + int result = table1.getSecond().compareTo(table2.getSecond()); + if (result == 0) { + return table1.getFirst().compareTo(table2.getFirst()); + } + return -1 * result; + } + }); + tables.forEach(tid -> { Review Comment: Fluo partitions work among workers using an approach of having [one process determine the partitions](https://github.com/apache/fluo/blob/main/modules/core/src/main/java/org/apache/fluo/core/worker/finder/hash/PartitionManager.java) and put those in ZK. Then all other workers use those partitions from ZK to know what to work on. This approach allows all of the workers to eventually settle on the same partitions, which is what is needed here. Posting the Fluo code to show that it's not a lot of code and encapsulates nicely. We could have a TabletManagementPartitioner that is created and tested as a standalone task in its own PR that does this. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
