rreddy-22 commented on code in PR #13443:
URL: https://github.com/apache/kafka/pull/13443#discussion_r1164906996


##########
group-coordinator/src/main/java/org/apache/kafka/coordinator/group/assignor/ServerSideStickyRangeAssignor.java:
##########
@@ -0,0 +1,257 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.coordinator.group.assignor;
+
+import org.apache.kafka.common.Uuid;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import static java.lang.Math.min;
+
+/**
+ * <p>The Server Side Sticky Range Assignor inherits properties of both the range assignor and the sticky assignor.
+ * Properties are :-
+ * <ul>
+ * <li> 1) Each consumer must get at least one partition per topic that it is subscribed to whenever the number of consumers is
+ *    less than or equal to the number of partitions for that topic. (Range) </li>
+ * <li> 2) Partitions should be assigned to consumers in a way that facilitates join operations where required. (Range) </li>
+ *    This can only be done if the topics are co-partitioned in the first place
+ *    Co-partitioned:-
+ *    Two streams are co-partitioned if the following conditions are met:-
+ * ->The keys must have the same schemas
+ * ->The topics involved must have the same number of partitions
+ * <li> 3) Consumers should retain as much of their previous assignment as possible. (Sticky) </li>
+ * </ul>
+ * </p>
+ *
+ * <p>The algorithm works mainly in 5 steps described below
+ * <ul>
+ * <li> 1) Get a map of the consumersPerTopic created using the member subscriptions.</li>
+ * <li> 2) Get a list of consumers (potentiallyUnfilled) that have not met the minimum required quota for assignment AND
+ * get a list of sticky partitions that we want to retain in the new assignment.</li>
+ * <li> 3) Add consumers from potentiallyUnfilled to Unfilled if they haven't met the total required quota = minQuota + (if necessary) extraPartition </li>
+ * <li> 4) Get a list of available partitions by calculating the difference between total partitions and assigned sticky partitions </li>
+ * <li> 5) Iterate through unfilled consumers and assign partitions from available partitions </li>
+ * </ul>
+ * </p>
+ *
+ */
+public class ServerSideStickyRangeAssignor implements PartitionAssignor {
+
+    public static final String RANGE_ASSIGNOR_NAME = "range";
+
+    @Override
+    public String name() {
+        return RANGE_ASSIGNOR_NAME;
+    }
+
+    private static <K, V> void putList(Map<K, List<V>> map, K key, V value) {
+        List<V> list = map.computeIfAbsent(key, k -> new ArrayList<>());
+        list.add(value);
+    }
+
+    private static <K, V> void putSet(Map<K, Set<V>> map, K key, V value) {
+        Set<V> set = map.computeIfAbsent(key, k -> new HashSet<>());
+        set.add(value);
+    }
+
+    static class Pair<T, U> {
+        private final T first;
+        private final U second;
+
+        public Pair(T first, U second) {
+            this.first = first;
+            this.second = second;
+        }
+
+        public T getFirst() {
+            return first;
+        }
+
+        public U getSecond() {
+            return second;
+        }
+
+        @Override
+        public String toString() {
+            return "(" + first + ", " + second + ")";
+        }
+    }
+
+    // Returns a map of the list of consumers per Topic (keyed by topicId)
+    private Map<Uuid, List<String>> consumersPerTopic(AssignmentSpec 
assignmentSpec) {
+        Map<Uuid, List<String>> mapTopicsToConsumers = new HashMap<>();
+        Map<String, AssignmentMemberSpec> membersData = assignmentSpec.members;
+
+        for (Map.Entry<String, AssignmentMemberSpec> memberEntry : 
membersData.entrySet()) {
+            String memberId = memberEntry.getKey();
+            AssignmentMemberSpec memberMetadata = memberEntry.getValue();
+            Collection<Uuid> topics = memberMetadata.subscribedTopics;
+            for (Uuid topicId: topics) {
+                putList(mapTopicsToConsumers, topicId, memberId);
+            }
+        }
+        return mapTopicsToConsumers;
+    }
+
+    private Map<Uuid, List<Integer>> 
getAvailablePartitionsPerTopic(AssignmentSpec assignmentSpec, Map<Uuid, 
Set<Integer>> assignedStickyPartitionsPerTopic) {
+        Map<Uuid, List<Integer>> availablePartitionsPerTopic = new HashMap<>();
+        Map<Uuid, AssignmentTopicMetadata> topicsMetadata = 
assignmentSpec.topics;
+
+        for (Map.Entry<Uuid, AssignmentTopicMetadata> topicMetadataEntry : 
topicsMetadata.entrySet()) {
+            Uuid topicId = topicMetadataEntry.getKey();
+            ArrayList<Integer> availablePartitionsForTopic = new ArrayList<>();
+            int numPartitions = topicsMetadata.get(topicId).numPartitions;
+            // since the loop iterates from 0 to n, the partitions will be in 
ascending order within the list of available partitions per topic
+            Set<Integer> assignedStickyPartitionsForTopic = 
assignedStickyPartitionsPerTopic.getOrDefault(topicId, new HashSet<>());
+            for (int i = 0; i < numPartitions; i++) {
+                if (!assignedStickyPartitionsForTopic.contains(i)) {
+                    availablePartitionsForTopic.add(i);
+                }
+            }
+            availablePartitionsPerTopic.put(topicId, 
availablePartitionsForTopic);
+        }
+        return availablePartitionsPerTopic;
+    }
+
+    @Override
+    public GroupAssignment assign(AssignmentSpec assignmentSpec) throws 
PartitionAssignorException {
+        Map<String, Map<Uuid, Set<Integer>>> membersWithNewAssignmentPerTopic 
= new HashMap<>();
+        // Step 1
+        Map<Uuid, List<String>> consumersPerTopic = 
consumersPerTopic(assignmentSpec);
+        // Step 2
+        Map<Uuid, List<Pair<String, Integer>>> unfilledConsumersPerTopic = new 
HashMap<>();
+        Map<Uuid, Set<Integer>> assignedStickyPartitionsPerTopic = new 
HashMap<>();
+
+        for (Map.Entry<Uuid, List<String>> topicEntry : 
consumersPerTopic.entrySet()) {
+            Uuid topicId = topicEntry.getKey();
+            // For each topic we have a temporary list of consumers stored in potentiallyUnfilledConsumers.
+            // The list is populated with consumers that satisfy one of the two conditions :-
+            // 1) Consumers that have the minimum required number of partitions i.e. numPartitionsPerConsumer BUT they could be assigned an extra partition later on.
+            //    In this case we add the consumer to the unfilled consumers map iff an extra partition needs to be assigned to it.
+            // 2) Consumers that don't have the minimum required partitions, so irrespective of whether they get an extra partition or not they get added to the unfilled map later.
+            List<Pair<String, Integer>> potentiallyUnfilledConsumers = new 
ArrayList<>();
+            List<String> consumersForTopic = topicEntry.getValue();
+
+            AssignmentTopicMetadata topicData = 
assignmentSpec.topics.get(topicId);
+            int numPartitionsForTopic = topicData.numPartitions;
+            // Initially, minRequiredQuota is the minimum number of partitions 
that each consumer should get i.e numPartitionsPerConsumer.
+            // Idle consumers case :- The numConsumers subscribed to a topic 
is greater than numPartitions. In such cases, all consumers get assigned via 
the "extra partitions" logic since min = 0.
+            int minRequiredQuota = numPartitionsForTopic / 
consumersForTopic.size();
+            // Each consumer can get only one extra partition per topic after 
receiving the minimum quota = numPartitionsPerConsumer
+            int numConsumersWithExtraPartition = numPartitionsForTopic % 
consumersForTopic.size();
+
+            for (String memberId: consumersForTopic) {
+
+                // Convert the set to a list first and sort the partitions in 
numeric order since we want the same partition numbers from each topic
+                // to go to the same consumer in case of co-partitioned topics.
+                Set<Integer> currentAssignmentSetForTopic =  
assignmentSpec.members.get(memberId).currentAssignmentPerTopic.getOrDefault(topicId,
 new HashSet<>());
+                // Size of the older assignment, this will be 0 when assign is 
called for the first time.
+                // The older assignment is required when a reassignment occurs 
to ensure stickiness.
+                int currentAssignmentSize = 
currentAssignmentSetForTopic.size();
+                List<Integer> currentAssignmentListForTopic = new 
ArrayList<>(currentAssignmentSetForTopic);
+
+                // If there are previously assigned partitions present, we 
want to retain them.
+                if (currentAssignmentSize > 0) {
+                    // We either need to retain currentSize number of 
partitions when currentSize < required OR required number of partitions 
otherwise.
+                    int retainedPartitionsCount = min(currentAssignmentSize, 
minRequiredQuota);
+                    Collections.sort(currentAssignmentListForTopic);
+                    for (int i = 0; i < retainedPartitionsCount; i++) {
+                        putSet(assignedStickyPartitionsPerTopic, topicId, 
currentAssignmentListForTopic.get(i));
+                        
membersWithNewAssignmentPerTopic.computeIfAbsent(memberId, k -> new 
HashMap<>()).computeIfAbsent(topicId, k -> new 
HashSet<>()).add(currentAssignmentListForTopic.get(i));
+                    }
+                }
+
+                // Number of partitions left to reach the minRequiredQuota.
+                int remaining = minRequiredQuota - currentAssignmentSize;
+
+                // There are 3 cases w.r.t value of remaining
+                // 1) remaining < 0 this means that the consumer has more than 
the min required amount.
+                // It could have an extra partition, so we check for that.
+                if (remaining < 0 && numConsumersWithExtraPartition > 0) {
+                    // In order to remain as sticky as possible, since the order of members can be different, we want the consumers that already had extra
+                    // partitions to retain them if it's still required, instead of assigning the extras to the first few consumers directly.
+                    // Ex:- If two consumers out of 3 are supposed to get an extra partition and currently 1 of them already has the extra, we want this consumer
+                    // to retain it first and later if we have partitions left they will be assigned to the first few consumers and the unfilled map is updated
+                    numConsumersWithExtraPartition--;
+                    // Since we already added the minimumRequiredQuota of 
partitions in the previous step (until minReq - 1), we just need to
+                    // add the extra partition which will be present at the 
index right after min quota is satisfied.
+                    putSet(assignedStickyPartitionsPerTopic, topicId, 
currentAssignmentListForTopic.get(minRequiredQuota));
+                    membersWithNewAssignmentPerTopic.computeIfAbsent(memberId, 
k -> new HashMap<>()).computeIfAbsent(topicId, k -> new 
HashSet<>()).add(currentAssignmentListForTopic.get(minRequiredQuota));
+                } else {
+                    // 3) If remaining = 0 it has min req partitions but there 
is scope for getting an extra partition later on, so it is a 
potentialUnfilledConsumer.

Review Comment:
   Done, my bad with the numbers.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: jira-unsubscr...@kafka.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to