philipnee commented on code in PR #13380:
URL: https://github.com/apache/kafka/pull/13380#discussion_r1143888783
########## clients/src/main/java/org/apache/kafka/clients/consumer/internals/CommitRequestManager.java: ##########
@@ -207,6 +226,232 @@ public NetworkClientDelegate.UnsentRequest toUnsentRequest() {
         }
     }

+    private class UnsentOffsetFetchRequest extends RequestState {
+        public final Set<TopicPartition> requestedPartitions;
+        public final GroupState.Generation requestedGeneration;
+        public CompletableFuture<Map<TopicPartition, OffsetAndMetadata>> future;
+
+        public UnsentOffsetFetchRequest(final Set<TopicPartition> partitions,
+                                        final GroupState.Generation generation,
+                                        final long retryBackoffMs) {
+            super(retryBackoffMs);
+            this.requestedPartitions = partitions;
+            this.requestedGeneration = generation;
+            this.future = new CompletableFuture<>();
+        }
+
+        public boolean sameRequest(final UnsentOffsetFetchRequest request) {
+            return Objects.equals(requestedGeneration, request.requestedGeneration) && requestedPartitions.equals(request.requestedPartitions);
+        }
+
+        public NetworkClientDelegate.UnsentRequest toUnsentRequest(final long currentTimeMs) {
+            OffsetFetchRequest.Builder builder = new OffsetFetchRequest.Builder(
+                groupState.groupId,
+                true,
+                new ArrayList<>(this.requestedPartitions),
+                throwOnFetchStableOffsetUnsupported);
+            NetworkClientDelegate.UnsentRequest unsentRequest = new NetworkClientDelegate.UnsentRequest(
+                builder,
+                coordinatorRequestManager.coordinator());
+            unsentRequest.future().whenComplete((r, t) -> {
+                onResponse(currentTimeMs, (OffsetFetchResponse) r.responseBody());
+            });
+            return unsentRequest;
+        }
+
+        public void onResponse(
+            final long currentTimeMs,
+            final OffsetFetchResponse response) {
+            Errors responseError = response.groupLevelError(groupState.groupId);
+            if (responseError != Errors.NONE) {
+                onFailure(currentTimeMs, responseError);
+                return;
+            }
+            onSuccess(currentTimeMs, response);
+        }
+
+        private void onFailure(final long currentTimeMs,
+                               final Errors responseError) {
+            log.debug("Offset fetch failed: {}", responseError.message());
+
+            // TODO: should we retry on COORDINATOR_NOT_AVAILABLE as well?
+            if (responseError == Errors.COORDINATOR_LOAD_IN_PROGRESS) {
+                retry(currentTimeMs);
+            } else if (responseError == Errors.NOT_COORDINATOR) {
+                // re-discover the coordinator and retry
+                coordinatorRequestManager.markCoordinatorUnknown(responseError.message(), Time.SYSTEM.milliseconds());
+                retry(currentTimeMs);
+            } else if (responseError == Errors.GROUP_AUTHORIZATION_FAILED) {
+                future.completeExceptionally(GroupAuthorizationException.forGroupId(groupState.groupId));
+            } else {
+                future.completeExceptionally(new KafkaException("Unexpected error in fetch offset response: " + responseError.message()));
+            }
+        }
+
+        private void retry(final long currentTimeMs) {
+            onFailedAttempt(currentTimeMs);
+            onSendAttempt(currentTimeMs);
+            pendingRequests.addOffsetFetchRequest(this);
+        }
+
+        private void onSuccess(final long currentTimeMs,
+                               final OffsetFetchResponse response) {
+            Set<String> unauthorizedTopics = null;
+            Map<TopicPartition, OffsetFetchResponse.PartitionData> responseData =
+                response.partitionDataMap(groupState.groupId);
+            Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>(responseData.size());
+            Set<TopicPartition> unstableTxnOffsetTopicPartitions = new HashSet<>();
+            for (Map.Entry<TopicPartition, OffsetFetchResponse.PartitionData> entry : responseData.entrySet()) {
+                TopicPartition tp = entry.getKey();
+                OffsetFetchResponse.PartitionData partitionData = entry.getValue();
+                if (partitionData.hasError()) {
+                    Errors error = partitionData.error;
+                    log.debug("Failed to fetch offset for partition {}: {}", tp, error.message());
+
+                    if (error == Errors.UNKNOWN_TOPIC_OR_PARTITION) {
+                        future.completeExceptionally(new KafkaException("Topic or Partition " + tp + " does not exist"));
+                        return;
+                    } else if (error == Errors.TOPIC_AUTHORIZATION_FAILED) {
+                        if (unauthorizedTopics == null) {
+                            unauthorizedTopics = new HashSet<>();
+                        }
+                        unauthorizedTopics.add(tp.topic());
+                    } else if (error == Errors.UNSTABLE_OFFSET_COMMIT) {
+                        unstableTxnOffsetTopicPartitions.add(tp);
+                    } else {
+                        future.completeExceptionally(new KafkaException("Unexpected error in fetch offset " +
+                            "response for partition " + tp + ": " + error.message()));
+                        return;
+                    }
+                } else if (partitionData.offset >= 0) {
+                    // record the position with the offset (-1 indicates no committed offset to fetch);
+                    // if there's no committed offset, record as null
+                    offsets.put(tp, new OffsetAndMetadata(partitionData.offset, partitionData.leaderEpoch, partitionData.metadata));
+                } else {
+                    log.info("Found no committed offset for partition {}", tp);
+                    offsets.put(tp, null);
+                }
+            }
+
+            if (unauthorizedTopics != null) {
+                future.completeExceptionally(new TopicAuthorizationException(unauthorizedTopics));
+            } else if (!unstableTxnOffsetTopicPartitions.isEmpty()) {
+                // TODO: Optimization question: Do we need to retry all partitions upon a single partition error?
+ log.info("The following partitions still have unstable offsets " + + "which are not cleared on the broker side: {}" + + ", this could be either " + + "transactional offsets waiting for completion, or " + + "normal offsets waiting for replication after appending to local log", unstableTxnOffsetTopicPartitions); + retry(currentTimeMs); + } else { + future.complete(offsets); + } + } + + private CompletableFuture<Map<TopicPartition, OffsetAndMetadata>> chainFuture(final CompletableFuture<Map<TopicPartition, OffsetAndMetadata>> future) { + return this.future.whenComplete((r, t) -> { + if (t != null) { + future.completeExceptionally(t); + } else { + future.complete(r); + } + }); + } + } + + /** + * <p>This is used to stage the unsent requests, i.e., {@link UnsentOffsetCommitRequest} and {@link UnsentOffsetFetchRequest}. + * + * <p>If the request is new. It will be enqueued and will be sent upon the next + * poll. + * + * <p>If the same request has been sent, the request's {@code CompletableFuture} will be completed upon the + * completion of the existing one. + * + * <p>There is a special handling for the {@link UnsentOffsetFetchRequest}. If a duplicated request was sent + * previously, we will chain the future to the current future. + */ + + class PendingRequests { + // Queue is used to ensure the sequence of commit + Queue<UnsentOffsetCommitRequest> unsentOffsetCommits = new LinkedList<>(); + List<UnsentOffsetFetchRequest> unsentOffsetFetches = new ArrayList<>(); + List<UnsentOffsetFetchRequest> inflightOffsetFetches = new ArrayList<>(); + + public boolean hasUnsentRequests() { + return !unsentOffsetCommits.isEmpty() || !unsentOffsetFetches.isEmpty(); + } + + public CompletableFuture<ClientResponse> addOffsetCommitRequest(final Map<TopicPartition, OffsetAndMetadata> offsets) { + UnsentOffsetCommitRequest request = new UnsentOffsetCommitRequest( + offsets, + groupState.groupId, + groupState.groupInstanceId.orElse(null), + groupState.generation); + unsentOffsetCommits.add(request); + return request.future(); + } + + /** + * We want to avoid duplication when sending an {@link OffsetFetchRequest}. The following checks are done: + * <li>1. dedupe against unsents: if a duplicated request was previously made, we chain the future</> + * <li>2. 
Review Comment:
   So `unsentOffsetCommits` and `unsentOffsetFetches` track all of the requests that haven't yet been returned by the poll call (and that subsequently make it to the NetworkClient). poll calls drain() at the end, which dumps the unsent fetch requests into `inflightOffsetFetches`. I think the naming and the way the code is structured are probably too confusing. Maybe there's a better way to bookkeep these inflight/unsent requests.
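   One possible alternative shape, purely as a sketch and not something this PR implements: keep a single registry keyed by the request's identity with an explicit lifecycle state, so drain() becomes a state transition instead of a move between two collections. The `FetchRegistry` name and partition-set key below are hypothetical.

       import java.util.ArrayList;
       import java.util.LinkedHashMap;
       import java.util.List;
       import java.util.Map;
       import java.util.Set;

       // Hypothetical bookkeeping: one registry with an explicit state, instead of
       // separate unsentOffsetFetches / inflightOffsetFetches lists.
       class FetchRegistry {
           enum State { UNSENT, INFLIGHT }

           private final Map<Set<String>, State> requests = new LinkedHashMap<>();

           void enqueue(Set<String> partitions) {
               requests.putIfAbsent(partitions, State.UNSENT); // dedupe by key
           }

           // drain() is a state transition, not a move between two collections.
           List<Set<String>> drain() {
               List<Set<String>> sendable = new ArrayList<>();
               requests.replaceAll((partitions, state) -> {
                   if (state == State.UNSENT) {
                       sendable.add(partitions);
                       return State.INFLIGHT;
                   }
                   return state;
               });
               return sendable;
           }

           void complete(Set<String> partitions) {
               requests.remove(partitions); // drop once the response arrives
           }
       }

       public class RegistryDemo {
           public static void main(String[] args) {
               FetchRegistry registry = new FetchRegistry();
               registry.enqueue(Set.of("t-0"));
               registry.enqueue(Set.of("t-0"));       // deduped
               System.out.println(registry.drain());  // [[t-0]]
               System.out.println(registry.drain());  // [] -- already in flight
               registry.complete(Set.of("t-0"));
           }
       }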