Github user vivekmittal commented on a diff in the pull request: https://github.com/apache/storm/pull/2104#discussion_r115406854 --- Diff: external/storm-kafka-client/src/main/java/org/apache/storm/kafka/spout/internal/OffsetManager.java --- @@ -68,13 +74,34 @@ public OffsetAndMetadata findNextCommitOffset() { KafkaSpoutMessageId nextCommitMsg = null; // this is a convenience variable to make it faster to create OffsetAndMetadata for (KafkaSpoutMessageId currAckedMsg : ackedMsgs) { // complexity is that of a linear scan on a TreeMap - if ((currOffset = currAckedMsg.offset()) == nextCommitOffset + 1) { // found the next offset to commit + currOffset = currAckedMsg.offset(); + if (currOffset == nextCommitOffset + 1) { // found the next offset to commit found = true; nextCommitMsg = currAckedMsg; nextCommitOffset = currOffset; - } else if (currAckedMsg.offset() > nextCommitOffset + 1) { // offset found is not continuous to the offsets listed to go in the next commit, so stop search - LOG.debug("topic-partition [{}] has non-continuous offset [{}]. It will be processed in a subsequent batch.", tp, currOffset); - break; + } else if (currOffset > nextCommitOffset + 1) { + if (emittedOffsets.contains(nextCommitOffset + 1)) { + LOG.debug("topic-partition [{}] has non-continuous offset [{}]. It will be processed in a subsequent batch.", tp, currOffset); + break; + } else { + /* + This case will arise in case of non contiguous offset being processed. + So, if the queue doesn't contain offset = committedOffset + 1 (possible + if the queue is compacted or deleted), the consumer should jump to + the next logical point in the queue. Next logical offset should be the + first element after committedOffset in the ascending ordered emitted set. + */ + LOG.debug("Processed non contiguous offset, the previously committed offset has been deleted from the topic. 
Committed: [{}], Processed: [{}]", committedOffset, currOffset); + final Long nextEmittedOffset = emittedOffsets.ceiling(nextCommitOffset); --- End diff -- Will fix it.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and would like it, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---