FrankYang0529 commented on code in PR #19699:
URL: https://github.com/apache/kafka/pull/19699#discussion_r2180223523


##########
transaction-coordinator/src/main/java/org/apache/kafka/coordinator/transaction/TransactionMetadata.java:
##########
@@ -0,0 +1,689 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.kafka.coordinator.transaction;
+
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.config.LogLevelConfig;
+import org.apache.kafka.common.protocol.Errors;
+import org.apache.kafka.common.record.RecordBatch;
+import org.apache.kafka.server.common.TransactionVersion;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.slf4j.MarkerFactory;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.function.Supplier;
+
+public class TransactionMetadata {
+    private static final Logger LOGGER = LoggerFactory.getLogger(TransactionMetadata.class);
+    private final String transactionalId;
+    private long producerId;
+    private long prevProducerId;
+    private long nextProducerId;
+    private short producerEpoch;
+    private short lastProducerEpoch;
+    private int txnTimeoutMs;
+    private TransactionState state;
+    private Set<TopicPartition> topicPartitions;
+    private volatile long txnStartTimestamp;
+    private volatile long txnLastUpdateTimestamp;
+    private TransactionVersion clientTransactionVersion;
+
+    // pending state is used to indicate the state that this transaction is going to
+    // transit to, and for blocking future attempts to transit it again if it is not legal;
+    // initialized as the same as the current state
+    private Optional<TransactionState> pendingState;
+
+    // Indicates that during a previous attempt to fence a producer, the bumped epoch may not have been
+    // successfully written to the log. If this is true, we will not bump the epoch again when fencing
+
+    private final ReentrantLock lock;
+
+    public static boolean isEpochExhausted(short producerEpoch) {
+        return producerEpoch >= Short.MAX_VALUE - 1;
+    }
+
+    /**
+     * @param transactionalId          transactional id
+     * @param producerId               producer id
+     * @param prevProducerId           producer id for the last committed transaction with this transactional ID
+     * @param nextProducerId           Latest producer ID sent to the producer for the given transactional ID
+     * @param producerEpoch            current epoch of the producer
+     * @param lastProducerEpoch        last epoch of the producer
+     * @param txnTimeoutMs             timeout to be used to abort long running transactions
+     * @param state                    current state of the transaction
+     * @param topicPartitions          current set of partitions that are part of this transaction
+     * @param txnStartTimestamp        time the transaction was started, i.e., when first partition is added
+     * @param txnLastUpdateTimestamp   updated when any operation updates the TransactionMetadata. To be used for expiration
+     * @param clientTransactionVersion TransactionVersion used by the client when the state was transitioned
+     */
+    public TransactionMetadata(String transactionalId,
+                               long producerId,
+                               long prevProducerId,
+                               long nextProducerId,
+                               short producerEpoch,
+                               short lastProducerEpoch,
+                               int txnTimeoutMs,
+                               TransactionState state,
+                               Set<TopicPartition> topicPartitions,
+                               long txnStartTimestamp,
+                               long txnLastUpdateTimestamp,
+                               TransactionVersion clientTransactionVersion) {
+        this.transactionalId = transactionalId;
+        this.producerId = producerId;
+        this.prevProducerId = prevProducerId;
+        this.nextProducerId = nextProducerId;
+        this.producerEpoch = producerEpoch;
+        this.lastProducerEpoch = lastProducerEpoch;
+        this.txnTimeoutMs = txnTimeoutMs;
+        this.state = state;
+        this.topicPartitions = new HashSet<>(topicPartitions);
+        this.txnStartTimestamp = txnStartTimestamp;
+        this.txnLastUpdateTimestamp = txnLastUpdateTimestamp;
+        this.clientTransactionVersion = clientTransactionVersion;
+        this.pendingState = Optional.empty();
+        this.hasFailedEpochFence = false;
+        this.lock = new ReentrantLock();
+    }
+
+    public <T> T inLock(Supplier<T> function) {
+        lock.lock();
+        try {
+            return function.get();
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    public void addPartitions(Collection<TopicPartition> partitions) {
+        topicPartitions.addAll(partitions);
+    }
+
+    public void removePartition(TopicPartition topicPartition) {
+        if (state != TransactionState.PREPARE_COMMIT && state != TransactionState.PREPARE_ABORT)
+            throw new IllegalStateException("Transaction metadata's current state is " + state + ", and its pending state is " +
+                pendingState + " while trying to remove partitions whose txn marker has been sent, this is not expected");
+
+        topicPartitions.remove(topicPartition);
+    }
+
+    // this is visible for test only
+    public TxnTransitMetadata prepareNoTransit() {
+        // do not call transitTo as it will set the pending state, a follow-up call to abort the transaction will set its pending state
+        return new TxnTransitMetadata(producerId, prevProducerId, nextProducerId, producerEpoch, lastProducerEpoch, txnTimeoutMs,
+            state, Set.copyOf(topicPartitions), txnStartTimestamp, txnLastUpdateTimestamp, clientTransactionVersion);
+    }
+
+    public TxnTransitMetadata prepareFenceProducerEpoch() {
+        if (producerEpoch == Short.MAX_VALUE)
+            throw new IllegalStateException("Cannot fence producer with epoch 
equal to Short.MaxValue since this would overflow");
+
+        // If we've already failed to fence an epoch (because the write to the 
log failed), we don't increase it again.
+        // This is safe because we never return the epoch to client if we fail 
to fence the epoch
+        short bumpedEpoch = hasFailedEpochFence ? producerEpoch : (short) 
(producerEpoch + 1);
+
+        TransitionData data = new 
TransitionData(TransactionState.PREPARE_EPOCH_FENCE);
+        data.producerEpoch = bumpedEpoch;
+        return prepareTransitionTo(data);
+    }
+
+    public TxnTransitMetadata prepareIncrementProducerEpoch(
+        int newTxnTimeoutMs,
+        Optional<Short> expectedProducerEpoch,
+        long updateTimestamp) {
+        if (isProducerEpochExhausted())
+            throw new IllegalStateException("Cannot allocate any more producer 
epochs for producerId " + producerId);
+
+        short bumpedEpoch = (short) (producerEpoch + 1);
+        short produceEpochResult;
+        short lastProducerEpochResult;
+
+        if (expectedProducerEpoch.isEmpty()) {
+            // If no expected epoch was provided by the producer, bump the current epoch and set the last epoch to -1
+            // In the case of a new producer, producerEpoch will be -1 and bumpedEpoch will be 0
+            produceEpochResult = bumpedEpoch;
+            lastProducerEpochResult = RecordBatch.NO_PRODUCER_EPOCH;
+        } else {
+            short expectedEpoch = expectedProducerEpoch.get();
+            if (producerEpoch == RecordBatch.NO_PRODUCER_EPOCH || expectedEpoch == producerEpoch) {
+                // If the expected epoch matches the current epoch, or if there is no current epoch, the producer is attempting
+                // to continue after an error and no other producer has been initialized. Bump the current and last epochs.
+                // The no current epoch case means this is a new producer; producerEpoch will be -1 and bumpedEpoch will be 0
+                produceEpochResult = bumpedEpoch;
+                lastProducerEpochResult = producerEpoch;
+            } else if (expectedEpoch == lastProducerEpoch) {
+                // If the expected epoch matches the previous epoch, it is a retry of a successful call, so just return the
+                // current epoch without bumping. There is no danger of this producer being fenced, because a new producer
+                // calling InitProducerId would have caused the last epoch to be set to -1.
+                // Note that if the IBP is prior to 2.4.IV1, the lastProducerId and lastProducerEpoch will not be written to
+                // the transaction log, so a retry that spans a coordinator change will fail. We expect this to be a rare case.
+                produceEpochResult = producerEpoch;
+                lastProducerEpochResult = lastProducerEpoch;
+            } else {
+                // Otherwise, the producer has a fenced epoch and should receive a PRODUCER_FENCED error
+                LOGGER.info("Expected producer epoch {} does not match current producer epoch {} or previous producer epoch {}",
+                    expectedEpoch, producerEpoch, lastProducerEpoch);
+                throw Errors.PRODUCER_FENCED.exception();
+            }
+        }
+
+        TransitionData data = new TransitionData(TransactionState.EMPTY);
+        data.producerEpoch = produceEpochResult;
+        data.lastProducerEpoch = lastProducerEpochResult;
+        data.txnTimeoutMs = newTxnTimeoutMs;
+        data.topicPartitions = Set.of();
+        data.txnStartTimestamp = -1L;
+        data.txnLastUpdateTimestamp = updateTimestamp;
+        return prepareTransitionTo(data);
+    }
+
+    public TxnTransitMetadata prepareProducerIdRotation(long newProducerId,
+                                                        int newTxnTimeoutMs,
+                                                        long updateTimestamp,
+                                                        boolean recordLastEpoch) {
+        if (hasPendingTransaction())
+            throw new IllegalStateException("Cannot rotate producer ids while a transaction is still pending");
+
+        TransitionData data = new TransitionData(TransactionState.EMPTY);
+        data.producerId = newProducerId;
+        data.producerEpoch = 0;
+        data.lastProducerEpoch = recordLastEpoch ? producerEpoch : RecordBatch.NO_PRODUCER_EPOCH;
+        data.txnTimeoutMs = newTxnTimeoutMs;
+        data.topicPartitions = Set.of();
+        data.txnStartTimestamp = -1L;
+        data.txnLastUpdateTimestamp = updateTimestamp;
+        return prepareTransitionTo(data);
+    }
+
+    public TxnTransitMetadata prepareAddPartitions(Set<TopicPartition> 
addedTopicPartitions,
+                                                   long updateTimestamp,
+                                                   TransactionVersion 
clientTransactionVersion) {
+        long newTxnStartTimestamp;
+        if (state == TransactionState.EMPTY || state == 
TransactionState.COMPLETE_ABORT || state == TransactionState.COMPLETE_COMMIT) {
+            newTxnStartTimestamp = updateTimestamp;
+        } else {
+            newTxnStartTimestamp = txnStartTimestamp;
+        }
+
+        Set<TopicPartition> newTopicPartitions = new 
HashSet<>(topicPartitions);
+        newTopicPartitions.addAll(addedTopicPartitions);
+
+        TransitionData data = new TransitionData(TransactionState.ONGOING);
+        data.topicPartitions = newTopicPartitions;
+        data.txnStartTimestamp = newTxnStartTimestamp;
+        data.txnLastUpdateTimestamp = updateTimestamp;
+        data.clientTransactionVersion = clientTransactionVersion;
+        return prepareTransitionTo(data);
+    }
+
+    public TxnTransitMetadata prepareAbortOrCommit(TransactionState newState,
+                                                   TransactionVersion clientTransactionVersion,
+                                                   long nextProducerId,
+                                                   long updateTimestamp,
+                                                   boolean noPartitionAdded) {
+        short updatedProducerEpoch;
+        short updatedLastProducerEpoch;
+
+        if (clientTransactionVersion.supportsEpochBump()) {
+            // We already ensured that we do not overflow here. MAX_SHORT is the highest possible value.
+            updatedProducerEpoch = (short) (producerEpoch + 1);
+            updatedLastProducerEpoch = producerEpoch;
+        } else {
+            updatedProducerEpoch = producerEpoch;
+            updatedLastProducerEpoch = lastProducerEpoch;
+        }
+
+        // With transaction V2, it is allowed to abort the transaction without adding any partitions. Then, the transaction
+        // start time is uncertain but it is still required. So we can use the update time as the transaction start time.
+        long newTxnStartTimestamp = noPartitionAdded ? updateTimestamp : txnStartTimestamp;
+
+        TransitionData data = new TransitionData(newState);
+        data.nextProducerId = nextProducerId;
+        data.producerEpoch = updatedProducerEpoch;
+        data.lastProducerEpoch = updatedLastProducerEpoch;
+        data.txnStartTimestamp = newTxnStartTimestamp;
+        data.txnLastUpdateTimestamp = updateTimestamp;
+        data.clientTransactionVersion = clientTransactionVersion;
+        return prepareTransitionTo(data);
+    }
+
+    public TxnTransitMetadata prepareComplete(long updateTimestamp) {
+        TransactionState newState = state == TransactionState.PREPARE_COMMIT ?
+            TransactionState.COMPLETE_COMMIT : TransactionState.COMPLETE_ABORT;
+
+        // Since the state change was successfully written to the log, unset the flag for a failed epoch fence
+        hasFailedEpochFence = false;
+
+        long updatedProducerId;
+        short updatedProducerEpoch;
+
+        // In the prepareComplete transition for the overflow case, the lastProducerEpoch is kept at MAX-1,
+        // which is the last epoch visible to the client.
+        // Internally, however, during the transition between prepareAbort/prepareCommit and prepareComplete, the producer epoch
+        // reaches MAX but the client only sees the transition as MAX-1 followed by 0.
+        // When an epoch overflow occurs, we set the producerId to nextProducerId and reset the epoch to 0,
+        // but lastProducerEpoch remains MAX-1 to maintain consistency with what the client last saw.
+        if (clientTransactionVersion.supportsEpochBump() && nextProducerId != RecordBatch.NO_PRODUCER_ID) {
+            updatedProducerId = nextProducerId;
+            updatedProducerEpoch = 0;
+        } else {
+            updatedProducerId = producerId;
+            updatedProducerEpoch = producerEpoch;
+        }
+
+        TransitionData data = new TransitionData(newState);
+        data.producerId = updatedProducerId;
+        data.nextProducerId = RecordBatch.NO_PRODUCER_ID;
+        data.producerEpoch = updatedProducerEpoch;
+        data.topicPartitions = Set.of();
+        data.txnLastUpdateTimestamp = updateTimestamp;
+        return prepareTransitionTo(data);
+    }
+
+    public TxnTransitMetadata prepareDead() {
+        TransitionData data = new TransitionData(TransactionState.DEAD);
+        data.topicPartitions = Set.of();
+        return prepareTransitionTo(data);
+    }
+
+    /**
+     * Check if the epochs have been exhausted for the current producerId. We do not allow the client to use an
+     * epoch equal to Short.MaxValue to ensure that the coordinator will always be able to fence an existing producer.
+     */
+    public boolean isProducerEpochExhausted() {
+        return isEpochExhausted(producerEpoch);
+    }
+
+    /**
+     * Check if this is a distributed two phase commit transaction.
+     * Such transactions have no timeout (identified by maximum value for timeout).
+     */
+    public boolean isDistributedTwoPhaseCommitTxn() {
+        return txnTimeoutMs == Integer.MAX_VALUE;
+    }
+
+    private boolean hasPendingTransaction() {
+        return state == TransactionState.ONGOING ||
+            state == TransactionState.PREPARE_ABORT ||
+            state == TransactionState.PREPARE_COMMIT;
+    }
+
+    private TxnTransitMetadata prepareTransitionTo(TransitionData data) {
+        if (pendingState.isPresent())
+            throw new IllegalStateException("Preparing transaction state 
transition to " + state +
+                " while it already a pending state " + pendingState.get());
+
+        if (data.producerId < 0)
+            throw new IllegalArgumentException("Illegal new producer id " + 
producerId);
+
+        // The epoch is initialized to NO_PRODUCER_EPOCH when the TransactionMetadata
+        // is created for the first time and it could stay like this until transitioning
+        // to Dead.
+        if (data.state != TransactionState.DEAD && data.producerEpoch < 0)
+            throw new IllegalArgumentException("Illegal new producer epoch " + producerEpoch);
+
+        // check that the new state transition is valid and update the pending state if necessary
+        if (data.state.validPreviousStates().contains(this.state)) {
+            TxnTransitMetadata transitMetadata = new TxnTransitMetadata(
+                data.producerId, this.producerId, data.nextProducerId, data.producerEpoch, data.lastProducerEpoch,
+                data.txnTimeoutMs, data.state, Set.copyOf(data.topicPartitions),
+                data.txnStartTimestamp, data.txnLastUpdateTimestamp, data.clientTransactionVersion
+            );
+
+            LOGGER.debug("TransactionalId {} prepare transition from {} to {}", transactionalId, this.state, data.state);
+            pendingState = Optional.of(data.state);
+            return transitMetadata;
+        } else {
+            throw new IllegalStateException("Preparing transaction state transition to " + data.state + " failed since the target state " +
+                data.state + " is not a valid previous state of the current state " + this.state);
+        }
+    }
+
+    @SuppressWarnings("CyclomaticComplexity")
+    public void completeTransitionTo(TxnTransitMetadata transitMetadata) {
+        // metadata transition is valid only if all the following conditions are met:
+        //
+        // 1. the new state is already indicated in the pending state.
+        // 2. the epoch should be either the same value, the old value + 1, or 0 if we have a new producerId.
+        // 3. the last update time is no smaller than the old value.
+        // 4. the old partitions set is a subset of the new partitions set.
+        //
+        // plus, we should only try to update the metadata after the corresponding log entry has been successfully
+        // written and replicated (see TransactionStateManager#appendTransactionToLog)
+        //
+        // if valid, transition is done via overwriting the whole object to ensure synchronization
+
+        TransactionState toState = pendingState.orElseThrow(() -> {
+            LOGGER.error(MarkerFactory.getMarker(LogLevelConfig.FATAL_LOG_LEVEL),
+                "{}'s transition to {} failed since pendingState is not defined: this should not happen", this, transitMetadata);
+            return new IllegalStateException("TransactionalId " + transactionalId +
+                " completing transaction state transition while it does not have a pending state");
+        });
+
+        if (!toState.equals(transitMetadata.txnState())) {
+            throwStateTransitionFailure(transitMetadata);
+        } else {
+            switch (toState) {
+                case EMPTY: // from initPid
+                    if ((producerEpoch != transitMetadata.producerEpoch() && 
!validProducerEpochBump(transitMetadata)) ||
+                        !transitMetadata.topicPartitions().isEmpty() ||
+                        transitMetadata.txnStartTimestamp() != -1) {
+                        throwStateTransitionFailure(transitMetadata);
+                    }
+                    break;
+
+                case ONGOING: // from addPartitions
+                    if (!validProducerEpoch(transitMetadata) ||
+                        !transitMetadata.topicPartitions().containsAll(topicPartitions) ||
+                        txnTimeoutMs != transitMetadata.txnTimeoutMs()) {
+                        throwStateTransitionFailure(transitMetadata);
+                    }
+                    break;
+
+                case PREPARE_ABORT: // from endTxn
+                case PREPARE_COMMIT:
+                    // In V2, we allow state transits from Empty, CompleteCommit and CompleteAbort to PrepareAbort. It is possible
+                    // their updated start time is not equal to the current start time.
+                    boolean allowedEmptyAbort = toState == TransactionState.PREPARE_ABORT && transitMetadata.clientTransactionVersion().supportsEpochBump() &&
+                        (state == TransactionState.EMPTY || state == TransactionState.COMPLETE_COMMIT || state == TransactionState.COMPLETE_ABORT);
+                    boolean validTimestamp = txnStartTimestamp == transitMetadata.txnStartTimestamp() || allowedEmptyAbort;
+
+                    if (!validProducerEpoch(transitMetadata) ||
+                        !topicPartitions.equals(transitMetadata.topicPartitions()) ||
+                        txnTimeoutMs != transitMetadata.txnTimeoutMs() ||
+                        !validTimestamp) {
+                        throwStateTransitionFailure(transitMetadata);
+                    }
+                    break;
+
+                case COMPLETE_ABORT: // from write markers
+                case COMPLETE_COMMIT:
+                    if (!validProducerEpoch(transitMetadata) ||
+                        txnTimeoutMs != transitMetadata.txnTimeoutMs() ||
+                        transitMetadata.txnStartTimestamp() == -1) {
+                        throwStateTransitionFailure(transitMetadata);
+                    }
+                    break;
+
+                case PREPARE_EPOCH_FENCE:
+                    // We should never get here, since once we prepare to fence the epoch, we immediately set the pending state
+                    // to PrepareAbort, and then consequently to CompleteAbort after the markers are written. So we should never
+                    // ever try to complete a transition to PrepareEpochFence, as it is not a valid previous state for any other state, and hence
+                    // can never be transitioned out of.
+                    throwStateTransitionFailure(transitMetadata);
+                    break;
+
+                case DEAD:
+                    // The transactionalId was being expired. The completion of the operation should result in removal of
+                    // the metadata from the cache, so we should never realistically transition to the dead state.
+                    throw new IllegalStateException("TransactionalId " + transactionalId + " is trying to complete a transition to " +
+                        toState + ". This means that the transactionalId was being expired, and the only acceptable completion of " +
+                        "this operation is to remove the transaction metadata from the cache, not to persist the " + toState + " in the log.");
+
+                default:
+                    break;
+            }
+
+            LOGGER.debug("TransactionalId {} complete transition from {} to 
{}", transactionalId, state, transitMetadata);
+            producerId = transitMetadata.producerId();
+            prevProducerId = transitMetadata.prevProducerId();
+            nextProducerId = transitMetadata.nextProducerId();
+            producerEpoch = transitMetadata.producerEpoch();
+            lastProducerEpoch = transitMetadata.lastProducerEpoch();
+            txnTimeoutMs = transitMetadata.txnTimeoutMs();
+            topicPartitions = new HashSet<>(transitMetadata.topicPartitions());

Review Comment:
   I removed `new HashSet` here. Thanks for the review.
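   For reference, a minimal sketch of what dropping the copy at this line would presumably look like (assuming `TxnTransitMetadata#topicPartitions()` hands back the immutable set built via `Set.copyOf` in `prepareTransitionTo`):

   ```java
   // before: defensive copy of the transited partition set
   topicPartitions = new HashSet<>(transitMetadata.topicPartitions());

   // after: reference the immutable copy held by TxnTransitMetadata directly;
   // code that mutates the field in place (e.g. addPartitions/removePartition)
   // would then need to replace the reference rather than modify the set
   topicPartitions = transitMetadata.topicPartitions();
   ```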



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
