congbobo184 commented on code in PR #18273: URL: https://github.com/apache/pulsar/pull/18273#discussion_r1081490507
########## pulsar-broker/src/main/java/org/apache/pulsar/broker/transaction/buffer/impl/SnapshotSegmentAbortedTxnProcessorImpl.java: ########## @@ -0,0 +1,707 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing,2 + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pulsar.broker.transaction.buffer.impl; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentLinkedDeque; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; +import java.util.function.Supplier; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.mledger.AsyncCallbacks; +import org.apache.bookkeeper.mledger.Entry; +import org.apache.bookkeeper.mledger.ManagedLedgerException; +import org.apache.bookkeeper.mledger.impl.ManagedLedgerImpl; +import org.apache.bookkeeper.mledger.impl.PositionImpl; +import org.apache.bookkeeper.mledger.impl.ReadOnlyManagedLedgerImpl; +import 
org.apache.commons.collections4.map.LinkedMap; +import org.apache.commons.lang3.tuple.MutablePair; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.pulsar.broker.service.BrokerServiceException; +import org.apache.pulsar.broker.service.persistent.PersistentTopic; +import org.apache.pulsar.broker.systopic.SystemTopicClient; +import org.apache.pulsar.broker.transaction.buffer.AbortedTxnProcessor; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotIndex; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotIndexes; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotIndexesMetadata; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotSegment; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TxnIDData; +import org.apache.pulsar.client.api.Message; +import org.apache.pulsar.client.api.Schema; +import org.apache.pulsar.client.api.transaction.TxnID; +import org.apache.pulsar.client.impl.MessageIdImpl; +import org.apache.pulsar.client.impl.PulsarClientImpl; +import org.apache.pulsar.common.naming.SystemTopicNames; +import org.apache.pulsar.common.naming.TopicDomain; +import org.apache.pulsar.common.naming.TopicName; +import org.apache.pulsar.common.protocol.Commands; +import org.apache.pulsar.common.util.FutureUtil; + +@Slf4j +public class SnapshotSegmentAbortedTxnProcessorImpl implements AbortedTxnProcessor { + + /** + * Stored the unsealed aborted transaction IDs Whose size is always less than the snapshotSegmentCapacity. + * It will be persistent as a snapshot segment when its size reach the configured capacity. + */ + private LinkedList<TxnID> unsealedTxnIds; + + /** + * The map is used to clear the aborted transaction IDs persistent in the expired ledger. + * <p> + * The key PositionImpl {@link PositionImpl} is the persistent position of + * the latest transaction of a segment. 
+ * The value TxnID {@link TxnID} is the latest Transaction ID in a segment. + * </p> + * + * <p> + * If the position is expired, the processor can get the according latest + * transaction ID in this map. And then the processor can clear all the + * transaction IDs in the aborts {@link SnapshotSegmentAbortedTxnProcessorImpl#aborts} + * that lower than the transaction ID. + * And then the processor can delete the segments persistently according to + * the positions. + * </p> + */ + private final LinkedMap<PositionImpl, TxnID> segmentIndex = new LinkedMap<>(); + + /** + * This map is used to check whether a transaction is an aborted transaction. + * <p> + * The transaction IDs is appended in order, so the processor can delete expired + * transaction IDs according to the latest expired transaction IDs in segmentIndex + * {@link SnapshotSegmentAbortedTxnProcessorImpl#segmentIndex}. + * </p> + */ + private final LinkedMap<TxnID, TxnID> aborts = new LinkedMap<>(); + /** + * This map stores the indexes of the snapshot segment. + * <p> + * The key is the persistent position of the marker of the last transaction in the segment. + * The value TransactionBufferSnapshotIndex {@link TransactionBufferSnapshotIndex} is the + * indexes of the snapshot segment. + * </p> + */ + private final LinkedMap<PositionImpl, TransactionBufferSnapshotIndex> indexes = new LinkedMap<>(); + + private final PersistentTopic topic; + + private volatile long lastSnapshotTimestamps; + + /** + * The number of the aborted transaction IDs in a segment. + * This is calculated according to the configured memory size. + */ + private final int snapshotSegmentCapacity; + /** + * Responsible for executing the persistent tasks. + * <p>Including:</p> + * <p> Update segment index.</p> + * <p> Write snapshot segment.</p> + * <p> Delete snapshot segment.</p> + * <p> Clear all snapshot segment. 
</p> + */ + private final PersistentWorker persistentWorker; + + private static final String SNAPSHOT_PREFIX = "multiple-"; + + public SnapshotSegmentAbortedTxnProcessorImpl(PersistentTopic topic) { + this.topic = topic; + this.persistentWorker = new PersistentWorker(topic); + /** + * Calculate the segment capital according to its size configuration. + * <p> + * The empty transaction segment size is 5. + * Adding a empty linkedList, the size increase to 6. + * Add the topic name the size increase to the 7 + topic.getName().length(). + * Add the aborted transaction IDs, the size increase to 8 + + * topic.getName().length() + 3 * aborted transaction ID size. + * </p> + */ + this.snapshotSegmentCapacity = (topic.getBrokerService().getPulsar() + .getConfiguration().getTransactionBufferSnapshotSegmentSize() - 8 - topic.getName().length()) / 3; + this.unsealedTxnIds = new LinkedList<>(); + } + + @Override + public void putAbortedTxnAndPosition(TxnID txnID, PositionImpl position) { + unsealedTxnIds.add(txnID); + aborts.put(txnID, txnID); + /** + * The size of lastAbortedTxns reaches the configuration of the size of snapshot segment. + * Append a task to persistent the segment with the aborted transaction IDs and the latest + * transaction mark persistent position passed by param. + */ + if (unsealedTxnIds.size() >= snapshotSegmentCapacity) { + LinkedList<TxnID> abortedSegment = unsealedTxnIds; + segmentIndex.put(position, txnID); + persistentWorker.appendTask(PersistentWorker.OperationType.WriteSegment, () -> + persistentWorker.takeSnapshotSegmentAsync(abortedSegment, position)); + this.unsealedTxnIds = new LinkedList<>(); + } + } + + @Override + public boolean checkAbortedTransaction(TxnID txnID) { + return aborts.containsKey(txnID); + } + + /** + * Check werther the position in segmentIndex {@link SnapshotSegmentAbortedTxnProcessorImpl#segmentIndex} + * is expired. 
If the position is not exist in the original topic, the according transaction is an invalid + * transaction. And the according segment is invalid, too. The transaction IDs before the transaction ID + * in the aborts are invalid, too. + */ + @Override + public void trimExpiredAbortedTxns() { + //Checking whether there are some segment expired. + List<PositionImpl> positionsNeedToDelete = new ArrayList<>(); + while (!segmentIndex.isEmpty() && !((ManagedLedgerImpl) topic.getManagedLedger()) + .ledgerExists(segmentIndex.firstKey().getLedgerId())) { + if (log.isDebugEnabled()) { + log.debug("[{}] Topic transaction buffer clear aborted transactions, maxReadPosition : {}", + topic.getName(), segmentIndex.firstKey()); + } + PositionImpl positionNeedToDelete = segmentIndex.firstKey(); + positionsNeedToDelete.add(positionNeedToDelete); + + TxnID theLatestDeletedTxnID = segmentIndex.remove(0); + while (!aborts.firstKey().equals(theLatestDeletedTxnID)) { + aborts.remove(0); + } + aborts.remove(0); + } + //Batch delete the expired segment + if (!positionsNeedToDelete.isEmpty()) { + persistentWorker.appendTask(PersistentWorker.OperationType.DeleteSegment, + () -> persistentWorker.deleteSnapshotSegment(positionsNeedToDelete)); + } + } + + private String buildKey(long sequenceId) { + return SNAPSHOT_PREFIX + sequenceId + "-" + this.topic.getName(); + } + + @Override + public CompletableFuture<Void> takeAbortedTxnsSnapshot(PositionImpl maxReadPosition) { + //Store the latest aborted transaction IDs in unsealedTxnIDs and the according the latest max read position. 
+ TransactionBufferSnapshotIndexesMetadata metadata = new TransactionBufferSnapshotIndexesMetadata( + maxReadPosition.getLedgerId(), maxReadPosition.getEntryId(), + convertTypeToTxnIDData(unsealedTxnIds)); + return persistentWorker.appendTask(PersistentWorker.OperationType.UpdateIndex, () -> persistentWorker + .updateSnapshotIndex(metadata, indexes.values().stream().toList())); + } + + @Override + public CompletableFuture<PositionImpl> recoverFromSnapshot() { + return topic.getBrokerService().getPulsar().getTransactionBufferSnapshotServiceFactory() + .getTxnBufferSnapshotIndexService() + .createReader(TopicName.get(topic.getName())).thenComposeAsync(reader -> { + PositionImpl startReadCursorPosition = null; + TransactionBufferSnapshotIndexes persistentSnapshotIndexes = null; + boolean hasIndex = false; + try { + /** + * Read the transaction snapshot segment index. + * <p> + * The processor can get the sequence ID, unsealed transaction IDs, + * segment index list and max read position in the snapshot segment index. + * Then we can traverse the index list to read all aborted transaction IDs + * in segments to aborts. 
+ * </p> + */ + while (reader.hasMoreEvents()) { + Message<TransactionBufferSnapshotIndexes> message = reader.readNextAsync() + .get(getSystemClientOperationTimeoutMs(), TimeUnit.MILLISECONDS); + if (topic.getName().equals(message.getKey())) { + TransactionBufferSnapshotIndexes transactionBufferSnapshotIndexes = message.getValue(); + if (transactionBufferSnapshotIndexes != null) { + hasIndex = true; + persistentSnapshotIndexes = transactionBufferSnapshotIndexes; + startReadCursorPosition = PositionImpl.get( + transactionBufferSnapshotIndexes.getSnapshot().getMaxReadPositionLedgerId(), + transactionBufferSnapshotIndexes.getSnapshot().getMaxReadPositionEntryId()); + } + } + } + } catch (TimeoutException ex) { + Throwable t = FutureUtil.unwrapCompletionException(ex); + String errorMessage = String.format("[%s] Transaction buffer recover fail by read " + + "transactionBufferSnapshot timeout!", topic.getName()); + log.error(errorMessage, t); + return FutureUtil.failedFuture( + new BrokerServiceException.ServiceUnitNotReadyException(errorMessage, t)); + } catch (Exception ex) { + log.error("[{}] Transaction buffer recover fail when read " + + "transactionBufferSnapshot!", topic.getName(), ex); + return FutureUtil.failedFuture(ex); + } finally { + closeReader(reader); + } + PositionImpl finalStartReadCursorPosition = startReadCursorPosition; + TransactionBufferSnapshotIndexes finalPersistentSnapshotIndexes = persistentSnapshotIndexes; + if (!hasIndex) { + return CompletableFuture.completedFuture(null); + } else { + this.unsealedTxnIds = convertTypeToTxnID(persistentSnapshotIndexes + .getSnapshot().getAborts()); + } + //Read snapshot segment to recover aborts. 
+ ArrayList<CompletableFuture<Void>> completableFutures = new ArrayList<>(); + CompletableFuture<Void> openManagedLedgerFuture = new CompletableFuture<>(); + AtomicBoolean hasInvalidIndex = new AtomicBoolean(false); + AsyncCallbacks.OpenReadOnlyManagedLedgerCallback callback = new AsyncCallbacks + .OpenReadOnlyManagedLedgerCallback() { + @Override + public void openReadOnlyManagedLedgerComplete(ReadOnlyManagedLedgerImpl readOnlyManagedLedger, + Object ctx) { + finalPersistentSnapshotIndexes.getIndexList().forEach(index -> { + CompletableFuture<Void> handleSegmentFuture = new CompletableFuture<>(); + completableFutures.add(handleSegmentFuture); + readOnlyManagedLedger.asyncReadEntry( + new PositionImpl(index.getSegmentLedgerID(), + index.getSegmentEntryID()), + new AsyncCallbacks.ReadEntryCallback() { + @Override + public void readEntryComplete(Entry entry, Object ctx) { + handleSnapshotSegmentEntry(entry); + indexes.put(new PositionImpl( + index.abortedMarkLedgerID, + index.abortedMarkEntryID), + index); + entry.release(); + handleSegmentFuture.complete(null); + } + + @Override + public void readEntryFailed(ManagedLedgerException exception, Object ctx) { + if (exception instanceof ManagedLedgerException + .NonRecoverableLedgerException) { + /** + * The logic flow of deleting expired segment is: + * <p> + * 1. delete segment + * 2. update segment index + * </p> + * If the worker delete segment successfully + * but failed to update segment index, + * the segment can not be read according to the index. + * We update index again if there are invalid indexes. 
+ */ + if (((ManagedLedgerImpl)topic.getManagedLedger()) + .ledgerExists(index.getAbortedMarkLedgerID())) { + log.error("[{}] Failed to read snapshot segment [{}:{}]", + topic.getName(), index.segmentLedgerID, + index.segmentEntryID, exception); + handleSegmentFuture.completeExceptionally(exception); + } else { + hasInvalidIndex.set(true); + } + } + } + }, null); + }); + openManagedLedgerFuture.complete(null); + } + + @Override + public void openReadOnlyManagedLedgerFailed(ManagedLedgerException exception, Object ctx) { + log.error("[{}] Failed to open readOnly managed ledger", topic, exception); + openManagedLedgerFuture.completeExceptionally(exception); + } + }; + + TopicName snapshotIndexTopicName = TopicName.get(TopicDomain.persistent.toString(), + TopicName.get(topic.getName()).getNamespaceObject(), + SystemTopicNames.TRANSACTION_BUFFER_SNAPSHOT_SEGMENTS); + this.topic.getBrokerService().getPulsar().getManagedLedgerFactory() + .asyncOpenReadOnlyManagedLedger(snapshotIndexTopicName + .getPersistenceNamingEncoding(), callback, + topic.getManagedLedger().getConfig(), + null); + /** + * Wait the processor recover completely and then allow TB + * to recover the messages after the startReadCursorPosition. + */ + return openManagedLedgerFuture + .thenCompose((ignore) -> FutureUtil.waitForAll(completableFutures)) + .thenCompose((i) -> { + /** + * Update the snapshot segment index if there exist invalid indexes. + */ + if (hasInvalidIndex.get()) { + persistentWorker.appendTask(PersistentWorker.OperationType.UpdateIndex, () + -> persistentWorker + .updateSnapshotIndex(finalPersistentSnapshotIndexes.getSnapshot(), + indexes.values().stream().toList())); + } + /** + * If there is no segment index, the persistent worker will write segment begin from 0. 
+ */ + if (indexes.size() != 0) { + persistentWorker.sequenceID.set(indexes.get(indexes.lastKey()).sequenceID + 1); + } + /** + * Append the aborted txn IDs in the index metadata + * can keep the order of the aborted txn in the aborts. + * So that we can trim the expired snapshot segment in aborts + * according to the latest transaction IDs in the segmentIndex. + */ + convertTypeToTxnID(finalPersistentSnapshotIndexes.getSnapshot().getAborts()) + .forEach(txnID -> aborts.put(txnID, txnID)); + return CompletableFuture.completedFuture(finalStartReadCursorPosition); + }).exceptionally(ex -> { + log.error("[{}] Failed to recover snapshot segment", this.topic.getName(), ex); + return null; + }); + + }, topic.getBrokerService().getPulsar().getTransactionExecutorProvider() + .getExecutor(this)); + } + + @Override + public CompletableFuture<Void> clearAbortedTxnSnapshot() { + return persistentWorker.appendTask(PersistentWorker.OperationType.Clear, + persistentWorker::clearSnapshotSegmentAndIndexes); + } + + @Override + public long getLastSnapshotTimestamps() { + return this.lastSnapshotTimestamps; + } + + @Override + public CompletableFuture<Void> closeAsync() { + return persistentWorker.closeAsync(); + } + + private void handleSnapshotSegmentEntry(Entry entry) { + //decode snapshot from entry + ByteBuf headersAndPayload = entry.getDataBuffer(); + //skip metadata + Commands.parseMessageMetadata(headersAndPayload); + TransactionBufferSnapshotSegment snapshotSegment = Schema.AVRO(TransactionBufferSnapshotSegment.class) + .decode(Unpooled.wrappedBuffer(headersAndPayload).nioBuffer()); + + TxnIDData lastTxn = snapshotSegment.getAborts().get(snapshotSegment.getAborts().size() - 1); + segmentIndex.put(new PositionImpl(snapshotSegment.getPersistentPositionLedgerId(), + snapshotSegment.getPersistentPositionEntryId()), + new TxnID(lastTxn.getMostSigBits(), lastTxn.getLeastSigBits())); + convertTypeToTxnID(snapshotSegment.getAborts()).forEach(txnID -> aborts.put(txnID, txnID)); + } + 
+ private long getSystemClientOperationTimeoutMs() throws Exception { + PulsarClientImpl pulsarClient = (PulsarClientImpl) topic.getBrokerService().getPulsar().getClient(); + return pulsarClient.getConfiguration().getOperationTimeoutMs(); + } + + private <T> void closeReader(SystemTopicClient.Reader<T> reader) { + reader.closeAsync().exceptionally(e -> { + log.error("[{}]Transaction buffer snapshot reader close error!", topic.getName(), e); + return null; + }); + } + + public class PersistentWorker { + protected final AtomicLong sequenceID = new AtomicLong(0); + + private final PersistentTopic topic; + + //Persistent snapshot segment and index at the single thread. + private final CompletableFuture<SystemTopicClient.Writer<TransactionBufferSnapshotSegment>> + snapshotSegmentsWriterFuture; + private final CompletableFuture<SystemTopicClient.Writer<TransactionBufferSnapshotIndexes>> + snapshotIndexWriterFuture; + + private enum OperationState { + None, + Operating, + Closed + } + private static final AtomicReferenceFieldUpdater<PersistentWorker, PersistentWorker.OperationState> + STATE_UPDATER = AtomicReferenceFieldUpdater.newUpdater(PersistentWorker.class, + PersistentWorker.OperationState.class, "operationState"); + + public enum OperationType { + UpdateIndex, + WriteSegment, + DeleteSegment, + Clear + } + + private volatile OperationState operationState = OperationState.None; + + ConcurrentLinkedDeque<Pair<OperationType, Pair<CompletableFuture<Void>, + Supplier<CompletableFuture<Void>>>>> taskQueue = new ConcurrentLinkedDeque<>(); + + public PersistentWorker(PersistentTopic topic) { + this.topic = topic; + this.snapshotSegmentsWriterFuture = this.topic.getBrokerService().getPulsar() + .getTransactionBufferSnapshotServiceFactory() + .getTxnBufferSnapshotSegmentService().createWriter(TopicName.get(topic.getName())) + .exceptionally(ex -> { + log.error("{} Failed to create snapshot index writer", topic.getName()); + topic.close(); + return null; + }); + 
this.snapshotIndexWriterFuture = this.topic.getBrokerService().getPulsar() + .getTransactionBufferSnapshotServiceFactory() + .getTxnBufferSnapshotIndexService().createWriter(TopicName.get(topic.getName())) + .exceptionally((ex) -> { + log.error("{} Failed to create snapshot writer", topic.getName()); + topic.close(); + return null; + }); + } + + public CompletableFuture<Void> appendTask(OperationType operationType, + Supplier<CompletableFuture<Void>> task) { + CompletableFuture<Void> taskExecutedResult = new CompletableFuture<>(); + switch (operationType) { + case UpdateIndex -> { + /** + * The update index operation can be canceled when the task queue is not empty, + * so it should be executed immediately instead of appending to the task queue. + * If the taskQueue is not empty, the worker will execute the task in the queue. + */ + if (!taskQueue.isEmpty()) { + topic.getBrokerService().getPulsar().getTransactionExecutorProvider() + .getExecutor(this).submit(this::executeTask); + return CompletableFuture.completedFuture(null); + } else if (STATE_UPDATER.compareAndSet(this, OperationState.None, OperationState.Operating)) { + return task.get().whenComplete((ignore, throwable) -> { + if (throwable != null && log.isDebugEnabled()) { + log.debug("[{}] Failed to update index snapshot", topic.getName(), throwable); + } + STATE_UPDATER.compareAndSet(this, OperationState.Operating, OperationState.None); + }); + } else { + return CompletableFuture.completedFuture(null); + } + } + /** + * Only the operations of WriteSegment and DeleteSegment will be appended into the taskQueue. + * The operation will be canceled when the worker is close which means the topic is deleted. 
+ */ + case WriteSegment, DeleteSegment -> { + if (!STATE_UPDATER.get(this).equals(OperationState.Closed)) { + taskQueue.add(new MutablePair<>(operationType, new MutablePair<>(taskExecutedResult, task))); + executeTask(); + return taskExecutedResult; + } else { + return CompletableFuture.completedFuture(null); + } + } + case Clear -> { + /** + * Do not clear the snapshots if the topic is used. + * If the users want to delete a topic, he should stop the usage of the topic. + */ + if (STATE_UPDATER.compareAndSet(this, OperationState.None, OperationState.Closed)) { + taskQueue.forEach(pair -> + pair.getRight().getRight().get().completeExceptionally( + new BrokerServiceException.TransactionBufferClosedException( Review Comment: Returning `ServiceUnitNotReadyException` is enough here; don't add a new `TransactionBufferClosedException`. ########## pulsar-broker/src/main/java/org/apache/pulsar/broker/transaction/buffer/impl/SnapshotSegmentAbortedTxnProcessorImpl.java: ########## @@ -0,0 +1,721 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing,2 + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.pulsar.broker.transaction.buffer.impl; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentLinkedDeque; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; +import java.util.function.Supplier; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.mledger.AsyncCallbacks; +import org.apache.bookkeeper.mledger.Entry; +import org.apache.bookkeeper.mledger.ManagedLedgerException; +import org.apache.bookkeeper.mledger.impl.ManagedLedgerImpl; +import org.apache.bookkeeper.mledger.impl.PositionImpl; +import org.apache.bookkeeper.mledger.impl.ReadOnlyManagedLedgerImpl; +import org.apache.commons.collections4.map.LinkedMap; +import org.apache.commons.lang3.tuple.MutablePair; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.pulsar.broker.service.BrokerServiceException; +import org.apache.pulsar.broker.service.persistent.PersistentTopic; +import org.apache.pulsar.broker.systopic.SystemTopicClient; +import org.apache.pulsar.broker.transaction.buffer.AbortedTxnProcessor; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotIndex; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotIndexes; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotIndexesMetadata; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotSegment; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TxnIDData; +import org.apache.pulsar.client.api.Message; +import org.apache.pulsar.client.api.Schema; +import 
org.apache.pulsar.client.api.transaction.TxnID; +import org.apache.pulsar.client.impl.MessageIdImpl; +import org.apache.pulsar.client.impl.PulsarClientImpl; +import org.apache.pulsar.common.naming.SystemTopicNames; +import org.apache.pulsar.common.naming.TopicDomain; +import org.apache.pulsar.common.naming.TopicName; +import org.apache.pulsar.common.protocol.Commands; +import org.apache.pulsar.common.util.FutureUtil; + +@Slf4j +public class SnapshotSegmentAbortedTxnProcessorImpl implements AbortedTxnProcessor { + + /** + * Stored the unsealed aborted transaction IDs Whose size is always less than the snapshotSegmentCapacity. + * It will be persistent as a snapshot segment when its size reach the configured capacity. + */ + private LinkedList<TxnID> unsealedTxnIds; + + /** + * The map is used to clear the aborted transaction IDs persistent in the expired ledger. + * <p> + * The key PositionImpl {@link PositionImpl} is the persistent position of + * the latest transaction of a segment. + * The value TxnID {@link TxnID} is the latest Transaction ID in a segment. + * </p> + * + * <p> + * If the position is expired, the processor can get the according latest + * transaction ID in this map. And then the processor can clear all the + * transaction IDs in the aborts {@link SnapshotSegmentAbortedTxnProcessorImpl#aborts} + * that lower than the transaction ID. + * And then the processor can delete the segments persistently according to + * the positions. + * </p> + */ + private final LinkedMap<PositionImpl, TxnID> segmentIndex = new LinkedMap<>(); + + /** + * This map is used to check whether a transaction is an aborted transaction. + * <p> + * The transaction IDs is appended in order, so the processor can delete expired + * transaction IDs according to the latest expired transaction IDs in segmentIndex + * {@link SnapshotSegmentAbortedTxnProcessorImpl#segmentIndex}. 
+ * </p> + */ + private final LinkedMap<TxnID, TxnID> aborts = new LinkedMap<>(); + /** + * This map stores the indexes of the snapshot segment. + * <p> + * The key is the persistent position of the marker of the last transaction in the segment. + * The value TransactionBufferSnapshotIndex {@link TransactionBufferSnapshotIndex} is the + * indexes of the snapshot segment. + * </p> + */ + private final LinkedMap<PositionImpl, TransactionBufferSnapshotIndex> indexes = new LinkedMap<>(); + + private final PersistentTopic topic; + + private volatile long lastSnapshotTimestamps; + + /** + * The number of the aborted transaction IDs in a segment. + * This is calculated according to the configured memory size. + */ + private final int snapshotSegmentCapacity; + /** + * Responsible for executing the persistent tasks. + * <p>Including:</p> + * <p> Update segment index.</p> + * <p> Write snapshot segment.</p> + * <p> Delete snapshot segment.</p> + * <p> Clear all snapshot segment. </p> + */ + private final PersistentWorker persistentWorker; + + private static final String SNAPSHOT_PREFIX = "multiple-"; + + public SnapshotSegmentAbortedTxnProcessorImpl(PersistentTopic topic) { + this.topic = topic; + this.persistentWorker = new PersistentWorker(topic); + /* + Calculate the segment capital according to its size configuration. + <p> + The empty transaction segment size is 5. + Adding an empty linkedList, the size increase to 6. + Add the topic name the size increase to the 7 + topic.getName().length(). + Add the aborted transaction IDs, the size increase to 8 + + topic.getName().length() + 3 * aborted transaction ID size. 
+ </p> + */ + this.snapshotSegmentCapacity = (topic.getBrokerService().getPulsar() + .getConfiguration().getTransactionBufferSnapshotSegmentSize() - 8 - topic.getName().length()) / 3; + this.unsealedTxnIds = new LinkedList<>(); + } + + @Override + public void putAbortedTxnAndPosition(TxnID txnID, PositionImpl position) { + unsealedTxnIds.add(txnID); + aborts.put(txnID, txnID); + /* + The size of lastAbortedTxns reaches the configuration of the size of snapshot segment. + Append a task to persistent the segment with the aborted transaction IDs and the latest + transaction mark persistent position passed by param. + */ + if (unsealedTxnIds.size() >= snapshotSegmentCapacity) { + LinkedList<TxnID> abortedSegment = unsealedTxnIds; + segmentIndex.put(position, txnID); + persistentWorker.appendTask(PersistentWorker.OperationType.WriteSegment, () -> + persistentWorker.takeSnapshotSegmentAsync(abortedSegment, position)); + this.unsealedTxnIds = new LinkedList<>(); + } + } + + @Override + public boolean checkAbortedTransaction(TxnID txnID) { + return aborts.containsKey(txnID); + } + + /** + * Check werther the position in segmentIndex {@link SnapshotSegmentAbortedTxnProcessorImpl#segmentIndex} + * is expired. If the position is not exist in the original topic, the according transaction is an invalid + * transaction. And the according segment is invalid, too. The transaction IDs before the transaction ID + * in the aborts are invalid, too. + */ + @Override + public void trimExpiredAbortedTxns() { + //Checking whether there are some segment expired. 
+ List<PositionImpl> positionsNeedToDelete = new ArrayList<>(); + while (!segmentIndex.isEmpty() && !((ManagedLedgerImpl) topic.getManagedLedger()) + .ledgerExists(segmentIndex.firstKey().getLedgerId())) { + if (log.isDebugEnabled()) { + log.debug("[{}] Topic transaction buffer clear aborted transactions, maxReadPosition : {}", + topic.getName(), segmentIndex.firstKey()); + } + PositionImpl positionNeedToDelete = segmentIndex.firstKey(); + positionsNeedToDelete.add(positionNeedToDelete); + + TxnID theLatestDeletedTxnID = segmentIndex.remove(0); + while (!aborts.firstKey().equals(theLatestDeletedTxnID)) { + aborts.remove(0); + } + aborts.remove(0); + } + //Batch delete the expired segment + if (!positionsNeedToDelete.isEmpty()) { + persistentWorker.appendTask(PersistentWorker.OperationType.DeleteSegment, + () -> persistentWorker.deleteSnapshotSegment(positionsNeedToDelete)); + } + } + + private String buildKey(long sequenceId) { + return SNAPSHOT_PREFIX + sequenceId + "-" + this.topic.getName(); + } + + @Override + public CompletableFuture<Void> takeAbortedTxnsSnapshot(PositionImpl maxReadPosition) { + //Store the latest aborted transaction IDs in unsealedTxnIDs and the according the latest max read position. 
+ TransactionBufferSnapshotIndexesMetadata metadata = new TransactionBufferSnapshotIndexesMetadata( + maxReadPosition.getLedgerId(), maxReadPosition.getEntryId(), + convertTypeToTxnIDData(unsealedTxnIds)); + return persistentWorker.appendTask(PersistentWorker.OperationType.UpdateIndex, () -> persistentWorker + .updateSnapshotIndex(metadata, indexes.values().stream().toList())); + } + + @Override + public CompletableFuture<PositionImpl> recoverFromSnapshot() { + return topic.getBrokerService().getPulsar().getTransactionBufferSnapshotServiceFactory() + .getTxnBufferSnapshotIndexService() + .createReader(TopicName.get(topic.getName())).thenComposeAsync(reader -> { + PositionImpl startReadCursorPosition = null; + TransactionBufferSnapshotIndexes persistentSnapshotIndexes = null; + boolean hasIndex = false; + try { + /* + Read the transaction snapshot segment index. + <p> + The processor can get the sequence ID, unsealed transaction IDs, + segment index list and max read position in the snapshot segment index. + Then we can traverse the index list to read all aborted transaction IDs + in segments to aborts. 
+ </p> + */ + while (reader.hasMoreEvents()) { + Message<TransactionBufferSnapshotIndexes> message = reader.readNextAsync() + .get(getSystemClientOperationTimeoutMs(), TimeUnit.MILLISECONDS); + if (topic.getName().equals(message.getKey())) { + TransactionBufferSnapshotIndexes transactionBufferSnapshotIndexes = message.getValue(); + if (transactionBufferSnapshotIndexes != null) { + hasIndex = true; + persistentSnapshotIndexes = transactionBufferSnapshotIndexes; + startReadCursorPosition = PositionImpl.get( + transactionBufferSnapshotIndexes.getSnapshot().getMaxReadPositionLedgerId(), + transactionBufferSnapshotIndexes.getSnapshot().getMaxReadPositionEntryId()); + } + } + } + } catch (TimeoutException ex) { + Throwable t = FutureUtil.unwrapCompletionException(ex); + String errorMessage = String.format("[%s] Transaction buffer recover fail by read " + + "transactionBufferSnapshot timeout!", topic.getName()); + log.error(errorMessage, t); + return FutureUtil.failedFuture( + new BrokerServiceException.ServiceUnitNotReadyException(errorMessage, t)); + } catch (Exception ex) { + log.error("[{}] Transaction buffer recover fail when read " + + "transactionBufferSnapshot!", topic.getName(), ex); + return FutureUtil.failedFuture(ex); + } finally { + closeReader(reader); + } + PositionImpl finalStartReadCursorPosition = startReadCursorPosition; + TransactionBufferSnapshotIndexes finalPersistentSnapshotIndexes = persistentSnapshotIndexes; + if (!hasIndex) { + return CompletableFuture.completedFuture(null); + } else { + this.unsealedTxnIds = convertTypeToTxnID(persistentSnapshotIndexes + .getSnapshot().getAborts()); + } + //Read snapshot segment to recover aborts. 
+ ArrayList<CompletableFuture<Void>> completableFutures = new ArrayList<>(); + CompletableFuture<Void> openManagedLedgerFuture = new CompletableFuture<>(); + AtomicBoolean hasInvalidIndex = new AtomicBoolean(false); + AsyncCallbacks.OpenReadOnlyManagedLedgerCallback callback = new AsyncCallbacks + .OpenReadOnlyManagedLedgerCallback() { + @Override + public void openReadOnlyManagedLedgerComplete(ReadOnlyManagedLedgerImpl readOnlyManagedLedger, + Object ctx) { + finalPersistentSnapshotIndexes.getIndexList().forEach(index -> { + CompletableFuture<Void> handleSegmentFuture = new CompletableFuture<>(); + completableFutures.add(handleSegmentFuture); + readOnlyManagedLedger.asyncReadEntry( + new PositionImpl(index.getSegmentLedgerID(), + index.getSegmentEntryID()), + new AsyncCallbacks.ReadEntryCallback() { + @Override + public void readEntryComplete(Entry entry, Object ctx) { + handleSnapshotSegmentEntry(entry); + indexes.put(new PositionImpl( + index.abortedMarkLedgerID, + index.abortedMarkEntryID), + index); + entry.release(); + handleSegmentFuture.complete(null); + } + + @Override + public void readEntryFailed(ManagedLedgerException exception, Object ctx) { + if (exception instanceof ManagedLedgerException + .NonRecoverableLedgerException) { + /* + The logic flow of deleting expired segment is: + <p> + 1. delete segment + 2. update segment index + </p> + If the worker delete segment successfully + but failed to update segment index, + the segment can not be read according to the index. + We update index again if there are invalid indexes. 
+ */ + if (((ManagedLedgerImpl)topic.getManagedLedger()) + .ledgerExists(index.getAbortedMarkLedgerID())) { + log.error("[{}] Failed to read snapshot segment [{}:{}]", + topic.getName(), index.segmentLedgerID, + index.segmentEntryID, exception); + handleSegmentFuture.completeExceptionally(exception); + } else { + hasInvalidIndex.set(true); + } + } + } + }, null); + }); + openManagedLedgerFuture.complete(null); + } + + @Override + public void openReadOnlyManagedLedgerFailed(ManagedLedgerException exception, Object ctx) { + log.error("[{}] Failed to open readOnly managed ledger", topic, exception); + openManagedLedgerFuture.completeExceptionally(exception); + } + }; + + TopicName snapshotIndexTopicName = TopicName.get(TopicDomain.persistent.toString(), + TopicName.get(topic.getName()).getNamespaceObject(), + SystemTopicNames.TRANSACTION_BUFFER_SNAPSHOT_SEGMENTS); + this.topic.getBrokerService().getPulsar().getManagedLedgerFactory() + .asyncOpenReadOnlyManagedLedger(snapshotIndexTopicName + .getPersistenceNamingEncoding(), callback, + topic.getManagedLedger().getConfig(), + null); + /* + Wait the processor recover completely and then allow TB + to recover the messages after the startReadCursorPosition. + */ + return openManagedLedgerFuture + .thenCompose((ignore) -> FutureUtil.waitForAll(completableFutures)) + .thenCompose((i) -> { + /* + Update the snapshot segment index if there exist invalid indexes. + */ + if (hasInvalidIndex.get()) { + persistentWorker.appendTask(PersistentWorker.OperationType.UpdateIndex, () + -> persistentWorker + .updateSnapshotIndex(finalPersistentSnapshotIndexes.getSnapshot(), + indexes.values().stream().toList())); + } + /* + If there is no segment index, the persistent worker will write segment begin from 0. + */ + if (indexes.size() != 0) { + persistentWorker.sequenceID.set(indexes.get(indexes.lastKey()).sequenceID + 1); + } + /* + Append the aborted txn IDs in the index metadata + can keep the order of the aborted txn in the aborts. 
+ So that we can trim the expired snapshot segment in aborts + according to the latest transaction IDs in the segmentIndex. + */ + convertTypeToTxnID(finalPersistentSnapshotIndexes.getSnapshot().getAborts()) + .forEach(txnID -> aborts.put(txnID, txnID)); + return CompletableFuture.completedFuture(finalStartReadCursorPosition); + }).exceptionally(ex -> { + log.error("[{}] Failed to recover snapshot segment", this.topic.getName(), ex); + return null; + }); + + }, topic.getBrokerService().getPulsar().getTransactionExecutorProvider() Review Comment: I have forgotten: why do we need an executor to execute this task? ########## pulsar-broker/src/main/java/org/apache/pulsar/broker/transaction/buffer/impl/SnapshotSegmentAbortedTxnProcessorImpl.java: ########## @@ -0,0 +1,721 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing,2 + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.apache.pulsar.broker.transaction.buffer.impl; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentLinkedDeque; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; +import java.util.function.Supplier; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.mledger.AsyncCallbacks; +import org.apache.bookkeeper.mledger.Entry; +import org.apache.bookkeeper.mledger.ManagedLedgerException; +import org.apache.bookkeeper.mledger.impl.ManagedLedgerImpl; +import org.apache.bookkeeper.mledger.impl.PositionImpl; +import org.apache.bookkeeper.mledger.impl.ReadOnlyManagedLedgerImpl; +import org.apache.commons.collections4.map.LinkedMap; +import org.apache.commons.lang3.tuple.MutablePair; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.pulsar.broker.service.BrokerServiceException; +import org.apache.pulsar.broker.service.persistent.PersistentTopic; +import org.apache.pulsar.broker.systopic.SystemTopicClient; +import org.apache.pulsar.broker.transaction.buffer.AbortedTxnProcessor; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotIndex; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotIndexes; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotIndexesMetadata; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotSegment; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TxnIDData; +import org.apache.pulsar.client.api.Message; +import org.apache.pulsar.client.api.Schema; +import 
org.apache.pulsar.client.api.transaction.TxnID; +import org.apache.pulsar.client.impl.MessageIdImpl; +import org.apache.pulsar.client.impl.PulsarClientImpl; +import org.apache.pulsar.common.naming.SystemTopicNames; +import org.apache.pulsar.common.naming.TopicDomain; +import org.apache.pulsar.common.naming.TopicName; +import org.apache.pulsar.common.protocol.Commands; +import org.apache.pulsar.common.util.FutureUtil; + +@Slf4j +public class SnapshotSegmentAbortedTxnProcessorImpl implements AbortedTxnProcessor { + + /** + * Stored the unsealed aborted transaction IDs Whose size is always less than the snapshotSegmentCapacity. + * It will be persistent as a snapshot segment when its size reach the configured capacity. + */ + private LinkedList<TxnID> unsealedTxnIds; + + /** + * The map is used to clear the aborted transaction IDs persistent in the expired ledger. + * <p> + * The key PositionImpl {@link PositionImpl} is the persistent position of + * the latest transaction of a segment. + * The value TxnID {@link TxnID} is the latest Transaction ID in a segment. + * </p> + * + * <p> + * If the position is expired, the processor can get the according latest + * transaction ID in this map. And then the processor can clear all the + * transaction IDs in the aborts {@link SnapshotSegmentAbortedTxnProcessorImpl#aborts} + * that lower than the transaction ID. + * And then the processor can delete the segments persistently according to + * the positions. + * </p> + */ + private final LinkedMap<PositionImpl, TxnID> segmentIndex = new LinkedMap<>(); + + /** + * This map is used to check whether a transaction is an aborted transaction. + * <p> + * The transaction IDs is appended in order, so the processor can delete expired + * transaction IDs according to the latest expired transaction IDs in segmentIndex + * {@link SnapshotSegmentAbortedTxnProcessorImpl#segmentIndex}. 
+ * </p> + */ + private final LinkedMap<TxnID, TxnID> aborts = new LinkedMap<>(); + /** + * This map stores the indexes of the snapshot segment. + * <p> + * The key is the persistent position of the marker of the last transaction in the segment. + * The value TransactionBufferSnapshotIndex {@link TransactionBufferSnapshotIndex} is the + * indexes of the snapshot segment. + * </p> + */ + private final LinkedMap<PositionImpl, TransactionBufferSnapshotIndex> indexes = new LinkedMap<>(); + + private final PersistentTopic topic; + + private volatile long lastSnapshotTimestamps; + + /** + * The number of the aborted transaction IDs in a segment. + * This is calculated according to the configured memory size. + */ + private final int snapshotSegmentCapacity; + /** + * Responsible for executing the persistent tasks. + * <p>Including:</p> + * <p> Update segment index.</p> + * <p> Write snapshot segment.</p> + * <p> Delete snapshot segment.</p> + * <p> Clear all snapshot segment. </p> + */ + private final PersistentWorker persistentWorker; + + private static final String SNAPSHOT_PREFIX = "multiple-"; + + public SnapshotSegmentAbortedTxnProcessorImpl(PersistentTopic topic) { + this.topic = topic; + this.persistentWorker = new PersistentWorker(topic); + /* + Calculate the segment capital according to its size configuration. + <p> + The empty transaction segment size is 5. + Adding an empty linkedList, the size increase to 6. + Add the topic name the size increase to the 7 + topic.getName().length(). + Add the aborted transaction IDs, the size increase to 8 + + topic.getName().length() + 3 * aborted transaction ID size. 
+ </p> + */ + this.snapshotSegmentCapacity = (topic.getBrokerService().getPulsar() + .getConfiguration().getTransactionBufferSnapshotSegmentSize() - 8 - topic.getName().length()) / 3; + this.unsealedTxnIds = new LinkedList<>(); + } + + @Override + public void putAbortedTxnAndPosition(TxnID txnID, PositionImpl position) { + unsealedTxnIds.add(txnID); + aborts.put(txnID, txnID); + /* + The size of lastAbortedTxns reaches the configuration of the size of snapshot segment. + Append a task to persistent the segment with the aborted transaction IDs and the latest + transaction mark persistent position passed by param. + */ + if (unsealedTxnIds.size() >= snapshotSegmentCapacity) { + LinkedList<TxnID> abortedSegment = unsealedTxnIds; + segmentIndex.put(position, txnID); + persistentWorker.appendTask(PersistentWorker.OperationType.WriteSegment, () -> + persistentWorker.takeSnapshotSegmentAsync(abortedSegment, position)); + this.unsealedTxnIds = new LinkedList<>(); + } + } + + @Override + public boolean checkAbortedTransaction(TxnID txnID) { + return aborts.containsKey(txnID); + } + + /** + * Check werther the position in segmentIndex {@link SnapshotSegmentAbortedTxnProcessorImpl#segmentIndex} + * is expired. If the position is not exist in the original topic, the according transaction is an invalid + * transaction. And the according segment is invalid, too. The transaction IDs before the transaction ID + * in the aborts are invalid, too. + */ + @Override + public void trimExpiredAbortedTxns() { + //Checking whether there are some segment expired. 
+ List<PositionImpl> positionsNeedToDelete = new ArrayList<>(); + while (!segmentIndex.isEmpty() && !((ManagedLedgerImpl) topic.getManagedLedger()) + .ledgerExists(segmentIndex.firstKey().getLedgerId())) { + if (log.isDebugEnabled()) { + log.debug("[{}] Topic transaction buffer clear aborted transactions, maxReadPosition : {}", + topic.getName(), segmentIndex.firstKey()); + } + PositionImpl positionNeedToDelete = segmentIndex.firstKey(); + positionsNeedToDelete.add(positionNeedToDelete); + + TxnID theLatestDeletedTxnID = segmentIndex.remove(0); + while (!aborts.firstKey().equals(theLatestDeletedTxnID)) { + aborts.remove(0); + } + aborts.remove(0); + } + //Batch delete the expired segment + if (!positionsNeedToDelete.isEmpty()) { + persistentWorker.appendTask(PersistentWorker.OperationType.DeleteSegment, + () -> persistentWorker.deleteSnapshotSegment(positionsNeedToDelete)); + } + } + + private String buildKey(long sequenceId) { + return SNAPSHOT_PREFIX + sequenceId + "-" + this.topic.getName(); + } + + @Override + public CompletableFuture<Void> takeAbortedTxnsSnapshot(PositionImpl maxReadPosition) { + //Store the latest aborted transaction IDs in unsealedTxnIDs and the according the latest max read position. 
+ TransactionBufferSnapshotIndexesMetadata metadata = new TransactionBufferSnapshotIndexesMetadata( + maxReadPosition.getLedgerId(), maxReadPosition.getEntryId(), + convertTypeToTxnIDData(unsealedTxnIds)); + return persistentWorker.appendTask(PersistentWorker.OperationType.UpdateIndex, () -> persistentWorker + .updateSnapshotIndex(metadata, indexes.values().stream().toList())); + } + + @Override + public CompletableFuture<PositionImpl> recoverFromSnapshot() { + return topic.getBrokerService().getPulsar().getTransactionBufferSnapshotServiceFactory() + .getTxnBufferSnapshotIndexService() + .createReader(TopicName.get(topic.getName())).thenComposeAsync(reader -> { + PositionImpl startReadCursorPosition = null; + TransactionBufferSnapshotIndexes persistentSnapshotIndexes = null; + boolean hasIndex = false; + try { + /* + Read the transaction snapshot segment index. + <p> + The processor can get the sequence ID, unsealed transaction IDs, + segment index list and max read position in the snapshot segment index. + Then we can traverse the index list to read all aborted transaction IDs + in segments to aborts. + </p> + */ + while (reader.hasMoreEvents()) { + Message<TransactionBufferSnapshotIndexes> message = reader.readNextAsync() + .get(getSystemClientOperationTimeoutMs(), TimeUnit.MILLISECONDS); + if (topic.getName().equals(message.getKey())) { + TransactionBufferSnapshotIndexes transactionBufferSnapshotIndexes = message.getValue(); + if (transactionBufferSnapshotIndexes != null) { + hasIndex = true; Review Comment: We don't need this flag; if there is no index, `startReadCursorPosition` and `persistentSnapshotIndexes` are both null. ########## pulsar-broker/src/main/java/org/apache/pulsar/broker/transaction/buffer/impl/SnapshotSegmentAbortedTxnProcessorImpl.java: ########## @@ -0,0 +1,721 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing,2 + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pulsar.broker.transaction.buffer.impl; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentLinkedDeque; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; +import java.util.function.Supplier; +import lombok.extern.slf4j.Slf4j; +import org.apache.bookkeeper.mledger.AsyncCallbacks; +import org.apache.bookkeeper.mledger.Entry; +import org.apache.bookkeeper.mledger.ManagedLedgerException; +import org.apache.bookkeeper.mledger.impl.ManagedLedgerImpl; +import org.apache.bookkeeper.mledger.impl.PositionImpl; +import org.apache.bookkeeper.mledger.impl.ReadOnlyManagedLedgerImpl; +import org.apache.commons.collections4.map.LinkedMap; +import org.apache.commons.lang3.tuple.MutablePair; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.pulsar.broker.service.BrokerServiceException; +import org.apache.pulsar.broker.service.persistent.PersistentTopic; +import 
org.apache.pulsar.broker.systopic.SystemTopicClient; +import org.apache.pulsar.broker.transaction.buffer.AbortedTxnProcessor; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotIndex; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotIndexes; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotIndexesMetadata; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TransactionBufferSnapshotSegment; +import org.apache.pulsar.broker.transaction.buffer.metadata.v2.TxnIDData; +import org.apache.pulsar.client.api.Message; +import org.apache.pulsar.client.api.Schema; +import org.apache.pulsar.client.api.transaction.TxnID; +import org.apache.pulsar.client.impl.MessageIdImpl; +import org.apache.pulsar.client.impl.PulsarClientImpl; +import org.apache.pulsar.common.naming.SystemTopicNames; +import org.apache.pulsar.common.naming.TopicDomain; +import org.apache.pulsar.common.naming.TopicName; +import org.apache.pulsar.common.protocol.Commands; +import org.apache.pulsar.common.util.FutureUtil; + +@Slf4j +public class SnapshotSegmentAbortedTxnProcessorImpl implements AbortedTxnProcessor { + + /** + * Stored the unsealed aborted transaction IDs Whose size is always less than the snapshotSegmentCapacity. + * It will be persistent as a snapshot segment when its size reach the configured capacity. + */ + private LinkedList<TxnID> unsealedTxnIds; + + /** + * The map is used to clear the aborted transaction IDs persistent in the expired ledger. + * <p> + * The key PositionImpl {@link PositionImpl} is the persistent position of + * the latest transaction of a segment. + * The value TxnID {@link TxnID} is the latest Transaction ID in a segment. + * </p> + * + * <p> + * If the position is expired, the processor can get the according latest + * transaction ID in this map. 
And then the processor can clear all the + * transaction IDs in the aborts {@link SnapshotSegmentAbortedTxnProcessorImpl#aborts} + * that lower than the transaction ID. + * And then the processor can delete the segments persistently according to + * the positions. + * </p> + */ + private final LinkedMap<PositionImpl, TxnID> segmentIndex = new LinkedMap<>(); + + /** + * This map is used to check whether a transaction is an aborted transaction. + * <p> + * The transaction IDs is appended in order, so the processor can delete expired + * transaction IDs according to the latest expired transaction IDs in segmentIndex + * {@link SnapshotSegmentAbortedTxnProcessorImpl#segmentIndex}. + * </p> + */ + private final LinkedMap<TxnID, TxnID> aborts = new LinkedMap<>(); + /** + * This map stores the indexes of the snapshot segment. + * <p> + * The key is the persistent position of the marker of the last transaction in the segment. + * The value TransactionBufferSnapshotIndex {@link TransactionBufferSnapshotIndex} is the + * indexes of the snapshot segment. + * </p> + */ + private final LinkedMap<PositionImpl, TransactionBufferSnapshotIndex> indexes = new LinkedMap<>(); + + private final PersistentTopic topic; + + private volatile long lastSnapshotTimestamps; + + /** + * The number of the aborted transaction IDs in a segment. + * This is calculated according to the configured memory size. + */ + private final int snapshotSegmentCapacity; + /** + * Responsible for executing the persistent tasks. + * <p>Including:</p> + * <p> Update segment index.</p> + * <p> Write snapshot segment.</p> + * <p> Delete snapshot segment.</p> + * <p> Clear all snapshot segment. 
</p> + */ + private final PersistentWorker persistentWorker; + + private static final String SNAPSHOT_PREFIX = "multiple-"; + + public SnapshotSegmentAbortedTxnProcessorImpl(PersistentTopic topic) { + this.topic = topic; + this.persistentWorker = new PersistentWorker(topic); + /* + Calculate the segment capital according to its size configuration. + <p> + The empty transaction segment size is 5. + Adding an empty linkedList, the size increase to 6. + Add the topic name the size increase to the 7 + topic.getName().length(). + Add the aborted transaction IDs, the size increase to 8 + + topic.getName().length() + 3 * aborted transaction ID size. + </p> + */ + this.snapshotSegmentCapacity = (topic.getBrokerService().getPulsar() + .getConfiguration().getTransactionBufferSnapshotSegmentSize() - 8 - topic.getName().length()) / 3; + this.unsealedTxnIds = new LinkedList<>(); + } + + @Override + public void putAbortedTxnAndPosition(TxnID txnID, PositionImpl position) { + unsealedTxnIds.add(txnID); + aborts.put(txnID, txnID); + /* + The size of lastAbortedTxns reaches the configuration of the size of snapshot segment. + Append a task to persistent the segment with the aborted transaction IDs and the latest + transaction mark persistent position passed by param. + */ + if (unsealedTxnIds.size() >= snapshotSegmentCapacity) { + LinkedList<TxnID> abortedSegment = unsealedTxnIds; + segmentIndex.put(position, txnID); + persistentWorker.appendTask(PersistentWorker.OperationType.WriteSegment, () -> + persistentWorker.takeSnapshotSegmentAsync(abortedSegment, position)); + this.unsealedTxnIds = new LinkedList<>(); + } + } + + @Override + public boolean checkAbortedTransaction(TxnID txnID) { + return aborts.containsKey(txnID); + } + + /** + * Check werther the position in segmentIndex {@link SnapshotSegmentAbortedTxnProcessorImpl#segmentIndex} + * is expired. If the position is not exist in the original topic, the according transaction is an invalid + * transaction. 
And the according segment is invalid, too. The transaction IDs before the transaction ID + * in the aborts are invalid, too. + */ + @Override + public void trimExpiredAbortedTxns() { + //Checking whether there are some segment expired. + List<PositionImpl> positionsNeedToDelete = new ArrayList<>(); + while (!segmentIndex.isEmpty() && !((ManagedLedgerImpl) topic.getManagedLedger()) + .ledgerExists(segmentIndex.firstKey().getLedgerId())) { + if (log.isDebugEnabled()) { + log.debug("[{}] Topic transaction buffer clear aborted transactions, maxReadPosition : {}", + topic.getName(), segmentIndex.firstKey()); + } + PositionImpl positionNeedToDelete = segmentIndex.firstKey(); + positionsNeedToDelete.add(positionNeedToDelete); + + TxnID theLatestDeletedTxnID = segmentIndex.remove(0); + while (!aborts.firstKey().equals(theLatestDeletedTxnID)) { + aborts.remove(0); + } + aborts.remove(0); + } + //Batch delete the expired segment + if (!positionsNeedToDelete.isEmpty()) { + persistentWorker.appendTask(PersistentWorker.OperationType.DeleteSegment, + () -> persistentWorker.deleteSnapshotSegment(positionsNeedToDelete)); + } + } + + private String buildKey(long sequenceId) { + return SNAPSHOT_PREFIX + sequenceId + "-" + this.topic.getName(); + } + + @Override + public CompletableFuture<Void> takeAbortedTxnsSnapshot(PositionImpl maxReadPosition) { + //Store the latest aborted transaction IDs in unsealedTxnIDs and the according the latest max read position. 
+ TransactionBufferSnapshotIndexesMetadata metadata = new TransactionBufferSnapshotIndexesMetadata( + maxReadPosition.getLedgerId(), maxReadPosition.getEntryId(), + convertTypeToTxnIDData(unsealedTxnIds)); + return persistentWorker.appendTask(PersistentWorker.OperationType.UpdateIndex, () -> persistentWorker + .updateSnapshotIndex(metadata, indexes.values().stream().toList())); + } + + @Override + public CompletableFuture<PositionImpl> recoverFromSnapshot() { + return topic.getBrokerService().getPulsar().getTransactionBufferSnapshotServiceFactory() + .getTxnBufferSnapshotIndexService() + .createReader(TopicName.get(topic.getName())).thenComposeAsync(reader -> { + PositionImpl startReadCursorPosition = null; + TransactionBufferSnapshotIndexes persistentSnapshotIndexes = null; + boolean hasIndex = false; + try { + /* + Read the transaction snapshot segment index. + <p> + The processor can get the sequence ID, unsealed transaction IDs, + segment index list and max read position in the snapshot segment index. + Then we can traverse the index list to read all aborted transaction IDs + in segments to aborts. 
+ </p> + */ + while (reader.hasMoreEvents()) { + Message<TransactionBufferSnapshotIndexes> message = reader.readNextAsync() + .get(getSystemClientOperationTimeoutMs(), TimeUnit.MILLISECONDS); + if (topic.getName().equals(message.getKey())) { + TransactionBufferSnapshotIndexes transactionBufferSnapshotIndexes = message.getValue(); + if (transactionBufferSnapshotIndexes != null) { + hasIndex = true; + persistentSnapshotIndexes = transactionBufferSnapshotIndexes; + startReadCursorPosition = PositionImpl.get( + transactionBufferSnapshotIndexes.getSnapshot().getMaxReadPositionLedgerId(), + transactionBufferSnapshotIndexes.getSnapshot().getMaxReadPositionEntryId()); + } + } + } + } catch (TimeoutException ex) { + Throwable t = FutureUtil.unwrapCompletionException(ex); + String errorMessage = String.format("[%s] Transaction buffer recover fail by read " + + "transactionBufferSnapshot timeout!", topic.getName()); + log.error(errorMessage, t); + return FutureUtil.failedFuture( + new BrokerServiceException.ServiceUnitNotReadyException(errorMessage, t)); + } catch (Exception ex) { + log.error("[{}] Transaction buffer recover fail when read " + + "transactionBufferSnapshot!", topic.getName(), ex); + return FutureUtil.failedFuture(ex); + } finally { + closeReader(reader); + } + PositionImpl finalStartReadCursorPosition = startReadCursorPosition; + TransactionBufferSnapshotIndexes finalPersistentSnapshotIndexes = persistentSnapshotIndexes; + if (!hasIndex) { + return CompletableFuture.completedFuture(null); + } else { + this.unsealedTxnIds = convertTypeToTxnID(persistentSnapshotIndexes + .getSnapshot().getAborts()); + } + //Read snapshot segment to recover aborts. 
+ ArrayList<CompletableFuture<Void>> completableFutures = new ArrayList<>(); + CompletableFuture<Void> openManagedLedgerFuture = new CompletableFuture<>(); + AtomicBoolean hasInvalidIndex = new AtomicBoolean(false); + AsyncCallbacks.OpenReadOnlyManagedLedgerCallback callback = new AsyncCallbacks + .OpenReadOnlyManagedLedgerCallback() { + @Override + public void openReadOnlyManagedLedgerComplete(ReadOnlyManagedLedgerImpl readOnlyManagedLedger, + Object ctx) { + finalPersistentSnapshotIndexes.getIndexList().forEach(index -> { + CompletableFuture<Void> handleSegmentFuture = new CompletableFuture<>(); + completableFutures.add(handleSegmentFuture); + readOnlyManagedLedger.asyncReadEntry( + new PositionImpl(index.getSegmentLedgerID(), + index.getSegmentEntryID()), + new AsyncCallbacks.ReadEntryCallback() { + @Override + public void readEntryComplete(Entry entry, Object ctx) { + handleSnapshotSegmentEntry(entry); + indexes.put(new PositionImpl( + index.abortedMarkLedgerID, + index.abortedMarkEntryID), + index); + entry.release(); + handleSegmentFuture.complete(null); + } + + @Override + public void readEntryFailed(ManagedLedgerException exception, Object ctx) { + if (exception instanceof ManagedLedgerException Review Comment: If the exception is an instance of NonRecoverableLedgerException, that means the index is invalid, right? ```suggestion if (!exception instanceof ManagedLedgerException ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
