jolshan commented on code in PR #13391: URL: https://github.com/apache/kafka/pull/13391#discussion_r1162044614
########## core/src/main/scala/kafka/server/ReplicaManager.scala: ########## @@ -616,66 +619,128 @@ class ReplicaManager(val config: KafkaConfig, responseCallback: Map[TopicPartition, PartitionResponse] => Unit, delayedProduceLock: Option[Lock] = None, recordConversionStatsCallback: Map[TopicPartition, RecordConversionStats] => Unit = _ => (), - requestLocal: RequestLocal = RequestLocal.NoCaching): Unit = { + requestLocal: RequestLocal = RequestLocal.NoCaching, + transactionalId: String = null, + transactionStatePartition: Option[Int] = None): Unit = { if (isValidRequiredAcks(requiredAcks)) { val sTime = time.milliseconds - val localProduceResults = appendToLocalLog(internalTopicsAllowed = internalTopicsAllowed, - origin, entriesPerPartition, requiredAcks, requestLocal) - debug("Produce to local log in %d ms".format(time.milliseconds - sTime)) - - val produceStatus = localProduceResults.map { case (topicPartition, result) => - topicPartition -> ProducePartitionStatus( - result.info.lastOffset + 1, // required offset - new PartitionResponse( - result.error, - result.info.firstOffset.map[Long](_.messageOffset).orElse(-1L), - result.info.logAppendTime, - result.info.logStartOffset, - result.info.recordErrors, - result.info.errorMessage + + val (verifiedEntriesPerPartition, notYetVerifiedEntriesPerPartition) = + if (transactionStatePartition.isEmpty || !config.transactionPartitionVerificationEnable) + (entriesPerPartition, Map.empty) + else + entriesPerPartition.partition { case (topicPartition, records) => + getPartitionOrException(topicPartition).hasOngoingTransaction(records.firstBatch().producerId()) + } + + def appendEntries(allEntries: Map[TopicPartition, MemoryRecords])(unverifiedEntries: Map[TopicPartition, Errors]): Unit = { + val verifiedEntries = + if (unverifiedEntries.isEmpty) + allEntries + else + allEntries.filter { case (tp, _) => + !unverifiedEntries.contains(tp) + } + + val localProduceResults = appendToLocalLog(internalTopicsAllowed = 
internalTopicsAllowed, + origin, verifiedEntries, requiredAcks, requestLocal) + debug("Produce to local log in %d ms".format(time.milliseconds - sTime)) + + val unverifiedResults = unverifiedEntries.map { case (topicPartition, error) => + // NOTE: Older clients return INVALID_RECORD, but newer clients will return INVALID_TXN_STATE + val message = if (error.equals(Errors.INVALID_RECORD)) "Partition was not added to the transaction" else error.message() + topicPartition -> LogAppendResult( + LogAppendInfo.UNKNOWN_LOG_APPEND_INFO, + Some(error.exception(message)) ) - ) // response status - } + } + + val allResults = localProduceResults ++ unverifiedResults + + val produceStatus = allResults.map { case (topicPartition, result) => + topicPartition -> ProducePartitionStatus( + result.info.lastOffset + 1, // required offset + new PartitionResponse( + result.error, + result.info.firstOffset.map[Long](_.messageOffset).orElse(-1L), + result.info.logAppendTime, + result.info.logStartOffset, + result.info.recordErrors, + result.info.errorMessage + ) + ) // response status + } - actionQueue.add { - () => - localProduceResults.foreach { - case (topicPartition, result) => - val requestKey = TopicPartitionOperationKey(topicPartition) - result.info.leaderHwChange match { - case LeaderHwChange.INCREASED => - // some delayed operations may be unblocked after HW changed - delayedProducePurgatory.checkAndComplete(requestKey) - delayedFetchPurgatory.checkAndComplete(requestKey) - delayedDeleteRecordsPurgatory.checkAndComplete(requestKey) - case LeaderHwChange.SAME => - // probably unblock some follower fetch requests since log end offset has been updated - delayedFetchPurgatory.checkAndComplete(requestKey) - case LeaderHwChange.NONE => + actionQueue.add { + () => + allResults.foreach { + case (topicPartition, result) => + val requestKey = TopicPartitionOperationKey(topicPartition) + result.info.leaderHwChange match { + case LeaderHwChange.INCREASED => + // some delayed operations may be 
unblocked after HW changed + delayedProducePurgatory.checkAndComplete(requestKey) + delayedFetchPurgatory.checkAndComplete(requestKey) + delayedDeleteRecordsPurgatory.checkAndComplete(requestKey) + case LeaderHwChange.SAME => + // probably unblock some follower fetch requests since log end offset has been updated + delayedFetchPurgatory.checkAndComplete(requestKey) + case LeaderHwChange.NONE => // nothing - } - } - } + } + } + } + + recordConversionStatsCallback(localProduceResults.map { case (k, v) => k -> v.info.recordConversionStats }) - recordConversionStatsCallback(localProduceResults.map { case (k, v) => k -> v.info.recordConversionStats }) + if (delayedProduceRequestRequired(requiredAcks, allEntries, allResults)) { + // create delayed produce operation + val produceMetadata = ProduceMetadata(requiredAcks, produceStatus) + val delayedProduce = new DelayedProduce(timeout, produceMetadata, this, responseCallback, delayedProduceLock) - if (delayedProduceRequestRequired(requiredAcks, entriesPerPartition, localProduceResults)) { - // create delayed produce operation - val produceMetadata = ProduceMetadata(requiredAcks, produceStatus) - val delayedProduce = new DelayedProduce(timeout, produceMetadata, this, responseCallback, delayedProduceLock) + // create a list of (topic, partition) pairs to use as keys for this delayed produce operation + val producerRequestKeys = allEntries.keys.map(TopicPartitionOperationKey(_)).toSeq - // create a list of (topic, partition) pairs to use as keys for this delayed produce operation - val producerRequestKeys = entriesPerPartition.keys.map(TopicPartitionOperationKey(_)).toSeq + // try to complete the request immediately, otherwise put it into the purgatory + // this is because while the delayed produce operation is being created, new + // requests may arrive and hence make this operation completable. 
+ delayedProducePurgatory.tryCompleteElseWatch(delayedProduce, producerRequestKeys) - // try to complete the request immediately, otherwise put it into the purgatory - // this is because while the delayed produce operation is being created, new - // requests may arrive and hence make this operation completable. - delayedProducePurgatory.tryCompleteElseWatch(delayedProduce, producerRequestKeys) + } else { + // we can respond immediately + val produceResponseStatus = produceStatus.map { case (k, status) => k -> status.responseStatus } + responseCallback(produceResponseStatus) + } + } + if (notYetVerifiedEntriesPerPartition.isEmpty || addPartitionsToTxnManager.isEmpty) { + appendEntries(verifiedEntriesPerPartition)(Map.empty) } else { - // we can respond immediately - val produceResponseStatus = produceStatus.map { case (k, status) => k -> status.responseStatus } - responseCallback(produceResponseStatus) + // For unverified entries, send a request to verify. When verified, the append process will proceed via the callback. + val (error, node) = getTransactionCoordinator(transactionStatePartition.get) + + if (error != Errors.NONE) { + throw error.exception() // Can throw coordinator not available -- which is retriable Review Comment: It is retriable -- currently if the error is retriable we just retry. It seems like most retriable errors are not enumerated specifically. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: jira-unsubscribe@kafka.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org