sijie closed pull request #1525: Compaction considers messages with empty payload as deleting the key
URL: https://github.com/apache/incubator-pulsar/pull/1525
This is a PR merged from a forked repository. Because GitHub hides the original diff of a foreign (forked) pull request once it is merged, the diff is reproduced below for the sake of provenance:
diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/client/impl/RawBatchConverter.java b/pulsar-broker/src/main/java/org/apache/pulsar/client/impl/RawBatchConverter.java index 4e628bc5f8..9ee31acf42 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/client/impl/RawBatchConverter.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/client/impl/RawBatchConverter.java @@ -113,7 +113,8 @@ public static boolean isBatch(RawMessage msg) { messagesRetained++; Commands.serializeSingleMessageInBatchWithPayload(singleMessageMetadataBuilder, singleMessagePayload, batchBuffer); - } else if (filter.test(singleMessageMetadataBuilder.getPartitionKey(), id)) { + } else if (filter.test(singleMessageMetadataBuilder.getPartitionKey(), id) + && singleMessagePayload.readableBytes() > 0) { messagesRetained++; Commands.serializeSingleMessageInBatchWithPayload(singleMessageMetadataBuilder, singleMessagePayload, batchBuffer); diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/compaction/TwoPhaseCompactor.java b/pulsar-broker/src/main/java/org/apache/pulsar/compaction/TwoPhaseCompactor.java index fbad47ea30..2eaa8d0c9c 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/compaction/TwoPhaseCompactor.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/compaction/TwoPhaseCompactor.java @@ -38,6 +38,7 @@ import org.apache.bookkeeper.client.BKException; import org.apache.bookkeeper.client.BookKeeper; import org.apache.bookkeeper.client.LedgerHandle; +import org.apache.commons.lang3.tuple.Pair; import org.apache.pulsar.broker.ServiceConfiguration; import org.apache.pulsar.common.api.Commands; @@ -122,9 +123,9 @@ private void phaseOneLoop(RawReader reader, id, ioe); } } else { - String key = 
extractKey(m); - if (key != null) { - latestForKey.put(key, id); + Pair<String,Integer> keyAndSize = extractKeyAndSize(m); + if (keyAndSize != null) { + latestForKey.put(keyAndSize.getLeft(), id); } } @@ -214,10 +215,11 @@ private void phaseTwoLoop(RawReader reader, MessageId to, Map<String, MessageId> messageToAdd = Optional.of(m); } } else { - String key = extractKey(m); - if (key == null) { // pass through messages without a key + Pair<String,Integer> keyAndSize = extractKeyAndSize(m); + if (keyAndSize == null) { // pass through messages without a key messageToAdd = Optional.of(m); - } else if (latestForKey.get(key).equals(id)) { + } else if (latestForKey.get(keyAndSize.getLeft()).equals(id) + && keyAndSize.getRight() > 0) { messageToAdd = Optional.of(m); } else { m.close(); @@ -307,11 +309,11 @@ private void phaseTwoLoop(RawReader reader, MessageId to, Map<String, MessageId> return bkf; } - private static String extractKey(RawMessage m) { + private static Pair<String,Integer> extractKeyAndSize(RawMessage m) { ByteBuf headersAndPayload = m.getHeadersAndPayload(); MessageMetadata msgMetadata = Commands.parseMessageMetadata(headersAndPayload); if (msgMetadata.hasPartitionKey()) { - return msgMetadata.getPartitionKey(); + return Pair.of(msgMetadata.getPartitionKey(), headersAndPayload.readableBytes()); } else { return null; } diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/compaction/CompactionTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/compaction/CompactionTest.java index a0f0f972e4..22e74f21a9 100644 --- a/pulsar-broker/src/test/java/org/apache/pulsar/compaction/CompactionTest.java +++ b/pulsar-broker/src/test/java/org/apache/pulsar/compaction/CompactionTest.java @@ -512,4 +512,72 @@ public void testKeyLessMessagesPassThrough() throws Exception { } } + + @Test + public void testEmptyPayloadDeletes() throws Exception { + String topic = "persistent://my-property/use/my-ns/my-topic1"; + + // subscribe before sending anything, so that we get 
all messages + pulsarClient.newConsumer().topic(topic).subscriptionName("sub1") + .readCompacted(true).subscribe().close(); + + try (Producer producerNormal = pulsarClient.newProducer().topic(topic).create(); + Producer producerBatch = pulsarClient.newProducer().topic(topic).maxPendingMessages(3) + .enableBatching(true).batchingMaxMessages(3) + .batchingMaxPublishDelay(1, TimeUnit.HOURS).create()) { + + // key0 persists through it all + producerNormal.sendAsync(MessageBuilder.create() + .setKey("key0") + .setContent("my-message-0".getBytes()).build()).get(); + + // key1 is added but then deleted + producerNormal.sendAsync(MessageBuilder.create() + .setKey("key1") + .setContent("my-message-1".getBytes()).build()).get(); + + producerNormal.sendAsync(MessageBuilder.create() + .setKey("key1").build()).get(); + + // key2 is added but deleted in same batch + producerBatch.sendAsync(MessageBuilder.create() + .setKey("key2") + .setContent("my-message-2".getBytes()).build()); + producerBatch.sendAsync(MessageBuilder.create() + .setKey("key3") + .setContent("my-message-3".getBytes()).build()); + producerBatch.sendAsync(MessageBuilder.create() + .setKey("key2").build()).get(); + + // key3 is added in previous batch, deleted in this batch + producerBatch.sendAsync(MessageBuilder.create() + .setKey("key3").build()); + producerBatch.sendAsync(MessageBuilder.create() + .setKey("key4") + .setContent("my-message-3".getBytes()).build()); + producerBatch.sendAsync(MessageBuilder.create() + .setKey("key4").build()).get(); + + // key4 is added, deleted, then resurrected + producerNormal.sendAsync(MessageBuilder.create() + .setKey("key4") + .setContent("my-message-4".getBytes()).build()).get(); + } + + // compact the topic + Compactor compactor = new TwoPhaseCompactor(conf, pulsarClient, bk, compactionScheduler); + compactor.compact(topic).get(); + + try (Consumer consumer = pulsarClient.newConsumer().topic(topic) + .subscriptionName("sub1").readCompacted(true).subscribe()){ + Message 
message1 = consumer.receive(); + Assert.assertEquals(message1.getKey(), "key0"); + Assert.assertEquals(new String(message1.getData()), "my-message-0"); + + Message message2 = consumer.receive(); + Assert.assertEquals(message2.getKey(), "key4"); + Assert.assertEquals(new String(message2.getData()), "my-message-4"); + } + } + } diff --git a/pulsar-client/src/main/java/org/apache/pulsar/client/impl/MessageBuilderImpl.java b/pulsar-client/src/main/java/org/apache/pulsar/client/impl/MessageBuilderImpl.java index 7714b993c2..056064a6ca 100644 --- a/pulsar-client/src/main/java/org/apache/pulsar/client/impl/MessageBuilderImpl.java +++ b/pulsar-client/src/main/java/org/apache/pulsar/client/impl/MessageBuilderImpl.java @@ -33,10 +33,10 @@ import com.google.common.base.Preconditions; public class MessageBuilderImpl<T> implements MessageBuilder<T> { - + private static final ByteBuffer EMPTY_CONTENT = ByteBuffer.allocate(0); private final MessageMetadata.Builder msgMetadataBuilder = MessageMetadata.newBuilder(); private final Schema<T> schema; - private ByteBuffer content; + private ByteBuffer content = EMPTY_CONTENT; public MessageBuilderImpl(Schema<T> schema) { this.schema = schema; ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services