[GitHub] storm pull request #1924: STORM-2343: New Kafka spout can stop emitting tupl...

HeartSaVioR Tue, 07 Mar 2017 00:53:15 -0800

Github user HeartSaVioR commented on a diff in the pull request:

    https://github.com/apache/storm/pull/1924#discussion_r104611695
  
    --- Diff: 
external/storm-kafka-client/src/test/java/org/apache/storm/kafka/spout/MaxUncommittedOffsetTest.java
 ---
    @@ -0,0 +1,245 @@
    +/*
    + * Copyright 2017 The Apache Software Foundation.
    + *
    + * Licensed under the Apache License, Version 2.0 (the "License");
    + * you may not use this file except in compliance with the License.
    + * You may obtain a copy of the License at
    + *
    + *      http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.storm.kafka.spout;
    +
    +import static 
org.apache.storm.kafka.spout.builders.SingleTopicKafkaSpoutConfiguration.getKafkaSpoutConfigBuilder;
    +import static org.hamcrest.CoreMatchers.is;
    +import static org.junit.Assert.assertThat;
    +import static org.mockito.Matchers.anyObject;
    +import static org.mockito.Mockito.mock;
    +import static org.mockito.Mockito.reset;
    +import static org.mockito.Mockito.spy;
    +import static org.mockito.Mockito.times;
    +import static org.mockito.Mockito.verify;
    +
    +import info.batey.kafka.unit.KafkaUnitRule;
    +import java.util.HashMap;
    +import java.util.List;
    +import java.util.Map;
    +import java.util.stream.Collectors;
    +import java.util.stream.IntStream;
    +import kafka.producer.KeyedMessage;
    +import org.apache.kafka.clients.consumer.KafkaConsumer;
    +import 
org.apache.storm.kafka.spout.builders.SingleTopicKafkaSpoutConfiguration;
    +import org.apache.storm.kafka.spout.internal.KafkaConsumerFactory;
    +import org.apache.storm.kafka.spout.internal.KafkaConsumerFactoryDefault;
    +import org.apache.storm.spout.SpoutOutputCollector;
    +import org.apache.storm.task.TopologyContext;
    +import org.apache.storm.utils.Time;
    +import org.junit.Before;
    +import org.junit.Rule;
    +import org.junit.Test;
    +import org.mockito.ArgumentCaptor;
    +import org.mockito.MockitoAnnotations;
    +
    +public class MaxUncommittedOffsetTest {
    +
    +    @Rule
    +    public KafkaUnitRule kafkaUnitRule = new KafkaUnitRule();
    +
    +    private final TopologyContext topologyContext = 
mock(TopologyContext.class);
    +    private final Map<String, Object> conf = new HashMap<>();
    +    private final SpoutOutputCollector collector = 
mock(SpoutOutputCollector.class);
    +    private final long commitOffsetPeriodMs = 2_000;
    +    private final int numMessages = 100;
    +    private final int maxUncommittedOffsets = 10;
    +    //This is set to be the same as maxUncommittedOffsets since it is 
difficult to test maxUncommittedOffsets when maxPollRecords is not the same
    +    //If maxPollRecords is larger, a single call to poll will emit more 
than maxUncommittedOffsets messages
    +    //If maxPollRecords is lower, it will cap how far past the commit 
offset the spout can read when there are failed tuples ready for retry
    +    private final int maxPollRecords = maxUncommittedOffsets;
    +    private final int initialRetryDelaySecs = 60;
    +    private final KafkaSpoutConfig spoutConfig = 
getKafkaSpoutConfigBuilder(kafkaUnitRule.getKafkaPort())
    +        .setOffsetCommitPeriodMs(commitOffsetPeriodMs)
    +        .setMaxPollRecords(maxPollRecords)
    +        .setMaxUncommittedOffsets(maxUncommittedOffsets)
    +        .setRetry(new 
KafkaSpoutRetryExponentialBackoff(KafkaSpoutRetryExponentialBackoff.TimeInterval.seconds(initialRetryDelaySecs),
 KafkaSpoutRetryExponentialBackoff.TimeInterval.seconds(0),
    +            1, 
KafkaSpoutRetryExponentialBackoff.TimeInterval.seconds(initialRetryDelaySecs))) 
//Retry once after a minute
    +        .build();
    +    private KafkaConsumer<String, String> consumerSpy;
    +    private KafkaConsumerFactory<String, String> consumerFactory;
    +    private KafkaSpout<String, String> spout;
    +
    +    @Before
    +    public void setUp() {
    +        MockitoAnnotations.initMocks(this);
    +        this.consumerSpy = spy(new 
KafkaConsumerFactoryDefault().createConsumer(spoutConfig));
    +        this.consumerFactory = (kafkaSpoutConfig) -> consumerSpy;
    +        this.spout = new KafkaSpout<>(spoutConfig, consumerFactory);
    +    }
    +
    +    private void populateTopicData(String topicName, int msgCount) {
    +        kafkaUnitRule.getKafkaUnit().createTopic(topicName);
    +
    +        IntStream.range(0, msgCount).forEach(value -> {
    +            KeyedMessage<String, String> keyedMessage = new KeyedMessage<>(
    +                topicName, Integer.toString(value),
    +                Integer.toString(value));
    +
    +            kafkaUnitRule.getKafkaUnit().sendMessages(keyedMessage);
    +        });
    +    }
    +
    +    private void initializeSpout(int msgCount) {
    +        populateTopicData(SingleTopicKafkaSpoutConfiguration.TOPIC, 
msgCount);
    +        spout.open(conf, topologyContext, collector);
    +        spout.activate();
    +    }
    +
    +    public ArgumentCaptor<KafkaSpoutMessageId> 
emitMaxUncommittedOffsetsMessagesAndCheckNoMoreAreEmitted(int messageCount) {
    +        //The spout must respect maxUncommittedOffsets when 
requesting/emitting tuples
    +        initializeSpout(messageCount);
    +
    +        //Try to emit all messages. Ensure only maxUncommittedOffsets are 
emitted
    +        ArgumentCaptor<KafkaSpoutMessageId> messageIds = 
ArgumentCaptor.forClass(KafkaSpoutMessageId.class);
    +        for (int i = 0; i < messageCount; i++) {
    +            spout.nextTuple();
    +        };
    +        verify(collector, times(maxUncommittedOffsets)).emit(
    +            anyObject(),
    +            anyObject(),
    +            messageIds.capture());
    +        return messageIds;
    +    }
    +
    +    @Test
    +    public void 
testNextTupleCanEmitMoreMessagesWhenDroppingBelowMaxUncommittedOffsetsDueToCommit()
 {
    +        //The spout must respect maxUncommittedOffsets after committing a 
set of records
    +        try (Time.SimulatedTime simulatedTime = new Time.SimulatedTime()) {
    +            //First check that maxUncommittedOffsets is respected when 
emitting from scratch
    +            ArgumentCaptor<KafkaSpoutMessageId> messageIds = 
emitMaxUncommittedOffsetsMessagesAndCheckNoMoreAreEmitted(numMessages);
    --- End diff --
    
    Minor: the precondition here is that `numMessages >= 2 * maxUncommittedOffsets`.



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

[GitHub] storm pull request #1924: STORM-2343: New Kafka spout can stop emitting tupl...

Reply via email to