Github user gdfm commented on a diff in the pull request:

    https://github.com/apache/incubator-samoa/pull/32#discussion_r35091125
  
    --- Diff: samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaReader.java ---
    @@ -0,0 +1,279 @@
    +package org.apache.samoa.streams.kafka;
    +
    +/*
    + * #%L
    + * SAMOA
    + * %%
    + * Copyright (C) 2014 - 2015 Apache Software Foundation
    + * %%
    + * Licensed under the Apache License, Version 2.0 (the "License");
    + * you may not use this file except in compliance with the License.
    + * You may obtain a copy of the License at
    + * 
    + *      http://www.apache.org/licenses/LICENSE-2.0
    + * 
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + * #L%
    + */
    +
    +import kafka.api.FetchRequest;
    +import kafka.api.FetchRequestBuilder;
    +import kafka.api.PartitionOffsetRequestInfo;
    +import kafka.common.ErrorMapping;
    +import kafka.common.TopicAndPartition;
    +import kafka.javaapi.*;
    +import kafka.javaapi.consumer.SimpleConsumer;
    +import kafka.message.MessageAndOffset;
    +
    +import java.io.UnsupportedEncodingException;
    +import java.nio.ByteBuffer;
    +import java.util.*;
    +
    +public class KafkaReader {
    +
    +    protected long readOffset;
    +
    +    private List<String> m_replicaBrokers;
    +
    +    public KafkaReader() {
    +        m_replicaBrokers = new ArrayList<String>();
    +        readOffset = 0L;
    +    }
    +
    +    public ArrayList<String> run(long a_maxReads, String a_topic, int a_partition, List<String> a_seedBrokers, int a_port) {
    +
    +        // find the metadata about the topic and partition we are interested in
    +        String answer = "";
    +        ArrayList<String> returnInstances = new ArrayList<String>();
    +        PartitionMetadata metadata = findLeader(a_seedBrokers, a_port, a_topic, a_partition);
    +        if (metadata == null) {
    +            System.out.println("Can't find metadata for Topic and 
Partition. Exiting");
    +            return null;
    +        }
    +        if (metadata.leader() == null) {
    +            System.out.println("Can't find Leader for Topic and Partition. 
Exiting");
    +            return null;
    +        }
    +        String leadBroker = metadata.leader().host();
    +        String clientName = "Client_" + a_topic + "_" + a_partition;
    +
    +        SimpleConsumer consumer = new SimpleConsumer(leadBroker, a_port, 100000, 64 * 1024, clientName);
    +        //long readOffset = getLastOffset(consumer, a_topic, a_partition, kafka.api.OffsetRequest.EarliestTime(), clientName);
    +        //readOffset = 0L;
    +        int numErrors = 0;
    +        while (a_maxReads > 0) {
    +            if (consumer == null) {
    +                consumer = new SimpleConsumer(leadBroker, a_port, 100000, 64 * 1024, clientName);
    +            }
    +            // Reading data
    +            FetchRequest req = new FetchRequestBuilder()
    +                    .clientId(clientName)
    +                    .addFetch(a_topic, a_partition, readOffset, 100000)
    +                    .build();
    +            FetchResponse fetchResponse = null;
    +
    +            try {
    +                fetchResponse = consumer.fetch(req);
    +            } catch (Exception e) {
    +                // don't swallow the failure silently; log it and let the error
    +                // handling below treat it like a fetch error and find a new leader
    +                e.printStackTrace();
    +            }
    +
    +            /*
    +             * SimpleConsumer does not handle lead broker failures; you have to handle them yourself.
    +             * Once the fetch returns an error, we log the reason, close the consumer, and then try to
    +             * figure out who the new leader is.
    +             */
    +            if (fetchResponse == null || fetchResponse.hasError()) {
    +                numErrors++;
    +                // Something went wrong!
    +                short code = (fetchResponse == null) ? ErrorMapping.UnknownCode() : fetchResponse.errorCode(a_topic, a_partition);
    +                System.out.println("Error fetching data from the Broker:" + leadBroker + " Reason: " + code);
    +                if (numErrors > 5) break;
    +                if (code == ErrorMapping.OffsetOutOfRangeCode()) {
    +                    // We asked for an invalid offset. For the simple case, ask for the last element to reset
    +                    //readOffset = getLastOffset(consumer, a_topic, a_partition, kafka.api.OffsetRequest.LatestTime(), clientName);
    +                    continue;
    +                }
    +                consumer.close();
    +                consumer = null;
    +                try {
    +                    leadBroker = findNewLeader(leadBroker, a_topic, a_partition, a_port);
    +                } catch (Exception e) {
    +                    // findNewLeader() failed; log it and let the loop retry
    +                    e.printStackTrace();
    +                }
    +                continue;
    +            }
    +            //End Error handling
    +
    +            // Reading data cont.
    +            numErrors = 0;
    +
    +            long numRead = 0;
    +            Iterator<MessageAndOffset> it = fetchResponse.messageSet(a_topic, a_partition).iterator();
    +            if (!it.hasNext()) {
    +                // nothing was fetched at this offset
    +                return null;
    +            }
    +            MessageAndOffset messageAndOffset = it.next();
    +
    +            //for (MessageAndOffset messageAndOffset : fetchResponse.messageSet(a_topic, a_partition)) {
    +            long currentOffset = messageAndOffset.offset();
    +            if (currentOffset < readOffset) {
    +                System.out.println("Found an old offset: " + currentOffset 
+ " Expecting: " + readOffset);
    +                continue;
    +            }
    +            readOffset = messageAndOffset.nextOffset();
    +            ByteBuffer payload = messageAndOffset.message().payload();
    +
    +            byte[] bytes = new byte[payload.limit()];
    +            payload.get(bytes);
    +            try {
    +                answer = String.valueOf(messageAndOffset.offset()) + ": " + new String(bytes, "UTF-8");
    +                System.out.println(answer);
    +                returnInstances.add(answer);
    +            } catch (UnsupportedEncodingException e) {
    +                e.printStackTrace();
    +            }
    +
    +            numRead++;
    +            a_maxReads--;
    +            //  break;
    +            //}
    +
    +            if (numRead == 0) {
    +                try {
    +                    Thread.sleep(1000);
    +                } catch (InterruptedException ie) {
    +                }
    +            }
    +        }
    +        if (consumer != null) consumer.close();
    +        return returnInstances;
    +    }
    +
    +    /**
    +     * Defines where to start reading data from.
    +     * Helpers available:
    +     * kafka.api.OffsetRequest.EarliestTime() => finds the beginning of the data in the logs and starts streaming from there
    +     * kafka.api.OffsetRequest.LatestTime()   => will only stream new messages
    +     *
    +     * @param consumer
    +     * @param topic
    +     * @param partition
    +     * @param whichTime
    +     * @param clientName
    +     * @return
    +     */
    +    public static long getLastOffset(SimpleConsumer consumer, String topic, int partition, long whichTime, String clientName) {
    +        TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partition);
    +        Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
    +        requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(whichTime, 1));
    +        kafka.javaapi.OffsetRequest request = new kafka.javaapi.OffsetRequest(requestInfo, kafka.api.OffsetRequest.CurrentVersion(), clientName);
    +        OffsetResponse response = consumer.getOffsetsBefore(request);
    +
    +        if (response.hasError()) {
    +            System.out.println("Error fetching data Offset Data the 
Broker. Reason: " + response.errorCode(topic, partition));
    +            return 0;
    +        }
    +        long[] offsets = response.offsets(topic, partition);
    +        return offsets[0];
    +    }
    +
    +    /**
    +     * Uses the findLeader() logic we defined to find the new leader, except here we only try to connect to one of the
    +     * replicas for the topic/partition. This way, if we can't reach any of the Brokers with the data we are interested
    +     * in, we give up and exit hard.
    +     *
    +     * @param a_oldLeader
    +     * @param a_topic
    +     * @param a_partition
    +     * @param a_port
    +     * @return
    +     * @throws Exception
    +     */
    +    private String findNewLeader(String a_oldLeader, String a_topic, int a_partition, int a_port) throws Exception {
    +        for (int i = 0; i < 3; i++) {
    +            boolean goToSleep = false;
    +            PartitionMetadata metadata = findLeader(m_replicaBrokers, a_port, a_topic, a_partition);
    +            if (metadata == null) {
    +                goToSleep = true;
    +            } else if (metadata.leader() == null) {
    +                goToSleep = true;
    +            } else if (a_oldLeader.equalsIgnoreCase(metadata.leader().host()) && i == 0) {
    +                // first time through, if the leader hasn't changed, give ZooKeeper a second to recover;
    +                // second time, assume the broker did recover before failover, or it was a non-Broker issue
    +                goToSleep = true;
    +            } else {
    +                return metadata.leader().host();
    +            }
    +            if (goToSleep) {
    +                try {
    +                    Thread.sleep(1000);
    +                } catch (InterruptedException ie) {
    +                }
    +            }
    +        }
    +        System.out.println("Unable to find new leader after Broker 
failure. Exiting");
    +        throw new Exception("Unable to find new leader after Broker 
failure. Exiting");
    +    }
    +
    +    /**
    +     * Query a live broker to find out leader information and replica information for a given topic and partition
    +     *
    +     * @param a_seedBrokers
    +     * @param a_port
    +     * @param a_topic
    +     * @param a_partition
    +     * @return
    +     */
    +    private PartitionMetadata findLeader(List<String> a_seedBrokers, int a_port, String a_topic, int a_partition) {
    +        PartitionMetadata returnMetaData = null;
    +        for (String seed : a_seedBrokers) {
    +            SimpleConsumer consumer = null;
    +            try {
    +                consumer = new SimpleConsumer(seed, a_port, 100000, 64 * 1024, "leaderLookup"); // broker_host, broker_port, timeout, buffer_size, client_id
    --- End diff ---
    
    I am not fully aware of Kafka internals, but I know that Kafka has two consumer APIs. Does this one use the new API?
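    
    For context, here is a rough sketch of what the same read loop looks like on the new consumer client (org.apache.kafka.clients.consumer.KafkaConsumer from the kafka-clients library), for comparison with the SimpleConsumer usage above. This is only a sketch, assuming a kafka-clients version where that client is fully implemented (0.9+); the class name, broker address, group id, and topic below are placeholders, not values from this PR:
    
    ```java
    import java.util.Arrays;
    import java.util.Properties;
    
    import org.apache.kafka.clients.consumer.ConsumerRecord;
    import org.apache.kafka.clients.consumer.ConsumerRecords;
    import org.apache.kafka.clients.consumer.KafkaConsumer;
    
    // Hypothetical example class, not part of this PR
    public class NewApiReadExample {
        public static void main(String[] args) {
            Properties props = new Properties();
            props.put("bootstrap.servers", "localhost:9092"); // hypothetical broker address
            props.put("group.id", "samoa-reader");            // hypothetical consumer group
            props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
            props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    
            KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(props);
            consumer.subscribe(Arrays.asList("my-topic"));    // hypothetical topic
            try {
                // poll() handles leader discovery, failover, and offset tracking internally,
                // all of which SimpleConsumer leaves to the caller
                ConsumerRecords<String, String> records = consumer.poll(1000);
                for (ConsumerRecord<String, String> record : records) {
                    System.out.println(record.offset() + ": " + record.value());
                }
            } finally {
                consumer.close();
            }
        }
    }
    ```
    
    The contrast is the point: with SimpleConsumer, the caller has to implement findLeader()/findNewLeader() and manual offset bookkeeping, exactly as this class does.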

