Github user MikeThomsen commented on a diff in the pull request:

    https://github.com/apache/nifi/pull/2553#discussion_r174882429

--- Diff: nifi-nar-bundles/nifi-pulsar-bundle/nifi-pulsar-processors/src/main/java/org/apache/nifi/processors/pulsar/ConsumePulsar_1_0.java ---
@@ -0,0 +1,392 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.pulsar;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.annotation.lifecycle.OnScheduled;
+import org.apache.nifi.annotation.lifecycle.OnStopped;
+import org.apache.nifi.annotation.lifecycle.OnUnscheduled;
+import org.apache.nifi.components.AllowableValue;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.pulsar.PulsarClientPool;
+import org.apache.nifi.pulsar.PulsarConsumer;
+import org.apache.nifi.pulsar.pool.PulsarConsumerFactory;
+import org.apache.pulsar.client.api.Consumer;
+import org.apache.pulsar.client.api.ConsumerConfiguration;
+import org.apache.pulsar.client.api.Message;
+import org.apache.pulsar.client.api.PulsarClientException;
+import org.apache.pulsar.client.api.SubscriptionType;
+
+@Tags({"Pulsar", "Get", "Ingest", "Ingress", "Topic", "PubSub", "Consume"})
+@CapabilityDescription("Consumes messages from Apache Pulsar. "
+        + "The complementary NiFi processor for sending messages is PublishPulsar.")
+@InputRequirement(InputRequirement.Requirement.INPUT_FORBIDDEN)
+public class ConsumePulsar_1_0 extends AbstractPulsarProcessor {
+
+    static final AllowableValue EXCLUSIVE = new AllowableValue("Exclusive", "Exclusive", "There can be only 1 consumer on the same topic with the same subscription name");
+    static final AllowableValue SHARED = new AllowableValue("Shared", "Shared", "Multiple consumers will be able to use the same subscription name and the messages");
+    static final AllowableValue FAILOVER = new AllowableValue("Failover", "Failover", "Multiple consumers will be able to use the same subscription name but only 1 consumer "
+            + "will receive the messages. If that consumer disconnects, one of the other connected consumers will start receiving messages");
+
+    protected static final PropertyDescriptor TOPIC = new PropertyDescriptor.Builder()
+            .name("topic")
+            .displayName("Topic Name")
+            .description("The name of the Pulsar Topic.")
+            .required(true)
+            .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+            .build();
+
+    static final PropertyDescriptor SUBSCRIPTION = new PropertyDescriptor.Builder()
+            .name("Subscription")
+            .displayName("Subscription Name")
+            .description("The name of the Pulsar subscription to consume from.")
+            .required(true)
+            .addValidator(StandardValidators.NON_BLANK_VALIDATOR)
+            .build();
+
+    public static final PropertyDescriptor ASYNC_ENABLED = new PropertyDescriptor.Builder()
+            .name("Async Enabled")
+            .description("Control whether the messages will be consumed asynchronously or not. Messages consumed"
+                    + " synchronously will be acknowledged immediately before processing the next message, while"
+                    + " asynchronous messages will be acknowledged after the Pulsar broker responds.")
+            .required(true)
+            .addValidator(StandardValidators.BOOLEAN_VALIDATOR)
+            .defaultValue("false")
+            .build();
+
+    public static final PropertyDescriptor MAX_ASYNC_REQUESTS = new PropertyDescriptor.Builder()
+            .name("Maximum Async Requests")
+            .description("The maximum number of outstanding asynchronous consumer requests for this processor. "
+                    + "Each asynchronous call requires memory, so avoid setting this value too high.")
+            .required(false)
+            .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
+            .defaultValue("50")
+            .build();
+
+    public static final PropertyDescriptor ACK_TIMEOUT = new PropertyDescriptor.Builder()
+            .name("Acknowledgment Timeout")
+            .description("Set the timeout (in milliseconds) for unacked messages, truncated to the "
+                    + "nearest millisecond. The timeout needs to be greater than 10 seconds.")
+            .required(false)
+            .addValidator(StandardValidators.POSITIVE_LONG_VALIDATOR)
+            .defaultValue("10000")
+            .build();
+
+    public static final PropertyDescriptor PRIORITY_LEVEL = new PropertyDescriptor.Builder()
+            .name("Consumer Priority Level")
+            .description("Sets priority level for the shared subscription consumers to which broker "
+                    + "gives more priority while dispatching messages. Here, broker follows descending "
+                    + "priorities. (eg: 0=max-priority, 1, 2,..) ")
+            .required(false)
+            .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
+            .defaultValue("5")
+            .build();
+
+    public static final PropertyDescriptor RECEIVER_QUEUE_SIZE = new PropertyDescriptor.Builder()
+            .name("Consumer receive queue size.")
+            .description("The consumer receive queue controls how many messages can be accumulated "
+                    + "by the Consumer before the application calls Consumer.receive(). Using a higher "
+                    + "value could potentially increase the consumer throughput at the expense of bigger "
+                    + "memory utilization. \n"
+                    + "Setting the consumer queue size as zero, \n"
+                    + "\t - Decreases the throughput of the consumer, by disabling pre-fetching of messages. \n"
+                    + "\t - Doesn't support Batch-Message: if consumer receives any batch-message then it closes consumer "
+                    + "connection with broker and consumer will not be able to receive any further messages unless batch-message "
+                    + "in pipeline is removed")
+            .required(false)
+            .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR)
+            .defaultValue("1000")
+            .build();
+
+    public static final PropertyDescriptor SUBSCRIPTION_TYPE = new PropertyDescriptor.Builder()
+            .name("Subscription Type")
+            .description("Select the subscription type to be used when subscribing to the topic.")
+            .required(false)
+            .allowableValues(EXCLUSIVE, SHARED, FAILOVER)
+            .defaultValue(SHARED.getValue())
+            .build();
+
+    private static final List<PropertyDescriptor> PROPERTIES;
+    private static final Set<Relationship> RELATIONSHIPS;
+
+    // Reuse the same consumer for a given topic / subscription
+    private PulsarConsumer consumer;
+    private ConsumerConfiguration consumerConfig;
+
+    // Pool for running multiple consume Async requests
+    ExecutorService pool;
+    ExecutorCompletionService<Message> completionService;
+
+    static {
+        final List<PropertyDescriptor> properties = new ArrayList<>();
+        properties.add(PULSAR_CLIENT_SERVICE);
+        properties.add(TOPIC);
+        properties.add(SUBSCRIPTION);
+        properties.add(ASYNC_ENABLED);
+        properties.add(MAX_ASYNC_REQUESTS);
+        properties.add(ACK_TIMEOUT);
+        properties.add(PRIORITY_LEVEL);
+        properties.add(RECEIVER_QUEUE_SIZE);
+        properties.add(SUBSCRIPTION_TYPE);
+
+        PROPERTIES = Collections.unmodifiableList(properties);
+
+        final Set<Relationship> relationships = new HashSet<>();
+        relationships.add(REL_SUCCESS);
+        RELATIONSHIPS = Collections.unmodifiableSet(relationships);
+    }
+
+    @Override
+    public Set<Relationship> getRelationships() {
+        return RELATIONSHIPS;
+    }
+
+    @Override
+    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+        return PROPERTIES;
+    }
+
+    @OnScheduled
+    public void init(ProcessContext context) {
+        pool = Executors.newFixedThreadPool(context.getProperty(MAX_ASYNC_REQUESTS).asInteger());
+        completionService = new ExecutorCompletionService<>(pool);
+    }
+
+    @OnUnscheduled
+    public void shutDown() {
+        // Stop all the async consumers
+        pool.shutdownNow();
+    }
+
+    @Override
+    public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
+
+        try {
+            if (context.getProperty(ASYNC_ENABLED).isSet() && context.getProperty(ASYNC_ENABLED).asBoolean()) {
+                // Launch consumers
+                consumeAsync(context, session);
+
+                // Handle completed consumers
+                handleAsync(context, session);
+
+            } else {
+                consume(context, session);
+            }
+        } catch (PulsarClientException e) {
+            getLogger().error("Unable to consume from Pulsar Topic ", e);
+            context.yield();
+            throw new ProcessException(e);
+        }
+
+    }
+
+    private void handleAsync(ProcessContext context, ProcessSession session) {
+
+        try {
+            Future<Message> done = completionService.take();
+            Message msg = done.get();
+
+            if (msg != null) {
+                FlowFile flowFile = null;
+                final byte[] value = msg.getData();
+                if (value != null && value.length > 0) {
+                    flowFile = session.create();
+                    flowFile = session.write(flowFile, out -> {
+                        out.write(value);
+                    });
+                }
+
+                session.getProvenanceReporter().receive(flowFile, "From " + context.getProperty(TOPIC).getValue());
+                session.transfer(flowFile, REL_SUCCESS);
+                session.commit();
+                getWrappedConsumer(context).getConsumer().acknowledgeAsync(msg);
+            }
+
+        } catch (InterruptedException | ExecutionException | PulsarClientException e) {
+            getLogger().error("Trouble consuming messages ", e);
+        }
+
+    }
+
+    @OnStopped
+    public void close(final ProcessContext context) {
+
+        getLogger().info("Disconnecting Pulsar Consumer");
+        if (consumer != null) {
+
+            context.getProperty(PULSAR_CLIENT_SERVICE)
+                .asControllerService(PulsarClientPool.class)
+                .getConsumerPool().evict(consumer);
+        }
+
+        consumer = null;
+    }
+
+    /*
+     * For now let's assume that this processor will be configured to run for a longer
+     * duration than 0 milliseconds. So we will be grabbing as many messages off the topic
+     * as possible and committing them as FlowFiles
+     */
+    private void consumeAsync(ProcessContext context, ProcessSession session) throws PulsarClientException {
+
+        Consumer consumer = getWrappedConsumer(context).getConsumer();
+
+        completionService.submit(new Callable<Message>() {
+            @Override
+            public Message call() throws Exception {
+                return consumer.receiveAsync().get();
+            }
+        });
+
+    }
+
+    /*
+     * When this Processor expects to receive many small files, it may
+     * be advisable to create several FlowFiles from a single session
+     * before committing the session. Typically, this allows the Framework
+     * to treat the content of the newly created FlowFiles much more efficiently.
+     */
+    private void consume(ProcessContext context, ProcessSession session) throws PulsarClientException {
+
+        Consumer consumer = getWrappedConsumer(context).getConsumer();
+
+        final ComponentLog logger = getLogger();
+        final Message msg;
+        FlowFile flowFile = null;
+
+        try {
+
+            msg = consumer.receive();
+            final byte[] value = msg.getData();
+
+            if (value != null && value.length > 0) {
+                flowFile = session.create();
+                flowFile = session.write(flowFile, out -> {
+                    out.write(value);
+                });
+
+                    session.getProvenanceReporter().receive(flowFile, "From " + context.getProperty(TOPIC).getValue());
+                    session.transfer(flowFile, REL_SUCCESS);
--- End diff --

Indentation is off starting here.
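For clarity, a minimal sketch of the alignment being asked for in the flagged region of consume(), assuming the four-space indentation used elsewhere in this class; the statements are taken verbatim from the diff above and the rest of the try/catch block is omitted:

            if (value != null && value.length > 0) {
                flowFile = session.create();
                flowFile = session.write(flowFile, out -> {
                    out.write(value);
                });

                // Aligned with the statements above, not one level deeper as in the diff.
                session.getProvenanceReporter().receive(flowFile, "From " + context.getProperty(TOPIC).getValue());
                session.transfer(flowFile, REL_SUCCESS);
                // ... remainder of the block as in the pull request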
---