Github user MikeThomsen commented on a diff in the pull request: https://github.com/apache/nifi/pull/2553#discussion_r174880893 --- Diff: nifi-nar-bundles/nifi-pulsar-bundle/nifi-pulsar-processors/src/main/java/org/apache/nifi/processors/pulsar/ConsumePulsar_1_0.java --- @@ -0,0 +1,392 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nifi.processors.pulsar; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Properties; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; + +import org.apache.nifi.annotation.behavior.InputRequirement; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.annotation.lifecycle.OnScheduled; +import org.apache.nifi.annotation.lifecycle.OnStopped; +import org.apache.nifi.annotation.lifecycle.OnUnscheduled; +import org.apache.nifi.components.AllowableValue; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.Relationship; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.nifi.pulsar.PulsarClientPool; +import org.apache.nifi.pulsar.PulsarConsumer; +import org.apache.nifi.pulsar.pool.PulsarConsumerFactory; +import org.apache.pulsar.client.api.Consumer; +import org.apache.pulsar.client.api.ConsumerConfiguration; +import org.apache.pulsar.client.api.Message; +import org.apache.pulsar.client.api.PulsarClientException; +import org.apache.pulsar.client.api.SubscriptionType; + +@Tags({"Pulsar", "Get", "Ingest", "Ingress", "Topic", "PubSub", "Consume"}) +@CapabilityDescription("Consumes messages from Apache Pulsar " + + "The complementary NiFi processor for sending messages is PublishPulsar.") 
+@InputRequirement(InputRequirement.Requirement.INPUT_FORBIDDEN) +public class ConsumePulsar_1_0 extends AbstractPulsarProcessor { + + static final AllowableValue EXCLUSIVE = new AllowableValue("Exclusive", "Exclusive", "There can be only 1 consumer on the same topic with the same subscription name"); + static final AllowableValue SHARED = new AllowableValue("Shared", "Shared", "Multiple consumer will be able to use the same subscription name and the messages"); + static final AllowableValue FAILOVER = new AllowableValue("Failover", "Failover", "Multiple consumer will be able to use the same subscription name but only 1 consumer " + + "will receive the messages. If that consumer disconnects, one of the other connected consumers will start receiving messages"); + + protected static final PropertyDescriptor TOPIC = new PropertyDescriptor.Builder() + .name("topic") + .displayName("Topic Name") + .description("The name of the Pulsar Topic.") + .required(true) + .addValidator(StandardValidators.NON_BLANK_VALIDATOR) + .build(); + + static final PropertyDescriptor SUBSCRIPTION = new PropertyDescriptor.Builder() + .name("Subscription") + .displayName("Subscription Name") + .description("The name of the Pulsar subscription to consume from.") + .required(true) + .addValidator(StandardValidators.NON_BLANK_VALIDATOR) + .build(); + + public static final PropertyDescriptor ASYNC_ENABLED = new PropertyDescriptor.Builder() + .name("Async Enabled") + .description("Control whether the messages will be consumed asyncronously or not. 
Messages consumed" + + " syncronously will be acknowledged immediately before processing the next message, while" + + " asyncronous messages will be acknowledged after the Pulsar broker responds.") + .required(true) + .addValidator(StandardValidators.BOOLEAN_VALIDATOR) + .defaultValue("false") + .build(); + + public static final PropertyDescriptor MAX_ASYNC_REQUESTS = new PropertyDescriptor.Builder() + .name("Maximum Async Requests") + .description("The maximum number of outstanding asynchronous consumer requests for this processor. " + + "Each asynchronous call requires memory, so avoid setting this value to high.") + .required(false) + .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) + .defaultValue("50") + .build(); + + public static final PropertyDescriptor ACK_TIMEOUT = new PropertyDescriptor.Builder() + .name("Acknowledgment Timeout") + .description("Set the timeout (in milliseconds) for unacked messages, truncated to the " + + "nearest millisecond. The timeout needs to be greater than 10 seconds.") + .required(false) + .addValidator(StandardValidators.POSITIVE_LONG_VALIDATOR) + .defaultValue("10000") + .build(); + + public static final PropertyDescriptor PRIORITY_LEVEL = new PropertyDescriptor.Builder() + .name("Consumer Priority Level") + .description("Sets priority level for the shared subscription consumers to which broker " + + "gives more priority while dispatching messages. Here, broker follows descending " + + "priorities. (eg: 0=max-priority, 1, 2,..) ") + .required(false) + .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) + .defaultValue("5") + .build(); + + public static final PropertyDescriptor RECEIVER_QUEUE_SIZE = new PropertyDescriptor.Builder() + .name("Consumer receive queue size.") + .description("The consumer receive queue controls how many messages can be accumulated " + + "by the Consumer before the application calls Consumer.receive(). 
Using a higher " + + "value could potentially increase the consumer throughput at the expense of bigger " + + "memory utilization. \n" + + "Setting the consumer queue size as zero, \n" + + "\t - Decreases the throughput of the consumer, by disabling pre-fetching of messages. \n" + + "\t - Doesn't support Batch-Message: if consumer receives any batch-message then it closes consumer " + + "connection with broker and consumer will not be able receive any further message unless batch-message " + + "in pipeline is removed") + .required(false) + .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) + .defaultValue("1000") + .build(); + + public static final PropertyDescriptor SUBSCRIPTION_TYPE = new PropertyDescriptor.Builder() + .name("Subscription Type") + .description("Select the subscription type to be used when subscribing to the topic.") + .required(false) + .allowableValues(EXCLUSIVE, SHARED, FAILOVER) + .defaultValue(SHARED.getValue()) + .build(); + + private static final List<PropertyDescriptor> PROPERTIES; + private static final Set<Relationship> RELATIONSHIPS; + + // Reuse the same consumer for a given topic / subscription + private PulsarConsumer consumer; + private ConsumerConfiguration consumerConfig; + + // Pool for running multiple consume Async requests + ExecutorService pool; + ExecutorCompletionService<Message> completionService; + + static { + final List<PropertyDescriptor> properties = new ArrayList<>(); + properties.add(PULSAR_CLIENT_SERVICE); + properties.add(TOPIC); + properties.add(SUBSCRIPTION); + properties.add(ASYNC_ENABLED); + properties.add(MAX_ASYNC_REQUESTS); + properties.add(ACK_TIMEOUT); + properties.add(PRIORITY_LEVEL); + properties.add(RECEIVER_QUEUE_SIZE); + properties.add(SUBSCRIPTION_TYPE); + + PROPERTIES = Collections.unmodifiableList(properties); + + final Set<Relationship> relationships = new HashSet<>(); + relationships.add(REL_SUCCESS); + RELATIONSHIPS = Collections.unmodifiableSet(relationships); + } + + @Override + 
public Set<Relationship> getRelationships() { + return RELATIONSHIPS; + } + + @Override + protected List<PropertyDescriptor> getSupportedPropertyDescriptors() { + return PROPERTIES; + } + + @OnScheduled + public void init(ProcessContext context) { + pool = Executors.newFixedThreadPool(context.getProperty(MAX_ASYNC_REQUESTS).asInteger()); + completionService = new ExecutorCompletionService<>(pool); + } + + @OnUnscheduled + public void shutDown() { + // Stop all the async consumers + pool.shutdownNow(); + } + + @Override + public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException { + + try { + if (context.getProperty(ASYNC_ENABLED).isSet() && context.getProperty(ASYNC_ENABLED).asBoolean()) { --- End diff -- You don't have to call `isSet` here: `ASYNC_ENABLED` is a required property with a default value, so it is always set.
---