Added detailed documentation for the ExecuteFlumeSource/Sink processors Signed-off-by: Matt Gilman <matt.c.gil...@gmail.com>
Project: http://git-wip-us.apache.org/repos/asf/incubator-nifi/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-nifi/commit/e7e15fac Tree: http://git-wip-us.apache.org/repos/asf/incubator-nifi/tree/e7e15fac Diff: http://git-wip-us.apache.org/repos/asf/incubator-nifi/diff/e7e15fac Branch: refs/heads/develop Commit: e7e15facc4587a7cd0862d3b8f00a280b15392fa Parents: 286e473 Author: Joey Echeverria <joe...@gmail.com> Authored: Sun Jul 12 18:44:16 2015 -0700 Committer: Matt Gilman <matt.c.gil...@gmail.com> Committed: Tue Jul 14 14:50:17 2015 -0400 ---------------------------------------------------------------------- .../nifi/processors/flume/ExecuteFlumeSink.java | 7 +- .../processors/flume/ExecuteFlumeSource.java | 4 +- .../org.apache.nifi.processor.Processor | 4 +- .../additionalDetails.html | 159 +++++++++++++++++++ .../additionalDetails.html | 114 +++++++++++++ 5 files changed, 280 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/e7e15fac/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/java/org/apache/nifi/processors/flume/ExecuteFlumeSink.java ---------------------------------------------------------------------- diff --git a/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/java/org/apache/nifi/processors/flume/ExecuteFlumeSink.java b/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/java/org/apache/nifi/processors/flume/ExecuteFlumeSink.java index 8ccb2d1..94251b4 100644 --- a/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/java/org/apache/nifi/processors/flume/ExecuteFlumeSink.java +++ b/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/java/org/apache/nifi/processors/flume/ExecuteFlumeSink.java @@ -21,7 +21,6 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import java.util.List; import java.util.Set; -import org.apache.flume.Context; import org.apache.flume.EventDeliveryException; import org.apache.flume.Sink; import org.apache.flume.conf.Configurables; @@ -44,14 +43,14 @@ import org.apache.nifi.processor.util.StandardValidators; /** * This processor runs a Flume sink */ -@Tags({"flume", "hadoop", "get", "sink"}) -@CapabilityDescription("Write FlowFile data to a Flume sink") +@Tags({"flume", "hadoop", "put", "sink"}) +@CapabilityDescription("Execute a Flume sink. Each input FlowFile is converted into a Flume Event for processing by the sink.") @TriggerSerially public class ExecuteFlumeSink extends AbstractFlumeProcessor { public static final PropertyDescriptor SINK_TYPE = new PropertyDescriptor.Builder() .name("Sink Type") - .description("The fully-qualified name of the Sink class") + .description("The component type name for the sink. For some sinks, this is a short, symbolic name (e.g. hdfs). For others, it's the fully-qualified name of the Sink class. See the Flume User Guide for details.") .required(true) .addValidator(createSinkValidator()) .build(); http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/e7e15fac/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/java/org/apache/nifi/processors/flume/ExecuteFlumeSource.java ---------------------------------------------------------------------- diff --git a/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/java/org/apache/nifi/processors/flume/ExecuteFlumeSource.java b/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/java/org/apache/nifi/processors/flume/ExecuteFlumeSource.java index fa02750..fa4fbe6 100644 --- a/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/java/org/apache/nifi/processors/flume/ExecuteFlumeSource.java +++ b/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/java/org/apache/nifi/processors/flume/ExecuteFlumeSource.java @@ -50,13 +50,13 @@ import org.apache.nifi.processor.util.StandardValidators; * This processor runs a Flume source */ @Tags({"flume", "hadoop", "get", "source"}) -@CapabilityDescription("Generate FlowFile data from a Flume source") +@CapabilityDescription("Execute a Flume source. Each Flume Event is sent to the success relationship as a FlowFile") @TriggerSerially public class ExecuteFlumeSource extends AbstractFlumeProcessor { public static final PropertyDescriptor SOURCE_TYPE = new PropertyDescriptor.Builder() .name("Source Type") - .description("The fully-qualified name of the Source class") + .description("The component type name for the source. For some sources, this is a short, symbolic name (e.g. spooldir). For others, it's the fully-qualified name of the Source class. See the Flume User Guide for details.") .required(true) .addValidator(createSourceValidator()) .build(); http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/e7e15fac/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor ---------------------------------------------------------------------- diff --git a/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor b/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor index fae8727..f5b57e1 100644 --- a/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor +++ b/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/resources/META-INF/services/org.apache.nifi.processor.Processor @@ -12,5 +12,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -org.apache.nifi.processors.flume.FlumeSourceProcessor -org.apache.nifi.processors.flume.FlumeSinkProcessor +org.apache.nifi.processors.flume.ExecuteFlumeSource +org.apache.nifi.processors.flume.ExecuteFlumeSink http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/e7e15fac/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/resources/docs/org.apache.nifi.processors.flume.ExecuteFlumeSink/additionalDetails.html ---------------------------------------------------------------------- diff --git a/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/resources/docs/org.apache.nifi.processors.flume.ExecuteFlumeSink/additionalDetails.html b/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/resources/docs/org.apache.nifi.processors.flume.ExecuteFlumeSink/additionalDetails.html new file mode 100644 index 0000000..be8b99f --- /dev/null +++ b/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/resources/docs/org.apache.nifi.processors.flume.ExecuteFlumeSink/additionalDetails.html @@ -0,0 +1,159 @@ +<!DOCTYPE html> +<html lang="en"> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<head> + <meta charset="utf-8" /> + <title>ExecuteFlumeSink</title> + <link rel="stylesheet" href="../../css/component-usage.css" type="text/css" /> +</head> + +<body> +<h2>Data Model</h2> +<p> + This processor executes an Apache Flume sink. FlowFiles are wrapped in Flume's + Event interface. The content of the FlowFile becomes the body of the Event and + the attributes of the FlowFile become Event headers. The following special + headers are also set: +</p> +<table id="headers"> + <tr> + <th>Flume Event Header</th> + <th>FlowFile Attribute</th> + </tr> + <tr> + <td>nifi.entry.date</td> + <td>FlowFile#getEntryDate()</td> + </tr> + <tr> + <td>nifi.id</td> + <td>FlowFile#getId()</td> + </tr> + <tr> + <td>nifi.last.queue.date</td> + <td>FlowFile#getLastQueueDate()</td> + </tr> + <tr> + <td>nifi.lineage.identifiers.${i}</td> + <td>FlowFile#getLineageIdentifiers()[i]</td> + </tr> + <tr> + <td>nifi.lineage.start.date</td> + <td>FlowFile#getLineageStartDate()</td> + </tr> + <tr> + <td>nifi.size</td> + <td>FlowFile#getSize()</td> + </tr> +</table> +<h2>Warning</h2> +<p> + In NiFi, the contents of a FlowFile are accessed via a stream, but in Flume it is + stored in a byte array. This means the full content will be loaded into memory when + a FlowFile is processed by the ExecuteFlumeSink processor. You should consider the + typical size of the FlowFiles you'll process and the batch size, if any, your sink + is configured with when setting NiFi's heap size. +</p> +<h2>Configuration Details</h2> +<p> + This processor is designed to execute arbitrary Flume sinks. Most of the details + of configuring the sink is deferred to Flume's built-in configuration system. + For details on the available settings for each sink type, refer to the Flume + <a href="http://flume.apache.org/FlumeUserGuide.html#flume-sinks">User Guide</a>. + Configuring the Flume sink is a four step process: +</p> +<ol> + <li>Set the Sink Type property to a valid Flume sink type.</li> + <li> + Set the Agent Name property to the name of the agent in your + Flume configuration. This is the prefix of the properties in the Flume + configuration file. Example: <code>tier1</code> + </li> + <li> + Set the Sink Name property to the name of the sink in your Flume + configuration. If Agent Name is <code>tier1</code>, then the Sink Name + is the value of the <code>tier1.sinks</code> property. Example: <code>sink-1</code> + </li> + <li> + Copy and paste the configuration for the sink from your Flume configuration + file into the Flume Configuration property. Assuming you're using + the same Agent Name and Sink Name as in the examples above, this will be all + of the properties that start with <code>tier1.sinks.sink-1</code>. + Do not copy the <code>tier1.sinks.sink-1.type</code> or + <code>tier1.sinks.sink-1.channel</code> properties. + </li> +</ol> +<h2>Usage Example</h2> +<p> + Assuming you had the following existing Flume configuration file: +</p> + <pre> +a1.sources = r1 +a1.sinks = k1 +a1.channels = c1 + +a1.sources.r1.type = netcat +a1.sources.r1.bind = localhost +a1.sources.r1.port = 44444 + +a1.sinks.k1.type = hdfs +a1.sinks.k1.channel = c1 +a1.sinks.k1.hdfs.path = /flume/events/%y-%m-%d/%H%M/%S +a1.sinks.k1.hdfs.filePrefix = events- +a1.sinks.k1.hdfs.round = true +a1.sinks.k1.hdfs.roundValue = 10 +a1.sinks.k1.hdfs.roundUnit = minute + +a1.channels.c1.type = memory +a1.channels.c1.capacity = 1000 +a1.channels.c1.transactionCapacity = 100 + +a1.sources.r1.channels = c1 +a1.sinks.k1.channel = c1</pre> +<p> + Then you'd configure the ExecuteFlumeSink as follows: +</p> +<table id="example"> + <tr> + <th>Property</th> + <th>Value</th> + </tr> + <tr> + <td>Sink Type</td> + <td>hdfs</td> + </tr> + <tr> + <td>Agent Name</td> + <td>a1</td> + </tr> + <tr> + <td>Sink Name</td> + <td>k1</td> + </tr> + <tr> + <td>Flume Configuration</td> + <td> + <code> + a1.sinks.k1.hdfs.path = /flume/events/%y-%m-%d/%H%M/%S<br> + a1.sinks.k1.hdfs.filePrefix = events-<br> + a1.sinks.k1.hdfs.round = true<br> + a1.sinks.k1.hdfs.roundValue = 10<br> + a1.sinks.k1.hdfs.roundUnit = minute + </code> + </td> + </tr> +</table> +</body> +</html> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-nifi/blob/e7e15fac/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/resources/docs/org.apache.nifi.processors.flume.ExecuteFlumeSource/additionalDetails.html ---------------------------------------------------------------------- diff --git a/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/resources/docs/org.apache.nifi.processors.flume.ExecuteFlumeSource/additionalDetails.html b/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/resources/docs/org.apache.nifi.processors.flume.ExecuteFlumeSource/additionalDetails.html new file mode 100644 index 0000000..3d1062b --- /dev/null +++ b/nifi/nifi-nar-bundles/nifi-flume-bundle/nifi-flume-processors/src/main/resources/docs/org.apache.nifi.processors.flume.ExecuteFlumeSource/additionalDetails.html @@ -0,0 +1,114 @@ +<!DOCTYPE html> +<html lang="en"> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<head> + <meta charset="utf-8" /> + <title>ExecuteFlumeSource</title> + <link rel="stylesheet" href="../../css/component-usage.css" type="text/css" /> +</head> + +<body> +<h2>Data Model</h2> +<p> + This processor executes an Apache Flume source. Each Flume Event is turned into + a FlowFile. The content of the FlowFile is set to the body of the Event and + the Event headers become FlowFile attributes. +</p> +<h2>Configuration Details</h2> +<p> + This processor is designed to execute arbitrary Flume sources. Most of the details + of configuring the source is deferred to Flume's built-in configuration system. + For details on the available settings for each source type, refer to the Flume + <a href="http://flume.apache.org/FlumeUserGuide.html#flume-sources">User Guide</a>. + Configuring the Flume source is a four step process: +</p> +<ol> + <li>Set the Source Type property to a valid Flume source type.</li> + <li> + Set the Agent Name property to the name of the agent in your + Flume configuration. This is the prefix of the properties in the Flume + configuration file. Example: <code>tier1</code> + </li> + <li> + Set the Source Name property to the name of the source in your Flume + configuration. If Agent Name is <code>tier1</code>, then the Source Name + is the value of the <code>tier1.sources</code> property. Example: <code>src-1</code> + </li> + <li> + Copy and paste the configuration for the source from your Flume configuration + file into the Flume Configuration property. Assuming you're using + the same Agent Name and Source Name as in the examples above, this will be all + of the properties that start with <code>tier1.sources.src-1</code>. + Do not copy the <code>tier1.sources.src-1.type</code> or + <code>tier1.sources.src-1.channel</code> properties. + </li> +</ol> +<h2>Usage Example</h2> +<p> + Assuming you had the following existing Flume configuration file: +</p> + <pre> +a1.sources = r1 +a1.sinks = k1 +a1.channels = c1 + +a1.sources.r1.type = multiport_syslogtcp +a1.sources.r1.channels = c1 +a1.sources.r1.host = 0.0.0.0 +a1.sources.r1.ports = 10001 10002 10003 +a1.sources.r1.portHeader = port + +a1.sinks.k1.type = logger + +a1.channels.c1.type = memory +a1.channels.c1.capacity = 1000 +a1.channels.c1.transactionCapacity = 100 + +a1.sources.r1.channels = c1 +a1.sinks.k1.channel = c1</pre> +<p> + Then you'd configure the ExecuteFlumeSource as follows: +</p> +<table id="example"> + <tr> + <th>Property</th> + <th>Value</th> + </tr> + <tr> + <td>Source Type</td> + <td>multiport_syslogtcp</td> + </tr> + <tr> + <td>Agent Name</td> + <td>a1</td> + </tr> + <tr> + <td>Source Name</td> + <td>r1</td> + </tr> + <tr> + <td>Flume Configuration</td> + <td> + <code> + a1.sources.r1.host = 0.0.0.0<br> + a1.sources.r1.ports = 10001 10002 10003<br> + a1.sources.r1.portHeader = port + </code> + </td> + </tr> +</table> +</body> +</html> \ No newline at end of file