Repository: nifi Updated Branches: refs/heads/master 78daaf40f -> 2590c8831
NIFI-4979 - Fix ReportLineageToAtlas documentation errors Signed-off-by: Pierre Villard <pierre.villard...@gmail.com> This closes #2549. Project: http://git-wip-us.apache.org/repos/asf/nifi/repo Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/2590c883 Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/2590c883 Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/2590c883 Branch: refs/heads/master Commit: 2590c883187f0a4828151504b7dd48441eaff3fb Parents: 78daaf4 Author: Koji Kawamura <ijokaruma...@apache.org> Authored: Thu Mar 15 12:22:31 2018 +0900 Committer: Pierre Villard <pierre.villard...@gmail.com> Committed: Mon Mar 19 17:56:13 2018 +0100 ---------------------------------------------------------------------- .../atlas/reporting/ReportLineageToAtlas.java | 16 ++-- .../additionalDetails.html | 95 ++++++++++++++++---- 2 files changed, 91 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nifi/blob/2590c883/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/main/java/org/apache/nifi/atlas/reporting/ReportLineageToAtlas.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/main/java/org/apache/nifi/atlas/reporting/ReportLineageToAtlas.java b/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/main/java/org/apache/nifi/atlas/reporting/ReportLineageToAtlas.java index f1d9018..6ec4efb 100644 --- a/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/main/java/org/apache/nifi/atlas/reporting/ReportLineageToAtlas.java +++ b/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/main/java/org/apache/nifi/atlas/reporting/ReportLineageToAtlas.java @@ -92,10 +92,12 @@ import static org.apache.nifi.reporting.util.provenance.ProvenanceEventConsumer. 
import static org.apache.nifi.reporting.util.provenance.ProvenanceEventConsumer.PROVENANCE_START_POSITION; @Tags({"atlas", "lineage"}) -@CapabilityDescription("Publishes NiFi flow data set level lineage to Apache Atlas." + - " By reporting flow information to Atlas, an end-to-end Process and DataSet lineage such as across NiFi environments and other systems" + - " connected by technologies, for example NiFi Site-to-Site, Kafka topic or Hive tables." + - " There are limitations and required configurations for both NiFi and Atlas. See 'Additional Details' for further description.") +@CapabilityDescription("Report NiFi flow data set level lineage to Apache Atlas." + + " End-to-end lineages across NiFi environments and other systems can be reported if those are" + + " connected by different protocols and data set, such as NiFi Site-to-Site, Kafka topic or Hive tables ... etc." + + " Atlas lineage reported by this reporting task can be useful to grasp the high level relationships between processes and data sets," + + " in addition to NiFi provenance events providing detailed event level lineage." + + " See 'Additional Details' for further description and limitations.") @Stateful(scopes = Scope.LOCAL, description = "Stores the Reporting Task's last event Id so that on restart the task knows where it left off.") @DynamicProperty(name = "hostnamePattern.<ClusterName>", value = "hostname Regex patterns", description = RegexClusterResolver.PATTERN_PROPERTY_PREFIX_DESC) // In order for each reporting task instance to have its own static objects such as KafkaNotification. 
@@ -268,7 +270,11 @@ public class ReportLineageToAtlas extends AbstractReportingTask { static final PropertyDescriptor NIFI_LINEAGE_STRATEGY = new PropertyDescriptor.Builder() .name("nifi-lineage-strategy") .displayName("NiFi Lineage Strategy") - .description("Specifies granularity on how NiFi data flow should be reported to Atlas.") + .description("Specifies granularity on how NiFi data flow should be reported to Atlas." + + " NOTE: It is strongly recommended to keep using the same strategy once this reporting task started to keep Atlas data clean." + + " Switching strategies will not delete Atlas entities created by the old strategy." + + " Having mixed entities created by different strategies makes Atlas lineage graph noisy." + + " For more detailed description on each strategy and differences, refer 'NiFi Lineage Strategy' section in Additional Details.") .required(true) .allowableValues(LINEAGE_STRATEGY_SIMPLE_PATH, LINEAGE_STRATEGY_COMPLETE_PATH) .defaultValue(LINEAGE_STRATEGY_SIMPLE_PATH.getValue()) http://git-wip-us.apache.org/repos/asf/nifi/blob/2590c883/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/main/resources/docs/org.apache.nifi.atlas.reporting.ReportLineageToAtlas/additionalDetails.html ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/main/resources/docs/org.apache.nifi.atlas.reporting.ReportLineageToAtlas/additionalDetails.html b/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/main/resources/docs/org.apache.nifi.atlas.reporting.ReportLineageToAtlas/additionalDetails.html index 91c3979..cd82079 100644 --- a/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/main/resources/docs/org.apache.nifi.atlas.reporting.ReportLineageToAtlas/additionalDetails.html +++ 
b/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/main/resources/docs/org.apache.nifi.atlas.reporting.ReportLineageToAtlas/additionalDetails.html @@ -311,27 +311,80 @@ Processor 3</pre> </tr> <tr> - <td>NiFiRemotePort</td> - <td> - Remote Input Port<br/> - Remote Output Port + <td rowspan="4">NiFiRemotePort</td> + <td rowspan="2"> + Remote Input Port </td> - <td> - SEND<br/> - RECEIVE<br/> + <td rowspan="2"> + SEND </td> - <td> + <td rowspan="2"> <ul> <li>http://nifi1.example.com:8080/nifi-api/data-transfer/input-ports/35dbc0ab-015e-1000-144c-a8d71255027d/transactions/89335043-f105-4de7-a0ef-46f2ef0c7c51/flow-files</li> <li>nifi://nifi1.example.com:8081/cb729f05-b2ee-4488-909d-c6696cc34588</li> </ul> </td> <td> - nifi_input_port<br/> + nifi_input_port + </td> + <td>rootGroupPortGUID@clusterName + (e.g. 35dbc0ab-015e-1000-144c-a8d71255027d@cl1)</td> + <td rowspan="4"><strong>NOTE:</strong>Only HTTP S2S protocol is supported. RAW support may be added in the future as it needs NiFi code modification. See <a href="https://issues.apache.org/jira/browse/NIFI-4654">NIFI-4654</a> for detail.</td> + </tr> + <tr> + <td> + With 'Simple Path' strategy intermediate 'nifi_queue' and 'nifi_flow_path' are created as well (marked with + in the following example) + <pre> +upstream (nifi_flow_path) + -> + queue (nifi_queue) + -> + Remote Input Port + (nifi_flowPath) + -> remote target port + (nifi_input_port) + </pre> + </td> + <td>remoteInputPortGUID@clusterName<br/>(e.g. f31a6b53-3077-4c59-144c-a8d71255027d@cl1) + <p>NOTE: The remoteInputPortGUID is the client side component ID and different from the remote target port GUID. 
Multiple Remote Input Ports can send to the same target remote input port.</p></td> + </tr> + <tr> + <td rowspan="2"> + Remote Output Port + </td> + <td rowspan="2"> + RECEIVE + </td> + <td rowspan="2"> + <ul> + <li>http://nifi1.example.com:8080/nifi-api/data-transfer/output-ports/45dbc0ab-015e-1000-144c-a8d71255027d/transactions/99335043-f105-4de7-a0ef-46f2ef0c7c51/flow-files</li> + <li>nifi://nifi1.example.com:8081/db729f05-b2ee-4488-909d-c6696cc34588</li> + </ul> + </td> + <td> nifi_output_port </td> - <td>remotePortGUID@clusterName<br/>(e.g. 35dbc0ab-015e-1000-144c-a8d71255027d@cl1)</td> - <td><strong>NOTE:</strong>Only HTTP S2S protocol is supported. RAW support may be added in the future as it needs NiFi code modification. See <a href="https://issues.apache.org/jira/browse/NIFI-4654">NIFI-4654</a> for detail.</td> + <td>rootGroupPortGUID@clusterName + (e.g. 45dbc0ab-015e-1000-144c-a8d71255027d@cl1)</td> + </tr> + <tr> + <td> + With 'Simple Path' strategy intermediate 'nifi_flow_path' and 'nifi_queue' are created as well (marked with + in the following example) + <pre> +remote target port +(nifi_output_port) + -> + Remote Output Port + (nifi_flow_path) + -> + queue (nifi_queue) + -> downstream + (nifi_flow_path) + </pre> + </td> + <td> + <ul> + <li>For 'nifi_flow_path': remoteOutputPortGUID@clusterName<br/>(e.g. 7375f8f6-4604-468d-144c-a8d71255027d@cl1) + <p>NOTE: The remoteOutputPortGUID is the client side component ID and different from the remote target port GUID. Multiple Remote Output Ports can pull from the same target remote output port.</p></li> + <li>For 'nifi_queue': downstreamPathGUID@clusterName<br/>(e.g. 
bb530e58-ee14-3cac-144c-a8d71255027d@cl1)</li> + </ul> + </td> </tr> <tr> <td>NiFiRootGroupPort</td> @@ -343,9 +396,17 @@ Processor 3</pre> RECEIVE<br/> SEND<br/> </td> - <td>(Same as Remote Input/Output Port)</td> - <td>(Same as above)</td> - <td>(Same as above)</td> + <td> + <ul> + <li>http://nifi1.example.com:8080/nifi-api/data-transfer/input-ports/35dbc0ab-015e-1000-144c-a8d71255027d/transactions/89335043-f105-4de7-a0ef-46f2ef0c7c51/flow-files</li> + <li>nifi://nifi1.example.com:8081/cb729f05-b2ee-4488-909d-c6696cc34588</li> + </ul> + </td> + <td> + nifi_input_port<br/> + nifi_output_port + </td> + <td>rootGroupPortGUID@clusterName<br/>(e.g. 35dbc0ab-015e-1000-144c-a8d71255027d@cl1)</td> <td></td> </tr> <tr> @@ -372,7 +433,7 @@ Processor 3</pre> </td> <td>kafka_topic</td> <td>topicName@clusterName<br/>(e.g. testTopic@cl1)</td> - <td><strong>NOTE:</strong>With Atlas 0.8.2, the same topic name in different clusters can not be created using the pre-built 'kafka_topic'. See <a href="https://issues.apache.org/jira/browse/ATLAS-2286">ATLAS-2286</a>.</td> + <td><strong>NOTE:</strong>With Atlas earlier than 0.8.2, the same topic name in different clusters can not be created using the pre-built 'kafka_topic'. See <a href="https://issues.apache.org/jira/browse/ATLAS-2286">ATLAS-2286</a>.</td> </tr> <tr> <td>PutHiveStreaming</td> @@ -422,6 +483,7 @@ Processor 3</pre> <td>hdfs://nn.example.com:8020/user/nifi/5262553828219</td> <td>hdfs_path</td> <td>/path/fileName@clusterName<br/>(e.g. /app/warehouse/hive/db/default@cl1)</td> + <td></td> </tr> <tr> <td>HBaseTable</td> @@ -444,6 +506,7 @@ Processor 3</pre> <td>hbase://hmaster.example.com:16000/tableA/rowX</td> <td>hbase_table</td> <td>tableName@clusterName<br/>(e.g. myTable@cl1)</td> + <td></td> </tr> <tr> <td>FilePath</td> @@ -460,6 +523,7 @@ Processor 3</pre> <td>file:///tmp/a.txt</td> <td>fs_path</td> <td>/path/fileName@hostname<br/>(e.g. 
/tmp/dir/filename.txt@host.example.com)</td> + <td></td> </tr> <tr id="unknown-datasets"> @@ -475,6 +539,7 @@ Processor 3</pre> <td></td> <td>nifi_data</td> <td>processorGuid@clusterName<br/>db8bb12c-5cd3-3011-c971-579f460ebedf@cl1</td> + <td></td> </tr> </table>