NIFI-1156: Updated version from 0.4.0-SNAPSHOT to 0.4.2-SNAPSHOT; addressed some error handling issues; updated licensing information
Project: http://git-wip-us.apache.org/repos/asf/nifi/repo Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/2c9fb676 Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/2c9fb676 Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/2c9fb676 Branch: refs/heads/NIFI-259 Commit: 2c9fb676cdb9c5831b7b1c92e5e7c9936c911e58 Parents: 8f688d4 ee7400e Author: Mark Payne <[email protected]> Authored: Wed Jan 20 15:27:58 2016 -0500 Committer: Mark Payne <[email protected]> Committed: Wed Jan 20 15:27:58 2016 -0500 ---------------------------------------------------------------------- nifi-assembly/LICENSE | 60 +++- nifi-assembly/pom.xml | 5 + .../nifi-html-bundle/nifi-html-nar/pom.xml | 40 +++ .../src/main/resources/META-INF/LICENSE | 240 ++++++++++++++++ .../src/main/resources/META-INF/NOTICE | 19 ++ .../nifi-html-processors/pom.xml | 71 +++++ .../org/apache/nifi/AbstractHTMLProcessor.java | 130 +++++++++ .../java/org/apache/nifi/GetHTMLElement.java | 241 ++++++++++++++++ .../java/org/apache/nifi/ModifyHTMLElement.java | 179 ++++++++++++ .../java/org/apache/nifi/PutHTMLElement.java | 165 +++++++++++ .../org.apache.nifi.processor.Processor | 17 ++ .../java/org/apache/nifi/AbstractHTMLTest.java | 28 ++ .../org/apache/nifi/TestGetHTMLElement.java | 275 +++++++++++++++++++ .../org/apache/nifi/TestModifyHTMLElement.java | 204 ++++++++++++++ .../org/apache/nifi/TestPutHTMLElement.java | 127 +++++++++ .../src/test/resources/Weather.html | 25 ++ nifi-nar-bundles/nifi-html-bundle/pom.xml | 43 +++ nifi-nar-bundles/pom.xml | 1 + pom.xml | 13 +- 19 files changed, 1881 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nifi/blob/2c9fb676/nifi-assembly/LICENSE ---------------------------------------------------------------------- diff --cc nifi-assembly/LICENSE index e21ecc9,d12843b..ca4a880 --- a/nifi-assembly/LICENSE +++ b/nifi-assembly/LICENSE @@@ -471,6 -449,6 +471,38 @@@ For details see http://www.abeautifulsi FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++This product bundles 'jsoup' which is available under the MIT License. ++For details see http://jsoup.org/ ++ ++ jsoup License ++ The jsoup code-base (include source and compiled packages) are distributed under the open source MIT license as described below. ++ ++ The MIT License ++ Copyright © 2009 - 2013 Jonathan Hedley ([email protected]) ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, ++ copy, modify, merge, publish, distribute, sublicense, and/or sell ++ copies of the Software, and to permit persons to whom the ++ Software is furnished to do so, subject to the following ++ conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES ++ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ OTHER DEALINGS IN THE SOFTWARE. ++ ++ ++ This product bundles 'json2.js' which is available in the 'public domain'. For details see https://github.com/douglascrockford/JSON-js http://git-wip-us.apache.org/repos/asf/nifi/blob/2c9fb676/nifi-assembly/pom.xml ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nifi/blob/2c9fb676/nifi-nar-bundles/nifi-html-bundle/nifi-html-nar/pom.xml ---------------------------------------------------------------------- diff --cc nifi-nar-bundles/nifi-html-bundle/nifi-html-nar/pom.xml index 0000000,fd23f7b..97b4322 mode 000000,100644..100644 --- a/nifi-nar-bundles/nifi-html-bundle/nifi-html-nar/pom.xml +++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-nar/pom.xml @@@ -1,0 -1,41 +1,40 @@@ + <?xml version="1.0" encoding="UTF-8"?> + <!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> + <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-html-bundle</artifactId> - <version>0.4.0-SNAPSHOT</version> ++ <version>0.4.2-SNAPSHOT</version> + </parent> + + <artifactId>nifi-html-nar</artifactId> + <packaging>nar</packaging> + + <dependencies> + <dependency> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-standard-services-api-nar</artifactId> + <type>nar</type> + </dependency> + <dependency> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-html-processors</artifactId> - <version>0.4.0-SNAPSHOT</version> + </dependency> + </dependencies> + + </project> http://git-wip-us.apache.org/repos/asf/nifi/blob/2c9fb676/nifi-nar-bundles/nifi-html-bundle/nifi-html-nar/src/main/resources/META-INF/LICENSE ---------------------------------------------------------------------- diff --cc nifi-nar-bundles/nifi-html-bundle/nifi-html-nar/src/main/resources/META-INF/LICENSE index 0000000,0000000..c62123e new file mode 100644 --- /dev/null +++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-nar/src/main/resources/META-INF/LICENSE @@@ -1,0 -1,0 +1,240 @@@ ++ ++ Apache License ++ Version 2.0, January 2004 ++ http://www.apache.org/licenses/ ++ ++ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION ++ ++ 1. Definitions. ++ ++ "License" shall mean the terms and conditions for use, reproduction, ++ and distribution as defined by Sections 1 through 9 of this document. ++ ++ "Licensor" shall mean the copyright owner or entity authorized by ++ the copyright owner that is granting the License. ++ ++ "Legal Entity" shall mean the union of the acting entity and all ++ other entities that control, are controlled by, or are under common ++ control with that entity. For the purposes of this definition, ++ "control" means (i) the power, direct or indirect, to cause the ++ direction or management of such entity, whether by contract or ++ otherwise, or (ii) ownership of fifty percent (50%) or more of the ++ outstanding shares, or (iii) beneficial ownership of such entity. ++ ++ "You" (or "Your") shall mean an individual or Legal Entity ++ exercising permissions granted by this License. ++ ++ "Source" form shall mean the preferred form for making modifications, ++ including but not limited to software source code, documentation ++ source, and configuration files. ++ ++ "Object" form shall mean any form resulting from mechanical ++ transformation or translation of a Source form, including but ++ not limited to compiled object code, generated documentation, ++ and conversions to other media types. ++ ++ "Work" shall mean the work of authorship, whether in Source or ++ Object form, made available under the License, as indicated by a ++ copyright notice that is included in or attached to the work ++ (an example is provided in the Appendix below). ++ ++ "Derivative Works" shall mean any work, whether in Source or Object ++ form, that is based on (or derived from) the Work and for which the ++ editorial revisions, annotations, elaborations, or other modifications ++ represent, as a whole, an original work of authorship. For the purposes ++ of this License, Derivative Works shall not include works that remain ++ separable from, or merely link (or bind by name) to the interfaces of, ++ the Work and Derivative Works thereof. ++ ++ "Contribution" shall mean any work of authorship, including ++ the original version of the Work and any modifications or additions ++ to that Work or Derivative Works thereof, that is intentionally ++ submitted to Licensor for inclusion in the Work by the copyright owner ++ or by an individual or Legal Entity authorized to submit on behalf of ++ the copyright owner. For the purposes of this definition, "submitted" ++ means any form of electronic, verbal, or written communication sent ++ to the Licensor or its representatives, including but not limited to ++ communication on electronic mailing lists, source code control systems, ++ and issue tracking systems that are managed by, or on behalf of, the ++ Licensor for the purpose of discussing and improving the Work, but ++ excluding communication that is conspicuously marked or otherwise ++ designated in writing by the copyright owner as "Not a Contribution." ++ ++ "Contributor" shall mean Licensor and any individual or Legal Entity ++ on behalf of whom a Contribution has been received by Licensor and ++ subsequently incorporated within the Work. ++ ++ 2. Grant of Copyright License. Subject to the terms and conditions of ++ this License, each Contributor hereby grants to You a perpetual, ++ worldwide, non-exclusive, no-charge, royalty-free, irrevocable ++ copyright license to reproduce, prepare Derivative Works of, ++ publicly display, publicly perform, sublicense, and distribute the ++ Work and such Derivative Works in Source or Object form. ++ ++ 3. Grant of Patent License. Subject to the terms and conditions of ++ this License, each Contributor hereby grants to You a perpetual, ++ worldwide, non-exclusive, no-charge, royalty-free, irrevocable ++ (except as stated in this section) patent license to make, have made, ++ use, offer to sell, sell, import, and otherwise transfer the Work, ++ where such license applies only to those patent claims licensable ++ by such Contributor that are necessarily infringed by their ++ Contribution(s) alone or by combination of their Contribution(s) ++ with the Work to which such Contribution(s) was submitted. If You ++ institute patent litigation against any entity (including a ++ cross-claim or counterclaim in a lawsuit) alleging that the Work ++ or a Contribution incorporated within the Work constitutes direct ++ or contributory patent infringement, then any patent licenses ++ granted to You under this License for that Work shall terminate ++ as of the date such litigation is filed. ++ ++ 4. Redistribution. You may reproduce and distribute copies of the ++ Work or Derivative Works thereof in any medium, with or without ++ modifications, and in Source or Object form, provided that You ++ meet the following conditions: ++ ++ (a) You must give any other recipients of the Work or ++ Derivative Works a copy of this License; and ++ ++ (b) You must cause any modified files to carry prominent notices ++ stating that You changed the files; and ++ ++ (c) You must retain, in the Source form of any Derivative Works ++ that You distribute, all copyright, patent, trademark, and ++ attribution notices from the Source form of the Work, ++ excluding those notices that do not pertain to any part of ++ the Derivative Works; and ++ ++ (d) If the Work includes a "NOTICE" text file as part of its ++ distribution, then any Derivative Works that You distribute must ++ include a readable copy of the attribution notices contained ++ within such NOTICE file, excluding those notices that do not ++ pertain to any part of the Derivative Works, in at least one ++ of the following places: within a NOTICE text file distributed ++ as part of the Derivative Works; within the Source form or ++ documentation, if provided along with the Derivative Works; or, ++ within a display generated by the Derivative Works, if and ++ wherever such third-party notices normally appear. The contents ++ of the NOTICE file are for informational purposes only and ++ do not modify the License. You may add Your own attribution ++ notices within Derivative Works that You distribute, alongside ++ or as an addendum to the NOTICE text from the Work, provided ++ that such additional attribution notices cannot be construed ++ as modifying the License. ++ ++ You may add Your own copyright statement to Your modifications and ++ may provide additional or different license terms and conditions ++ for use, reproduction, or distribution of Your modifications, or ++ for any such Derivative Works as a whole, provided Your use, ++ reproduction, and distribution of the Work otherwise complies with ++ the conditions stated in this License. ++ ++ 5. Submission of Contributions. Unless You explicitly state otherwise, ++ any Contribution intentionally submitted for inclusion in the Work ++ by You to the Licensor shall be under the terms and conditions of ++ this License, without any additional terms or conditions. ++ Notwithstanding the above, nothing herein shall supersede or modify ++ the terms of any separate license agreement you may have executed ++ with Licensor regarding such Contributions. ++ ++ 6. Trademarks. This License does not grant permission to use the trade ++ names, trademarks, service marks, or product names of the Licensor, ++ except as required for reasonable and customary use in describing the ++ origin of the Work and reproducing the content of the NOTICE file. ++ ++ 7. Disclaimer of Warranty. Unless required by applicable law or ++ agreed to in writing, Licensor provides the Work (and each ++ Contributor provides its Contributions) on an "AS IS" BASIS, ++ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or ++ implied, including, without limitation, any warranties or conditions ++ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A ++ PARTICULAR PURPOSE. You are solely responsible for determining the ++ appropriateness of using or redistributing the Work and assume any ++ risks associated with Your exercise of permissions under this License. ++ ++ 8. Limitation of Liability. In no event and under no legal theory, ++ whether in tort (including negligence), contract, or otherwise, ++ unless required by applicable law (such as deliberate and grossly ++ negligent acts) or agreed to in writing, shall any Contributor be ++ liable to You for damages, including any direct, indirect, special, ++ incidental, or consequential damages of any character arising as a ++ result of this License or out of the use or inability to use the ++ Work (including but not limited to damages for loss of goodwill, ++ work stoppage, computer failure or malfunction, or any and all ++ other commercial damages or losses), even if such Contributor ++ has been advised of the possibility of such damages. ++ ++ 9. Accepting Warranty or Additional Liability. While redistributing ++ the Work or Derivative Works thereof, You may choose to offer, ++ and charge a fee for, acceptance of support, warranty, indemnity, ++ or other liability obligations and/or rights consistent with this ++ License. However, in accepting such obligations, You may act only ++ on Your own behalf and on Your sole responsibility, not on behalf ++ of any other Contributor, and only if You agree to indemnify, ++ defend, and hold each Contributor harmless for any liability ++ incurred by, or claims asserted against, such Contributor by reason ++ of your accepting any such warranty or additional liability. ++ ++ END OF TERMS AND CONDITIONS ++ ++ APPENDIX: How to apply the Apache License to your work. ++ ++ To apply the Apache License to your work, attach the following ++ boilerplate notice, with the fields enclosed by brackets "[]" ++ replaced with your own identifying information. (Don't include ++ the brackets!) The text should be enclosed in the appropriate ++ comment syntax for the file format. We also recommend that a ++ file or class name and description of purpose be included on the ++ same "printed page" as the copyright notice for easier ++ identification within third-party archives. ++ ++ Copyright [yyyy] [name of copyright owner] ++ ++ Licensed under the Apache License, Version 2.0 (the "License"); ++ you may not use this file except in compliance with the License. ++ You may obtain a copy of the License at ++ ++ http://www.apache.org/licenses/LICENSE-2.0 ++ ++ Unless required by applicable law or agreed to in writing, software ++ distributed under the License is distributed on an "AS IS" BASIS, ++ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ See the License for the specific language governing permissions and ++ limitations under the License. ++ ++APACHE NIFI SUBCOMPONENTS: ++ ++The Apache NiFi project contains subcomponents with separate copyright ++notices and license terms. Your use of the source code for the these ++subcomponents is subject to the terms and conditions of the following ++licenses. ++ ++This product bundles 'jsoup' which is available under the MIT License. ++For details see http://jsoup.org/ ++ ++ jsoup License ++ The jsoup code-base (include source and compiled packages) are distributed under the open source MIT license as described below. ++ ++ The MIT License ++ Copyright © 2009 - 2013 Jonathan Hedley ([email protected]) ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, ++ copy, modify, merge, publish, distribute, sublicense, and/or sell ++ copies of the Software, and to permit persons to whom the ++ Software is furnished to do so, subject to the following ++ conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES ++ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ OTHER DEALINGS IN THE SOFTWARE. ++ http://git-wip-us.apache.org/repos/asf/nifi/blob/2c9fb676/nifi-nar-bundles/nifi-html-bundle/nifi-html-nar/src/main/resources/META-INF/NOTICE ---------------------------------------------------------------------- diff --cc nifi-nar-bundles/nifi-html-bundle/nifi-html-nar/src/main/resources/META-INF/NOTICE index 0000000,0000000..894d3de new file mode 100644 --- /dev/null +++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-nar/src/main/resources/META-INF/NOTICE @@@ -1,0 -1,0 +1,19 @@@ ++nifi-html-nar ++Copyright 2015 The Apache Software Foundation ++ ++This product includes software developed at ++The Apache Software Foundation (http://www.apache.org/). ++ ++****************** ++Apache Software License v2 ++****************** ++ ++The following binary components are provided under the Apache Software License v2 ++ ++ (ASLv2) Apache Commons Lang ++ The following NOTICE information applies: ++ Apache Commons Lang ++ Copyright 2001-2014 The Apache Software Foundation ++ ++ This product includes software from the Spring Framework, ++ under the Apache License 2.0 (see: StringUtils.containsWhitespace()) http://git-wip-us.apache.org/repos/asf/nifi/blob/2c9fb676/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/pom.xml ---------------------------------------------------------------------- diff --cc nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/pom.xml index 0000000,25e19b9..77b769f mode 000000,100644..100644 --- a/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/pom.xml +++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/pom.xml @@@ -1,0 -1,73 +1,71 @@@ + <?xml version="1.0" encoding="UTF-8"?> + <!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> + <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-html-bundle</artifactId> - <version>0.4.0-SNAPSHOT</version> ++ <version>0.4.2-SNAPSHOT</version> + </parent> + + <artifactId>nifi-html-processors</artifactId> + <description>Support for parsing HTML documents</description> + + <dependencies> + <dependency> + <groupId>org.jsoup</groupId> + <artifactId>jsoup</artifactId> - <version>1.8.3</version> + </dependency> + <dependency> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-api</artifactId> + </dependency> + <dependency> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-processor-utils</artifactId> + </dependency> + <dependency> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-mock</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-simple</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> - <version>4.12</version> + <scope>test</scope> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.apache.rat</groupId> + <artifactId>apache-rat-plugin</artifactId> + <configuration> + <excludes combine.children="append"> + <exclude>src/test/resources/Weather.html</exclude> + </excludes> + </configuration> + </plugin> + </plugins> + </build> + </project> http://git-wip-us.apache.org/repos/asf/nifi/blob/2c9fb676/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/AbstractHTMLProcessor.java ---------------------------------------------------------------------- diff --cc nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/AbstractHTMLProcessor.java index 0000000,42467d9..20dca29 mode 000000,100644..100644 --- a/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/AbstractHTMLProcessor.java +++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/AbstractHTMLProcessor.java @@@ -1,0 -1,138 +1,130 @@@ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.nifi; + + import org.apache.nifi.components.PropertyDescriptor; + import org.apache.nifi.components.ValidationContext; + import org.apache.nifi.components.ValidationResult; + import org.apache.nifi.components.Validator; + import org.apache.nifi.flowfile.FlowFile; + import org.apache.nifi.processor.AbstractProcessor; + import org.apache.nifi.processor.ProcessContext; + import org.apache.nifi.processor.ProcessSession; + import org.apache.nifi.processor.Relationship; + import org.apache.nifi.processor.io.InputStreamCallback; + import org.apache.nifi.processor.util.StandardValidators; + import org.jsoup.Jsoup; + import org.jsoup.nodes.Document; + import org.jsoup.select.Selector; + + import java.io.IOException; + import java.io.InputStream; + import java.util.concurrent.atomic.AtomicReference; + + public abstract class AbstractHTMLProcessor extends AbstractProcessor { + + protected static final String ELEMENT_HTML = "HTML"; + protected static final String ELEMENT_TEXT = "Text"; + protected static final String ELEMENT_DATA = "Data"; + protected static final String ELEMENT_ATTRIBUTE = "Attribute"; + + protected static final Validator CSS_SELECTOR_VALIDATOR = new Validator() { + @Override + public ValidationResult validate(final String subject, final String value, final ValidationContext context) { + if (context.isExpressionLanguageSupported(subject) && context.isExpressionLanguagePresent(value)) { + return new ValidationResult.Builder().subject(subject).input(value).explanation("Expression Language Present").valid(true).build(); + } + + String reason = null; + try { + Document doc = Jsoup.parse("<html></html>"); + doc.select(value); + } catch (final Selector.SelectorParseException e) { + reason = "\"" + value + "\" is an invalid CSS selector"; + } + + return new ValidationResult.Builder().subject(subject).input(value).explanation(reason).valid(reason == null).build(); + } + }; + + public static final PropertyDescriptor URL = new PropertyDescriptor + .Builder().name("URL") + .description("Base URL for the HTML page being parsed.") + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .build(); + + public static final PropertyDescriptor CSS_SELECTOR = new PropertyDescriptor + .Builder().name("CSS Selector") + .description("CSS selector syntax string used to extract the desired HTML element(s).") + .required(true) + .addValidator(CSS_SELECTOR_VALIDATOR) + .expressionLanguageSupported(true) + .build(); + + public static final PropertyDescriptor HTML_CHARSET = new PropertyDescriptor + .Builder().name("HTML Character Encoding") + .description("Character encoding of the input HTML") + .defaultValue("UTF-8") + .required(true) + .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR) + .build(); + + public static final Relationship REL_ORIGINAL = new Relationship.Builder() + .name("original") + .description("The original HTML input") + .build(); + + public static final Relationship REL_SUCCESS = new Relationship.Builder() + .name("success") + .description("Successfully parsed HTML element") + .build(); + + public static final Relationship REL_INVALID_HTML = new Relationship.Builder() + .name("invalid html") + .description("The input HTML syntax is invalid") + .build(); + + public static final Relationship REL_NOT_FOUND = new Relationship.Builder() + .name("element not found") + .description("Element could not be found in the HTML document. The original HTML input will remain " + + "in the flowfile content unchanged. Relationship '" + REL_ORIGINAL + "' will not be invoked " + + "in this scenario.") + .build(); + + /** + * Parses the Jsoup HTML document from the FlowFile input content. + * - * @param inputFlowFile - * Input FlowFile containing the HTML ++ * @param inputFlowFile Input FlowFile containing the HTML ++ * @param context ProcessContext ++ * @param session ProcessSession + * - * @param context - * ProcessContext - * - * @param session - * ProcessSession - * - * @return - * Jsoup Document ++ * @return Jsoup Document + */ - protected Document parseHTMLDocumentFromFlowfile(FlowFile inputFlowFile, - final ProcessContext context, - final ProcessSession session) { ++ protected Document parseHTMLDocumentFromFlowfile(final FlowFile inputFlowFile, final ProcessContext context, final ProcessSession session) { + final AtomicReference<Document> doc = new AtomicReference<>(); + session.read(inputFlowFile, new InputStreamCallback() { + @Override + public void process(InputStream inputStream) throws IOException { + doc.set(Jsoup.parse(inputStream, + context.getProperty(HTML_CHARSET).getValue(), + context.getProperty(URL).getValue())); + } + }); + return doc.get(); + } + } http://git-wip-us.apache.org/repos/asf/nifi/blob/2c9fb676/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/GetHTMLElement.java ---------------------------------------------------------------------- diff --cc nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/GetHTMLElement.java index 0000000,feda16c..1d421a0 mode 000000,100644..100644 --- a/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/GetHTMLElement.java +++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/GetHTMLElement.java @@@ -1,0 -1,254 +1,241 @@@ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.nifi; + + import org.apache.commons.lang3.StringUtils; + import org.apache.nifi.annotation.behavior.InputRequirement; ++import org.apache.nifi.annotation.behavior.SupportsBatching; + import org.apache.nifi.components.PropertyDescriptor; + import org.apache.nifi.flowfile.FlowFile; + import org.apache.nifi.processor.ProcessContext; + import org.apache.nifi.processor.ProcessSession; + import org.apache.nifi.processor.Relationship; + import org.apache.nifi.processor.ProcessorInitializationContext; + import org.apache.nifi.annotation.behavior.WritesAttribute; + import org.apache.nifi.annotation.behavior.WritesAttributes; + import org.apache.nifi.annotation.documentation.CapabilityDescription; + import org.apache.nifi.annotation.documentation.SeeAlso; + import org.apache.nifi.annotation.documentation.Tags; + import org.apache.nifi.processor.exception.ProcessException; + import org.apache.nifi.processor.io.StreamCallback; + import org.apache.nifi.processor.util.StandardValidators; + import org.jsoup.nodes.Document; + import org.jsoup.nodes.Element; + import org.jsoup.select.Elements; + + import java.io.IOException; + import java.io.InputStream; + import java.io.OutputStream; ++import java.nio.charset.StandardCharsets; + import java.util.ArrayList; + import java.util.List; + import java.util.Set; + import java.util.HashSet; + import java.util.Collections; + ++@SupportsBatching + @Tags({"get", "html", "dom", "css", "element"}) + @InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) + @CapabilityDescription("Extracts HTML element values from the incoming flowfile's content using a CSS selector." + + " The incoming HTML is first converted into a HTML Document Object Model so that HTML elements may be selected" + + " in the similar manner that CSS selectors are used to apply styles to HTML. The resulting HTML DOM is then \"queried\"" + + " using the user defined CSS selector string. The result of \"querying\" the HTML DOM may produce 0-N results." + + " If no results are found the flowfile will be transferred to the \"element not found\" relationship to indicate" + + " so to the end user. If N results are found a new flowfile will be created and emitted for each result. The query result will" + + " either be placed in the content of the new flowfile or as an attribute of the new flowfile. By default the result is written to an" + + " attribute. This can be controlled by the \"Destination\" property. Resulting query values may also have data" + + " prepended or appended to them by setting the value of property \"Prepend Element Value\" or \"Append Element Value\"." + + " Prepended and appended values are treated as string values and concatenated to the result retrieved from the" + + " HTML DOM query operation. A more thorough reference for the CSS selector syntax can be found at" + + " \"http://jsoup.org/apidocs/org/jsoup/select/Selector.html\"") + @SeeAlso({ModifyHTMLElement.class, PutHTMLElement.class}) + @WritesAttributes({@WritesAttribute(attribute="HTMLElement", description="Flowfile attribute where the element result" + + " parsed from the HTML using the CSS selector syntax are placed if the destination is a flowfile attribute.")}) + public class GetHTMLElement + extends AbstractHTMLProcessor { + + public static final String HTML_ELEMENT_ATTRIBUTE_NAME = "HTMLElement"; + public static final String DESTINATION_ATTRIBUTE = "flowfile-attribute"; + public static final String DESTINATION_CONTENT = "flowfile-content"; + + public static final PropertyDescriptor PREPEND_ELEMENT_VALUE = new PropertyDescriptor + .Builder().name("Prepend Element Value") + .description("Prepends the specified value to the resulting Element") + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .build(); + + public static final PropertyDescriptor APPEND_ELEMENT_VALUE = new PropertyDescriptor + .Builder().name("Append Element Value") + .description("Appends the specified value to the resulting Element") + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .build(); + + public static final PropertyDescriptor ATTRIBUTE_KEY = new PropertyDescriptor + .Builder().name("Attribute Name") + .description(("When getting the value of a HTML element attribute this value is used as the key to determine" + + " which attribute on the selected element should be retrieved. This value is used when the \"Output Type\"" + + " is set to \"" + ELEMENT_ATTRIBUTE + "\"")) + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .build(); + + + public static final PropertyDescriptor OUTPUT_TYPE = new PropertyDescriptor.Builder() + .name("Output Type") + .description("Controls the type of DOM value that is retrieved from the HTML element.") + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .allowableValues(ELEMENT_HTML, ELEMENT_TEXT, ELEMENT_ATTRIBUTE, ELEMENT_DATA) + .defaultValue(ELEMENT_HTML) + .build(); + + public static final PropertyDescriptor DESTINATION = new PropertyDescriptor.Builder() + .name("Destination") + .description("Control if element extracted is written as a flowfile attribute or " + + "as flowfile content.") + .required(true) + .allowableValues(DESTINATION_ATTRIBUTE, DESTINATION_CONTENT) + .defaultValue(DESTINATION_ATTRIBUTE) + .build(); + + private List<PropertyDescriptor> descriptors; + + private Set<Relationship> relationships; + + @Override + protected void init(final ProcessorInitializationContext context) { + final List<PropertyDescriptor> descriptors = new ArrayList<>(); + descriptors.add(URL); + descriptors.add(CSS_SELECTOR); + descriptors.add(HTML_CHARSET); + descriptors.add(OUTPUT_TYPE); + descriptors.add(DESTINATION); + descriptors.add(PREPEND_ELEMENT_VALUE); + descriptors.add(APPEND_ELEMENT_VALUE); + descriptors.add(ATTRIBUTE_KEY); + this.descriptors = Collections.unmodifiableList(descriptors); + + final Set<Relationship> relationships = new HashSet<>(); + relationships.add(REL_ORIGINAL); + relationships.add(REL_SUCCESS); + relationships.add(REL_INVALID_HTML); + relationships.add(REL_NOT_FOUND); + this.relationships = Collections.unmodifiableSet(relationships); + } + + @Override + public Set<Relationship> getRelationships() { + return this.relationships; + } + + @Override + public final List<PropertyDescriptor> getSupportedPropertyDescriptors() { + return descriptors; + } + + @Override + public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { + final FlowFile flowFile = session.get(); + if ( flowFile == null ) { + return; + } + - try { ++ final Document doc; ++ final Elements eles; + - final Document doc = parseHTMLDocumentFromFlowfile(flowFile, context, session); - final Elements eles = doc.select(context.getProperty(CSS_SELECTOR) - .evaluateAttributeExpressions(flowFile).getValue()); - final String prependValue = context.getProperty(PREPEND_ELEMENT_VALUE) - .evaluateAttributeExpressions(flowFile).getValue(); - final String appendValue = context.getProperty(APPEND_ELEMENT_VALUE) - .evaluateAttributeExpressions(flowFile).getValue(); - - if (eles == null || eles.size() == 0) { - //No element found - session.transfer(flowFile, REL_NOT_FOUND); - } else { - for (final Element ele : eles) { - final FlowFile ff = session.create(flowFile); - - switch (context.getProperty(DESTINATION).getValue()) { - case DESTINATION_ATTRIBUTE: - final FlowFile atFlowfile = session.putAttribute(ff, HTML_ELEMENT_ATTRIBUTE_NAME, - extractElementValue( - prependValue, - context.getProperty(OUTPUT_TYPE).getValue(), - appendValue, - ele, - context.getProperty(ATTRIBUTE_KEY).evaluateAttributeExpressions() - .getValue())); - session.transfer(atFlowfile, REL_SUCCESS); - break; - case DESTINATION_CONTENT: - final FlowFile conFlowfile = session.write(ff, new StreamCallback() { - @Override - public void process(InputStream inputStream, OutputStream outputStream) throws IOException { - try { - outputStream.write(extractElementValue( - prependValue, - context.getProperty(OUTPUT_TYPE).getValue(), - appendValue, - ele, - context.getProperty(ATTRIBUTE_KEY).evaluateAttributeExpressions() - .getValue()).getBytes()); - } catch (Exception ex) { - getLogger().error(ex.getMessage()); - session.transfer(ff, REL_INVALID_HTML); - } - } - }); - - session.transfer(conFlowfile, REL_SUCCESS); - break; - } ++ try { ++ doc = parseHTMLDocumentFromFlowfile(flowFile, context, session); ++ eles = doc.select(context.getProperty(CSS_SELECTOR).evaluateAttributeExpressions(flowFile).getValue()); ++ } catch (final Exception ex) { ++ getLogger().error("Failed to extract HTML from {} due to {}; routing to {}", new Object[] {flowFile, ex, REL_INVALID_HTML}, ex); ++ session.transfer(flowFile, REL_INVALID_HTML); ++ return; ++ } + ++ final String prependValue = context.getProperty(PREPEND_ELEMENT_VALUE).evaluateAttributeExpressions(flowFile).getValue(); ++ final String appendValue = context.getProperty(APPEND_ELEMENT_VALUE).evaluateAttributeExpressions(flowFile).getValue(); ++ final String outputType = context.getProperty(OUTPUT_TYPE).getValue(); ++ final String attributeKey = context.getProperty(ATTRIBUTE_KEY).evaluateAttributeExpressions(flowFile).getValue(); ++ ++ if (eles == null || eles.isEmpty()) { ++ // No element found ++ session.transfer(flowFile, REL_NOT_FOUND); ++ } else { ++ // Create a new FlowFile for each matching element. ++ for (final Element ele : eles) { ++ final String extractedElementValue = extractElementValue(prependValue, outputType, appendValue, ele, attributeKey); ++ ++ final FlowFile ff = session.create(flowFile); ++ FlowFile updatedFF = ff; ++ ++ switch (context.getProperty(DESTINATION).getValue()) { ++ case DESTINATION_ATTRIBUTE: ++ updatedFF = session.putAttribute(ff, HTML_ELEMENT_ATTRIBUTE_NAME, extractedElementValue); ++ break; ++ case DESTINATION_CONTENT: ++ updatedFF = session.write(ff, new StreamCallback() { ++ @Override ++ public void process(final InputStream inputStream, final OutputStream outputStream) throws IOException { ++ outputStream.write(extractedElementValue.getBytes(StandardCharsets.UTF_8)); ++ } ++ }); ++ ++ break; + } + - //Transfer the original HTML - session.transfer(flowFile, REL_ORIGINAL); ++ session.transfer(updatedFF, REL_SUCCESS); + } + - } catch (Exception ex) { - getLogger().error(ex.getMessage()); - session.transfer(flowFile, REL_INVALID_HTML); ++ // Transfer the original HTML ++ session.transfer(flowFile, REL_ORIGINAL); + } - + } + + + /** + * Extracts the HTML value based on the configuration values. + * - * @return - * value from the parsed HTML element ++ * @return value from the parsed HTML element + */ - private String extractElementValue(String prependValue, String outputType, String appendValue, Element ele, - String attrKey) { ++ private String extractElementValue(String prependValue, final String outputType, String appendValue, final Element ele, final String attrKey) { + if (StringUtils.isEmpty(prependValue)) { + prependValue = ""; + } + if (StringUtils.isEmpty(appendValue)) { + appendValue = ""; + } + + switch (outputType) { + case ELEMENT_HTML: + return prependValue + ele.html() + appendValue; + case ELEMENT_TEXT: + return prependValue + ele.text() + appendValue; + case ELEMENT_DATA: + return prependValue + ele.data() + appendValue; + case ELEMENT_ATTRIBUTE: + return prependValue + ele.attr(attrKey) + appendValue; + default: + return prependValue + ele.html() + appendValue; + } + } + + } http://git-wip-us.apache.org/repos/asf/nifi/blob/2c9fb676/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/ModifyHTMLElement.java ---------------------------------------------------------------------- diff --cc nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/ModifyHTMLElement.java index 0000000,0829158..e84d4ed mode 000000,100644..100644 --- a/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/ModifyHTMLElement.java +++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/ModifyHTMLElement.java @@@ -1,0 -1,175 +1,179 @@@ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.nifi; + + import org.apache.nifi.annotation.behavior.InputRequirement; + import org.apache.nifi.annotation.behavior.SupportsBatching; + import org.apache.nifi.annotation.behavior.WritesAttribute; + import org.apache.nifi.annotation.behavior.WritesAttributes; + import org.apache.nifi.annotation.documentation.CapabilityDescription; + import org.apache.nifi.annotation.documentation.SeeAlso; + import org.apache.nifi.annotation.documentation.Tags; + import org.apache.nifi.components.PropertyDescriptor; + import org.apache.nifi.flowfile.FlowFile; + import org.apache.nifi.processor.ProcessContext; + import org.apache.nifi.processor.ProcessSession; + import org.apache.nifi.processor.Relationship; + import org.apache.nifi.processor.ProcessorInitializationContext; + import org.apache.nifi.processor.exception.ProcessException; + import org.apache.nifi.processor.io.StreamCallback; + import org.apache.nifi.processor.util.StandardValidators; + import org.jsoup.nodes.Document; + import org.jsoup.nodes.Element; + import org.jsoup.select.Elements; + + import java.io.IOException; + import java.io.InputStream; + import java.io.OutputStream; ++import java.nio.charset.StandardCharsets; + import java.util.List; + import java.util.ArrayList; + import java.util.Set; + import java.util.HashSet; + import java.util.Collections; + + @Tags({"modify", "html", "dom", "css", "element"}) + @SupportsBatching + @InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) + @CapabilityDescription("Modifies the value of an existing HTML element. The desired element to be modified is located by" + + " using CSS selector syntax. The incoming HTML is first converted into a HTML Document Object Model so that HTML elements may be selected" + + " in the similar manner that CSS selectors are used to apply styles to HTML. The resulting HTML DOM is then \"queried\"" + + " using the user defined CSS selector string to find the element the user desires to modify. If the HTML element is found" + + " the element's value is updated in the DOM using the value specified \"Modified Value\" property. All DOM elements" + + " that match the CSS selector will be updated. Once all of the DOM elements have been updated the DOM is rendered" + + " to HTML and the result replaces the flowfile content with the updated HTML. A more thorough reference for the" + + " CSS selector syntax can be found at" + + " \"http://jsoup.org/apidocs/org/jsoup/select/Selector.html\"") + @SeeAlso({GetHTMLElement.class, PutHTMLElement.class}) + @WritesAttributes({@WritesAttribute(attribute="NumElementsModified", description="Total number of HTML " + + "element modifications made")}) + public class ModifyHTMLElement extends AbstractHTMLProcessor { + + public static final String NUM_ELEMENTS_MODIFIED_ATTR = "NumElementsModified"; + + public static final PropertyDescriptor OUTPUT_TYPE = new PropertyDescriptor.Builder() + .name("Output Type") + .description("Controls whether the HTML element is output as " + + ELEMENT_HTML + "," + ELEMENT_TEXT + " or " + ELEMENT_DATA) + .required(true) + .allowableValues(ELEMENT_HTML, ELEMENT_TEXT, ELEMENT_ATTRIBUTE) + .defaultValue(ELEMENT_HTML) + .build(); + + public static final PropertyDescriptor MODIFIED_VALUE = new PropertyDescriptor + .Builder().name("Modified Value") + .description("Value to update the found HTML elements with") + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .build(); + + public static final PropertyDescriptor ATTRIBUTE_KEY = new PropertyDescriptor + .Builder().name("Attribute Name") + .description(("When modifying the value of an element attribute this value is used as the key to determine" + + " which attribute on the selected element will be modified with the new value.")) + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .build(); + + private List<PropertyDescriptor> descriptors; + + private Set<Relationship> relationships; + + @Override + protected void init(final ProcessorInitializationContext context) { + final List<PropertyDescriptor> descriptors = new ArrayList<>(); + descriptors.add(URL); + descriptors.add(CSS_SELECTOR); + descriptors.add(HTML_CHARSET); + descriptors.add(OUTPUT_TYPE); + descriptors.add(MODIFIED_VALUE); + descriptors.add(ATTRIBUTE_KEY); + this.descriptors = Collections.unmodifiableList(descriptors); + + final Set<Relationship> relationships = new HashSet<Relationship>(); + relationships.add(REL_ORIGINAL); + relationships.add(REL_SUCCESS); + relationships.add(REL_INVALID_HTML); + relationships.add(REL_NOT_FOUND); + this.relationships = Collections.unmodifiableSet(relationships); + } + + @Override + public Set<Relationship> getRelationships() { + return this.relationships; + } + + @Override + public final List<PropertyDescriptor> getSupportedPropertyDescriptors() { + return descriptors; + } + + @Override + public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { + final FlowFile flowFile = session.get(); + if (flowFile == null) { + return; + } + ++ final Document doc; ++ final Elements eles; + try { - final Document doc = parseHTMLDocumentFromFlowfile(flowFile, context, session); - final Elements eles = doc.select(context.getProperty(CSS_SELECTOR).evaluateAttributeExpressions().getValue()); - - if (eles == null || eles.size() == 0) { - //No element found - session.transfer(flowFile, REL_NOT_FOUND); - } else { - for (Element ele : eles) { - switch (context.getProperty(OUTPUT_TYPE).getValue()) { - case ELEMENT_HTML: - ele.html(context.getProperty(MODIFIED_VALUE).evaluateAttributeExpressions().getValue()); - break; - case ELEMENT_ATTRIBUTE: - ele.attr(context.getProperty(ATTRIBUTE_KEY).evaluateAttributeExpressions().getValue(), - context.getProperty(MODIFIED_VALUE).evaluateAttributeExpressions().getValue()); - break; - case ELEMENT_TEXT: - ele.text(context.getProperty(MODIFIED_VALUE).evaluateAttributeExpressions().getValue()); - break; - } - } ++ doc = parseHTMLDocumentFromFlowfile(flowFile, context, session); ++ eles = doc.select(context.getProperty(CSS_SELECTOR).evaluateAttributeExpressions(flowFile).getValue()); ++ } catch (Exception ex) { ++ getLogger().error("Failed to extract HTML from {} due to {}; routing to {}", new Object[] {flowFile, ex.toString(), REL_INVALID_HTML.getName()}, ex); ++ session.transfer(flowFile, REL_INVALID_HTML); ++ return; ++ } + - FlowFile ff = session.write(session.create(flowFile), new StreamCallback() { - @Override - public void process(InputStream in, OutputStream out) throws IOException { - out.write(doc.html().getBytes()); - } - }); - ff = session.putAttribute(ff, NUM_ELEMENTS_MODIFIED_ATTR, new Integer(eles.size()).toString()); - session.transfer(ff, REL_SUCCESS); - - //Transfer the original HTML - session.transfer(flowFile, REL_ORIGINAL); ++ final String modifiedValue = context.getProperty(MODIFIED_VALUE).evaluateAttributeExpressions(flowFile).getValue(); ++ ++ if (eles == null || eles.size() == 0) { ++ // No element found ++ session.transfer(flowFile, REL_NOT_FOUND); ++ } else { ++ for (Element ele : eles) { ++ switch (context.getProperty(OUTPUT_TYPE).getValue()) { ++ case ELEMENT_HTML: ++ ele.html(modifiedValue); ++ break; ++ case ELEMENT_ATTRIBUTE: ++ ele.attr(context.getProperty(ATTRIBUTE_KEY).evaluateAttributeExpressions(flowFile).getValue(), modifiedValue); ++ break; ++ case ELEMENT_TEXT: ++ ele.text(modifiedValue); ++ break; ++ } + } + - } catch (Exception ex) { - getLogger().error(ex.getMessage()); - session.transfer(flowFile, REL_INVALID_HTML); ++ FlowFile ff = session.write(session.create(flowFile), new StreamCallback() { ++ @Override ++ public void process(InputStream in, OutputStream out) throws IOException { ++ out.write(doc.html().getBytes(StandardCharsets.UTF_8)); ++ } ++ }); ++ ff = session.putAttribute(ff, NUM_ELEMENTS_MODIFIED_ATTR, new Integer(eles.size()).toString()); ++ session.transfer(ff, REL_SUCCESS); ++ ++ // Transfer the original HTML ++ session.transfer(flowFile, REL_ORIGINAL); + } + } + + } http://git-wip-us.apache.org/repos/asf/nifi/blob/2c9fb676/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/PutHTMLElement.java ---------------------------------------------------------------------- diff --cc nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/PutHTMLElement.java index 0000000,2af31d5..995fc99 mode 000000,100644..100644 --- a/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/PutHTMLElement.java +++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/main/java/org/apache/nifi/PutHTMLElement.java @@@ -1,0 -1,160 +1,165 @@@ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.nifi; + + import org.apache.nifi.annotation.behavior.InputRequirement; + import org.apache.nifi.annotation.behavior.SupportsBatching; + import org.apache.nifi.annotation.documentation.CapabilityDescription; + import org.apache.nifi.annotation.documentation.SeeAlso; + import org.apache.nifi.annotation.documentation.Tags; + import org.apache.nifi.components.PropertyDescriptor; + import org.apache.nifi.flowfile.FlowFile; + import org.apache.nifi.processor.ProcessContext; + import org.apache.nifi.processor.ProcessSession; + import org.apache.nifi.processor.Relationship; + import org.apache.nifi.processor.ProcessorInitializationContext; + import org.apache.nifi.processor.exception.ProcessException; + import org.apache.nifi.processor.io.StreamCallback; + import org.apache.nifi.processor.util.StandardValidators; + import org.jsoup.nodes.Document; + import org.jsoup.nodes.Element; + import org.jsoup.select.Elements; + + import java.io.IOException; + import java.io.InputStream; + import java.io.OutputStream; ++import java.nio.charset.StandardCharsets; + import java.util.ArrayList; + import java.util.List; + import java.util.Set; + import java.util.HashSet; + import java.util.Collections; + + @Tags({"put", "html", "dom", "css", "element"}) + @SupportsBatching + @InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) + @CapabilityDescription("Places a new HTML element in the existing HTML DOM. The desired position for the new HTML element is specified by" + + " using CSS selector syntax. The incoming HTML is first converted into a HTML Document Object Model so that HTML DOM location may be located" + + " in a similar manner that CSS selectors are used to apply styles to HTML. The resulting HTML DOM is then \"queried\"" + + " using the user defined CSS selector string to find the position where the user desires to add the new HTML element." + + " Once the new HTML element is added to the DOM it is rendered to HTML and the result replaces the flowfile" + + " content with the updated HTML. A more thorough reference for the CSS selector syntax can be found at" + + " \"http://jsoup.org/apidocs/org/jsoup/select/Selector.html\"") + @SeeAlso({GetHTMLElement.class, ModifyHTMLElement.class}) + public class PutHTMLElement extends AbstractHTMLProcessor { + + public static final String APPEND_ELEMENT = "append-html"; + public static final String PREPEND_ELEMENT = "prepend-html"; + + public static final PropertyDescriptor PUT_LOCATION_TYPE = new PropertyDescriptor.Builder() + .name("Element Insert Location Type") + .description("Controls whether the new element is prepended or appended to the children of the " + + "Element located by the CSS selector. EX: prepended value '<b>Hi</b>' inside of " + + "Element (using CSS Selector 'p') '<p>There</p>' would result in " + + "'<p><b>Hi</b>There</p>'. Appending the value would result in '<p>There<b>Hi</b></p>'") + .required(true) + .allowableValues(APPEND_ELEMENT, PREPEND_ELEMENT) + .defaultValue(APPEND_ELEMENT) + .build(); + + public static final PropertyDescriptor PUT_VALUE = new PropertyDescriptor.Builder() + .name("Put Value") + .description("Value used when creating the new Element. Value should be a valid HTML element. " + + "The text should be supplied unencoded: characters like '<', '>', etc will be properly HTML " + + "encoded in the resulting output.") + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(true) + .build(); + + private List<PropertyDescriptor> descriptors; + + private Set<Relationship> relationships; + + @Override + protected void init(final ProcessorInitializationContext context) { + final List<PropertyDescriptor> descriptors = new ArrayList<PropertyDescriptor>(); + descriptors.add(URL); + descriptors.add(CSS_SELECTOR); + descriptors.add(HTML_CHARSET); + descriptors.add(PUT_LOCATION_TYPE); + descriptors.add(PUT_VALUE); + this.descriptors = Collections.unmodifiableList(descriptors); + + final Set<Relationship> relationships = new HashSet<Relationship>(); + relationships.add(REL_ORIGINAL); + relationships.add(REL_SUCCESS); + relationships.add(REL_INVALID_HTML); + relationships.add(REL_NOT_FOUND); + this.relationships = Collections.unmodifiableSet(relationships); + } + + @Override + public Set<Relationship> getRelationships() { + return this.relationships; + } + + @Override + public final List<PropertyDescriptor> getSupportedPropertyDescriptors() { + return descriptors; + } + + @Override + public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { + final FlowFile flowFile = session.get(); + if (flowFile == null) { + return; + } + ++ final Document doc; ++ final Elements eles; + try { - final Document doc = parseHTMLDocumentFromFlowfile(flowFile, context, session); - final Elements eles = doc.select(context.getProperty(CSS_SELECTOR).evaluateAttributeExpressions().getValue()); - - if (eles == null || eles.size() == 0) { - //No element found - session.transfer(flowFile, REL_NOT_FOUND); - } else { - for (Element ele : eles) { - switch (context.getProperty(PUT_LOCATION_TYPE).getValue()) { - case APPEND_ELEMENT: - ele.append(context.getProperty(PUT_VALUE).evaluateAttributeExpressions(flowFile).getValue()); - break; - case PREPEND_ELEMENT: - ele.prepend(context.getProperty(PUT_VALUE).evaluateAttributeExpressions(flowFile).getValue()); - break; - } - } ++ doc = parseHTMLDocumentFromFlowfile(flowFile, context, session); ++ eles = doc.select(context.getProperty(CSS_SELECTOR).evaluateAttributeExpressions().getValue()); ++ } catch (Exception ex) { ++ getLogger().error("Failed to extract HTML from {} due to {}; routing to {}", new Object[] {flowFile, ex.toString(), REL_INVALID_HTML.getName()}, ex); ++ session.transfer(flowFile, REL_INVALID_HTML); ++ return; ++ } + - FlowFile ff = session.write(session.create(flowFile), new StreamCallback() { - @Override - public void process(InputStream in, OutputStream out) throws IOException { - out.write(doc.html().getBytes()); - } - }); - session.transfer(ff, REL_SUCCESS); + - //Transfer the original HTML - session.transfer(flowFile, REL_ORIGINAL); ++ if (eles == null || eles.isEmpty()) { ++ // No element found ++ session.transfer(flowFile, REL_NOT_FOUND); ++ } else { ++ final String putValue = context.getProperty(PUT_VALUE).evaluateAttributeExpressions(flowFile).getValue(); ++ ++ for (final Element ele : eles) { ++ switch (context.getProperty(PUT_LOCATION_TYPE).getValue()) { ++ case APPEND_ELEMENT: ++ ele.append(putValue); ++ break; ++ case PREPEND_ELEMENT: ++ ele.prepend(putValue); ++ break; ++ } + } + - } catch (Exception ex) { - getLogger().error(ex.getMessage()); - session.transfer(flowFile, REL_INVALID_HTML); - } ++ FlowFile ff = session.write(session.create(flowFile), new StreamCallback() { ++ @Override ++ public void process(final InputStream in, final OutputStream out) throws IOException { ++ out.write(doc.html().getBytes(StandardCharsets.UTF_8)); ++ } ++ }); + - } ++ session.transfer(ff, REL_SUCCESS); + ++ // Transfer the original HTML ++ session.transfer(flowFile, REL_ORIGINAL); ++ } ++ } + } http://git-wip-us.apache.org/repos/asf/nifi/blob/2c9fb676/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/TestGetHTMLElement.java ---------------------------------------------------------------------- diff --cc nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/TestGetHTMLElement.java index 0000000,1cc5d73..4839fce mode 000000,100644..100644 --- a/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/TestGetHTMLElement.java +++ b/nifi-nar-bundles/nifi-html-bundle/nifi-html-processors/src/test/java/org/apache/nifi/TestGetHTMLElement.java @@@ -1,0 -1,276 +1,275 @@@ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.nifi; + + import org.apache.nifi.util.MockFlowFile; + import org.apache.nifi.util.TestRunner; + import org.apache.nifi.util.TestRunners; + import org.jsoup.Jsoup; + import org.jsoup.nodes.Document; + import org.jsoup.select.Selector; + import org.junit.Before; + import org.junit.Test; + + import java.io.File; + import java.io.IOException; + import java.lang.Exception; + import java.net.URL; + import java.util.List; + + public class TestGetHTMLElement extends AbstractHTMLTest { + + private TestRunner testRunner; + + @Before + public void init() { + testRunner = TestRunners.newTestRunner(GetHTMLElement.class); + testRunner.setProperty(GetHTMLElement.URL, "http://localhost"); + testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_HTML); + testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT); + testRunner.setProperty(GetHTMLElement.HTML_CHARSET, "UTF-8"); + } + + @Test + public void testCSSSelectorSyntaxValidator() throws IOException { + Document doc = Jsoup.parse(new URL("http://www.google.com"), 5000); + try { + doc.select("---jeremy"); + } catch (Selector.SelectorParseException ex) { - String mes = ex.getMessage(); + ex.printStackTrace(); + } + } + + @Test + public void testNoElementFound() throws Exception { + testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "b"); //Bold element is not present in sample HTML + + testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); + testRunner.run(); + + testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 1); + } + + @Test + public void testInvalidSelector() throws Exception { + testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "InvalidCSSSelectorSyntax"); + + testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); + testRunner.run(); + + testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 1); + } + + @Test + public void testSingleElementFound() throws Exception { + testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "head"); + + testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); + testRunner.run(); + + testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1); + testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1); + testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0); + } + + @Test + public void testMultipleElementFound() throws Exception { + testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "a"); + + testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); + testRunner.run(); + + testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 3); + testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1); + testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0); + } + + @Test + public void testElementFoundWriteToAttribute() throws Exception { + testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + ATL_ID); + testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_ATTRIBUTE); + testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE); + testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "href"); + + testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); + testRunner.run(); + + testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1); + testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1); + testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0); + + List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS); + ffs.get(0).assertAttributeEquals(GetHTMLElement.HTML_ELEMENT_ATTRIBUTE_NAME, ATL_WEATHER_LINK); + } + + @Test + public void testElementFoundWriteToContent() throws Exception { + testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + ATL_ID); + testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT); + testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE); + testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "href"); + + testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); + testRunner.run(); + + testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1); + testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1); + testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0); + + List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS); + ffs.get(0).assertContentEquals(ATL_WEATHER_LINK); + } + + @Test + public void testValidPrependValueToFoundElement() throws Exception { + final String PREPEND_VALUE = "TestPrepend"; + testRunner.setProperty(GetHTMLElement.PREPEND_ELEMENT_VALUE, PREPEND_VALUE); + testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + ATL_ID); + testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT); + testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE); + testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "href"); + + testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); + testRunner.run(); + + testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1); + testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1); + testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0); + + List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS); + ffs.get(0).assertContentEquals(PREPEND_VALUE + ATL_WEATHER_LINK); + } + + @Test + public void testValidPrependValueToNotFoundElement() throws Exception { + final String PREPEND_VALUE = "TestPrepend"; + testRunner.setProperty(GetHTMLElement.PREPEND_ELEMENT_VALUE, PREPEND_VALUE); + testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "b"); + testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT); + testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_TEXT); + + testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); + testRunner.run(); + + testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 1); + } + + @Test + public void testValidAppendValueToFoundElement() throws Exception { + final String APPEND_VALUE = "TestAppend"; + testRunner.setProperty(GetHTMLElement.APPEND_ELEMENT_VALUE, APPEND_VALUE); + testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + ATL_ID); + testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT); + testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE); + testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "href"); + + testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); + testRunner.run(); + + testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1); + testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1); + testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0); + + List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS); + ffs.get(0).assertContentEquals(ATL_WEATHER_LINK + APPEND_VALUE); + } + + @Test + public void testValidAppendValueToNotFoundElement() throws Exception { + final String APPEND_VALUE = "TestAppend"; + testRunner.setProperty(GetHTMLElement.APPEND_ELEMENT_VALUE, APPEND_VALUE); + testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "b"); + testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT); + testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_TEXT); + + testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); + testRunner.run(); + + testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 1); + } + + @Test + public void testExtractAttributeFromElement() throws Exception { + testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "meta[name=author]"); + testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT); + testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE); + testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "Content"); + + testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); + testRunner.run(); + + testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1); + testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1); + testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0); + + List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS); + ffs.get(0).assertContentEquals(AUTHOR_NAME); + } + + @Test + public void testExtractTextFromElement() throws Exception { + testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + ATL_ID); + testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT); + testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_TEXT); + + testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); + testRunner.run(); + + testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1); + testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1); + testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0); + + List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS); + ffs.get(0).assertContentEquals(ATL_WEATHER_TEXT); + } + + @Test + public void testExtractHTMLFromElement() throws Exception { + testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + GDR_ID); + testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT); + testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_HTML); + + testRunner.enqueue(new File("src/test/resources/Weather.html").toPath()); + testRunner.run(); + + testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1); + testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0); + testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1); + testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0); + + List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS); + ffs.get(0).assertContentEquals(GDR_WEATHER_TEXT); + } + } http://git-wip-us.apache.org/repos/asf/nifi/blob/2c9fb676/nifi-nar-bundles/nifi-html-bundle/pom.xml ---------------------------------------------------------------------- diff --cc nifi-nar-bundles/nifi-html-bundle/pom.xml index 0000000,186fef3..95d61de mode 000000,100644..100644 --- a/nifi-nar-bundles/nifi-html-bundle/pom.xml +++ b/nifi-nar-bundles/nifi-html-bundle/pom.xml @@@ -1,0 -1,43 +1,43 @@@ + <?xml version="1.0" encoding="UTF-8"?> + <!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + --> + <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-nar-bundles</artifactId> - <version>0.4.0-SNAPSHOT</version> ++ <version>0.4.2-SNAPSHOT</version> + </parent> + + <artifactId>nifi-html-bundle</artifactId> + <packaging>pom</packaging> + + <modules> + <module>nifi-html-processors</module> + <module>nifi-html-nar</module> + </modules> + + <dependencyManagement> + <dependencies> + <dependency> + <groupId>org.apache.nifi</groupId> + <artifactId>nifi-html-processors</artifactId> - <type>nar</type> ++ <version>0.4.2-SNAPSHOT</version> + </dependency> + </dependencies> + </dependencyManagement> + + </project> http://git-wip-us.apache.org/repos/asf/nifi/blob/2c9fb676/nifi-nar-bundles/pom.xml ---------------------------------------------------------------------- diff --cc nifi-nar-bundles/pom.xml index 96ab012,5e3a97c..3bc915b --- a/nifi-nar-bundles/pom.xml +++ b/nifi-nar-bundles/pom.xml @@@ -48,8 -48,7 +48,9 @@@ <module>nifi-avro-bundle</module> <module>nifi-couchbase-bundle</module> <module>nifi-azure-bundle</module> + <module>nifi-ldap-iaa-providers-bundle</module> + <module>nifi-riemann-bundle</module> + <module>nifi-html-bundle</module> </modules> <dependencyManagement> <dependencies> http://git-wip-us.apache.org/repos/asf/nifi/blob/2c9fb676/pom.xml ---------------------------------------------------------------------- diff --cc pom.xml index 213888b,57b0193..501593f --- a/pom.xml +++ b/pom.xml @@@ -754,9 -726,9 +754,14 @@@ language governing permissions and limi <version>1.3.1</version> </dependency> <dependency> ++ <groupId>org.jsoup</groupId> ++ <artifactId>jsoup</artifactId> ++ <version>1.8.3</version> ++ </dependency> ++ <dependency> <groupId>org.apache.nifi</groupId> <artifactId>nifi-api</artifactId> - <version>0.4.0-SNAPSHOT</version> + <version>0.4.2-SNAPSHOT</version> </dependency> <dependency> <groupId>org.apache.nifi</groupId> @@@ -891,8 -863,14 +896,14 @@@ </dependency> <dependency> <groupId>org.apache.nifi</groupId> + <artifactId>nifi-html-nar</artifactId> - <version>0.4.0-SNAPSHOT</version> ++ <version>0.4.2-SNAPSHOT</version> + <type>nar</type> + </dependency> + <dependency> + <groupId>org.apache.nifi</groupId> <artifactId>nifi-kite-nar</artifactId> - <version>0.4.0-SNAPSHOT</version> + <version>0.4.2-SNAPSHOT</version> <type>nar</type> </dependency> <dependency>
