This is an automated email from the ASF dual-hosted git repository. rombert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/sling-org-apache-sling-commons-html.git
commit fae7ec857766e4a2adb0acdaaa1fad3624c07271 Author: Robert Munteanu <[email protected]> AuthorDate: Mon Apr 24 07:34:50 2017 +0000 SLING-6783 updates for org.apache.commons.html Submitted-By: Jason Bailey git-svn-id: https://svn.apache.org/repos/asf/sling/trunk@1792430 13f79535-47bb-0310-9956-ffa450edef68 --- NOTICE | 2 +- README.md | 39 ++++++++++++++++++++++ pom.xml | 14 ++++---- .../sling/commons/html/impl/HtmlParserImpl.java | 36 ++++++++++++++++++-- 4 files changed, 79 insertions(+), 12 deletions(-) diff --git a/NOTICE b/NOTICE index be0c7d1..92f43bf 100644 --- a/NOTICE +++ b/NOTICE @@ -8,4 +8,4 @@ This product includes software developed at The Apache Software Foundation (http://www.apache.org/). This product includes software developed at -http://home.ccil.org/~cowan/XML/tagsoup/ \ No newline at end of file +http://vrici.lojban.org/~cowan/XML/tagsoup/ \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..106367a --- /dev/null +++ b/README.md @@ -0,0 +1,39 @@ +# current settings and their default values + +* http://xml.org/sax/features/namespaces=true +* http://xml.org/sax/features/namespace-prefixes=false +* http://xml.org/sax/features/external-general-entities=false +* http://xml.org/sax/features/external-parameter-entities=false +* http://xml.org/sax/features/is-standalone=false +* http://xml.org/sax/features/lexical-handler/parameter-entities=false +* http://xml.org/sax/features/resolve-dtd-uris=true +* http://xml.org/sax/features/string-interning=true +* http://xml.org/sax/features/use-attributes2=false +* http://xml.org/sax/features/use-locator2=false +* http://xml.org/sax/features/use-entity-resolver2=false +* http://xml.org/sax/features/validation=false +* http://xml.org/sax/features/xmlns-uris=false +* http://xml.org/sax/features/xmlns-uris=false +* http://xml.org/sax/features/xml-1.1=false + +default SAX features are defined here +http://www.saxproject.org/apidoc/org/xml/sax/package-summary.html + +tagsoup specific features are + +* http://www.ccil.org/~cowan/tagsoup/features/ignore-bogons=false + A value of "true" indicates that the parser will ignore unknown elements. +* http://www.ccil.org/~cowan/tagsoup/features/bogons-empty=false + A value of "true" indicates that the parser will give unknown elements a content model of EMPTY; a value of "false", a content model of ANY. +* http://www.ccil.org/~cowan/tagsoup/features/root-bogons=true + A value of "true" indicates that the parser will allow unknown elements to be the root of the output document. +* http://www.ccil.org/~cowan/tagsoup/features/default-attributes=true + A value of "true" indicates that the parser will return default attribute values for missing attributes that have default values. +* http://www.ccil.org/~cowan/tagsoup/features/translate-colons=false + A value of "true" indicates that the parser will translate colons into underscores in names. +* http://www.ccil.org/~cowan/tagsoup/features/restart-elements=true + A value of "true" indicates that the parser will attempt to restart the restartable elements. +* http://www.ccil.org/~cowan/tagsoup/features/ignorable-whitespace=false + A value of "true" indicates that the parser will transmit whitespace in element-only content via the SAX ignorableWhitespace callback. Normally this is not done, because HTML is an SGML application and SGML suppresses such whitespace. +* http://www.ccil.org/~cowan/tagsoup/features/cdata-elements=true + A value of "true" indicates that the parser will process the script and style elements (or any elements with type='cdata' in the TSSL schema) as SGML CDATA elements (that is, no markup is recognized except the matching end-tag). diff --git a/pom.xml b/pom.xml index 97eddd9..f022b61 100644 --- a/pom.xml +++ b/pom.xml @@ -23,7 +23,7 @@ <parent> <groupId>org.apache.sling</groupId> <artifactId>sling</artifactId> - <version>26</version> + <version>30</version> <relativePath/> </parent> @@ -46,10 +46,6 @@ <plugins> <plugin> <groupId>org.apache.felix</groupId> - <artifactId>maven-scr-plugin</artifactId> - </plugin> - <plugin> - <groupId>org.apache.felix</groupId> <artifactId>maven-bundle-plugin</artifactId> <extensions>true</extensions> <configuration> @@ -85,11 +81,13 @@ <dependency> <groupId>org.ccil.cowan.tagsoup</groupId> <artifactId>tagsoup</artifactId> - <version>1.2</version> + <version>1.2.1</version> </dependency> <dependency> - <groupId>org.apache.felix</groupId> - <artifactId>org.apache.felix.scr.annotations</artifactId> + <groupId>org.apache.sling</groupId> + <artifactId>org.apache.sling.commons.osgi</artifactId> + <version>2.2.0</version> + <scope>provided</scope> </dependency> </dependencies> </project> diff --git a/src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java b/src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java index d3cd9b8..109e411 100644 --- a/src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java +++ b/src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java @@ -20,11 +20,16 @@ package org.apache.sling.commons.html.impl; import java.io.IOException; import java.io.InputStream; +import java.util.Map; -import org.apache.felix.scr.annotations.Component; -import org.apache.felix.scr.annotations.Service; import org.apache.sling.commons.html.HtmlParser; +import org.apache.sling.commons.osgi.PropertiesUtil; import org.ccil.cowan.tagsoup.Parser; +import org.osgi.service.component.annotations.Activate; +import org.osgi.service.component.annotations.Component; +import org.osgi.service.metatype.annotations.AttributeDefinition; +import org.osgi.service.metatype.annotations.Designate; +import org.osgi.service.metatype.annotations.ObjectClassDefinition; import org.w3c.dom.Document; import org.xml.sax.ContentHandler; import org.xml.sax.InputSource; @@ -32,8 +37,19 @@ import org.xml.sax.SAXException; import org.xml.sax.ext.LexicalHandler; @Component -@Service(value=HtmlParser.class) +@Designate(ocd = HtmlParserImpl.Config.class) public class HtmlParserImpl implements HtmlParser { + + @ObjectClassDefinition(name="Apache Sling HTML Parser", description="Parser configuration") + static @interface Config { + + @AttributeDefinition(name = "Parser Properties", + description = "Additional properties to be applied to the underlying parser in the format of key=[true|false]") + String[] properties(); + + } + + private Map<String,Boolean> features; /** * @see org.apache.sling.commons.html.HtmlParser#parse(java.io.InputStream, java.lang.String, org.xml.sax.ContentHandler) @@ -44,6 +60,9 @@ public class HtmlParserImpl implements HtmlParser { if ( ch instanceof LexicalHandler ) { parser.setProperty("http://xml.org/sax/properties/lexical-handler", ch); } + for (String feature : features.keySet()){ + parser.setProperty(feature, features.get(feature)); + } parser.setContentHandler(ch); final InputSource source = new InputSource(stream); source.setEncoding(encoding); @@ -68,6 +87,9 @@ public class HtmlParserImpl implements HtmlParser { try { parser.setProperty("http://xml.org/sax/properties/lexical-handler", builder); + for (String feature : features.keySet()) { + parser.setProperty(feature, features.get(feature)); + } parser.setContentHandler(builder); parser.parse(source); } catch (SAXException se) { @@ -78,4 +100,12 @@ public class HtmlParserImpl implements HtmlParser { } return builder.getDocument(); } + + @Activate + private void activate(Config config) { + Map<String,String> temp = PropertiesUtil.toMap(config.properties(), new String[]{}); + for (String key : temp.keySet()){ + features.put(key, Boolean.valueOf(temp.get(key))); + } + } } -- To stop receiving notification emails like this one, please contact "[email protected]" <[email protected]>.
