This is an automated email from the ASF dual-hosted git repository. paulk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/groovy.git
commit 30f5dcfc610d573d5d831a6f02384c1134372fa7 Author: Paul King <[email protected]> AuthorDate: Thu Sep 17 23:28:11 2020 +1000 GROOVY-9745: Provide a way to filter illegal/discouraged characters when using MarkupBuilder (closes #1366) --- .../groovy/runtime/StringGroovyMethods.java | 55 ++++++++ .../vmplugin/v8/PluginDefaultGroovyMethods.java | 11 ++ .../src/main/java/groovy/xml/MarkupBuilder.java | 141 +++++++-------------- .../xml/markupsupport/AllControlToUndefined.java | 34 +++++ .../xml/markupsupport/DoubleQuoteFilter.java | 29 +++++ .../xml/markupsupport/SingleQuoteFilter.java | 29 +++++ .../markupsupport/StandardControlToUndefined.java | 35 +++++ .../markupsupport/StandardXmlAttributeFilter.java | 39 ++++++ .../xml/markupsupport/StandardXmlFilter.java | 39 ++++++ .../xml/markupsupport/SurrogateToUndefined.java | 34 +++++ .../StreamingMarkupWriter.java | 51 ++++---- .../xml/MarkupBuilderIllegalCharactersSpec.groovy | 31 ++--- 12 files changed, 393 insertions(+), 135 deletions(-) diff --git a/src/main/java/org/codehaus/groovy/runtime/StringGroovyMethods.java b/src/main/java/org/codehaus/groovy/runtime/StringGroovyMethods.java index 73faf6e..9baf1f8 100644 --- a/src/main/java/org/codehaus/groovy/runtime/StringGroovyMethods.java +++ b/src/main/java/org/codehaus/groovy/runtime/StringGroovyMethods.java @@ -49,8 +49,10 @@ import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; +import java.util.Optional; import java.util.Set; import java.util.StringTokenizer; +import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -678,6 +680,59 @@ public class StringGroovyMethods extends DefaultGroovyMethodsSupport { } /** + * Iterates through this String a character at a time collecting either the + * original character or a transformed replacement String. 
+ * The return value is an {@code Optional} either having a value equal to the transformed replacement String + * or {@code empty()} to indicate that no transformation is required. + * <p> + * <pre class="groovyTestCase"> + * import java.util.function.Function + * import static java.util.Optional.* + * + * Function<Character, Optional<String>> xform1 = s -> s == 'o' ? of('_O') : empty() + * Function<Character, Optional<String>> xform2 = { it == 'G' ? of('G_') : empty() } + * assert "Groovy".collectReplacements([xform1, xform2]) == 'G_r_O_Ovy' + * </pre> + * + * @param self the original String + * @param transforms one or more transforms which potentially convert a single character to a transformed string + * @return A new string in which all characters that require escaping + * have been replaced with the corresponding replacements + * as determined by the {@code transform} function. + * + * @since 3.0.6 + */ + public static String collectReplacements(final String self, final List<Function<Character, Optional<String>>> transforms) { + if (self == null) return self; + + StringBuilder sb = null; // lazy create for edge-case efficiency + for (int i = 0, len = self.length(); i < len; i++) { + final char ch = self.charAt(i); + Optional<String> replacement = Optional.empty(); + for (Function<Character, Optional<String>> next : transforms) { + replacement = next.apply(ch); + if (replacement.isPresent()) { + break; + } + } + + if (replacement.isPresent()) { + // output differs from input; we write to our local buffer + if (sb == null) { + sb = new StringBuilder((int) (1.1 * len)); + sb.append(self, 0, i); + } + sb.append(replacement.get()); + } else if (sb != null) { + // earlier output differs from input; we write to our local buffer + sb.append(ch); + } + } + + return sb == null ? self : sb.toString(); + } + + /** * Processes each regex group matched substring of the given CharSequence. 
If the closure * parameter takes one argument, an array with all match groups is passed to it. * If the closure takes as many arguments as there are match groups, then each diff --git a/src/main/java/org/codehaus/groovy/vmplugin/v8/PluginDefaultGroovyMethods.java b/src/main/java/org/codehaus/groovy/vmplugin/v8/PluginDefaultGroovyMethods.java index 398a337..11fe6f6 100644 --- a/src/main/java/org/codehaus/groovy/vmplugin/v8/PluginDefaultGroovyMethods.java +++ b/src/main/java/org/codehaus/groovy/vmplugin/v8/PluginDefaultGroovyMethods.java @@ -54,6 +54,7 @@ import java.util.function.IntFunction; import java.util.function.IntPredicate; import java.util.function.LongFunction; import java.util.function.LongPredicate; +import java.util.function.Supplier; import java.util.function.ToDoubleFunction; import java.util.function.ToIntFunction; import java.util.function.ToLongFunction; @@ -870,4 +871,14 @@ public class PluginDefaultGroovyMethods extends DefaultGroovyMethodsSupport { } return DoubleStream.of(self.getAsDouble()); } + + /** + * Provide JDK9 {@code or} on JDK8. + */ + public static <T> Optional<T> orElse(Optional<T> self, Supplier<? extends Optional<? 
extends T>> supplier) { + if (self.isPresent()) { + return self; + } + return (Optional<T>) supplier.get(); + } } diff --git a/subprojects/groovy-xml/src/main/java/groovy/xml/MarkupBuilder.java b/subprojects/groovy-xml/src/main/java/groovy/xml/MarkupBuilder.java index 163a4d0..f11cf55 100644 --- a/subprojects/groovy-xml/src/main/java/groovy/xml/MarkupBuilder.java +++ b/subprojects/groovy-xml/src/main/java/groovy/xml/MarkupBuilder.java @@ -18,16 +18,25 @@ */ package groovy.xml; -import groovy.lang.Closure; import groovy.namespace.QName; import groovy.util.BuilderSupport; import groovy.util.IndentPrinter; +import groovy.xml.markupsupport.DoubleQuoteFilter; +import groovy.xml.markupsupport.SingleQuoteFilter; +import groovy.xml.markupsupport.StandardXmlAttributeFilter; +import groovy.xml.markupsupport.StandardXmlFilter; import org.codehaus.groovy.runtime.StringGroovyMethods; import java.io.PrintWriter; import java.io.Writer; +import java.util.ArrayList; import java.util.Iterator; +import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.function.Function; + +import static org.codehaus.groovy.vmplugin.v8.PluginDefaultGroovyMethods.orElse; /** * A helper class for creating XML or HTML markup. @@ -72,7 +81,15 @@ public class MarkupBuilder extends BuilderSupport { private boolean omitEmptyAttributes = false; private boolean expandEmptyElements = false; private boolean escapeAttributes = true; - private CharFilter characterFilter = CharFilter.NONE; + private List<Function<Character, Optional<String>>> additionalFilters = null; + + public List<Function<Character, Optional<String>>> getAdditionalFilters() { + return additionalFilters; + } + + public void setAdditionalFilters(List<Function<Character, Optional<String>>> additionalFilters) { + this.additionalFilters = additionalFilters; + } /** * Returns the escapeAttributes property value. 
@@ -225,45 +242,6 @@ public class MarkupBuilder extends BuilderSupport { this.expandEmptyElements = expandEmptyElements; } - /** - * Returns the current character filter. - * - * @return the character filter used by this builder. - */ - public CharFilter getCharacterFilter() { return this.characterFilter; } - - /** - * Set a filter to limit the characters, that can appear in attribute values and text nodes. - * <p> - * Some unicode character are either not allowed, discouraged or not referenceable with an escape sequence - * by specification. Especially XML parsers might have trouble dealing with some of those characters. - * Since HTML strives for closeness to XML, filtering might be helpful there, too, albeit to a lesser degree. - * </p> - * <p> - * Examples include null bytes (0x0), control characters (0x1C "file separator"), surrogates or non-characters. - * If a filter policy is used, characters that fail to pass will be replaced by 0xFFFD (�) in the output. - * </p> - * <p> - * Available policies are: - * <dl> - * <dt>NONE (Default)</dt> - * <dd>No filter is applied to the output</dd> - * <dt>XML_ALL</dt> - * <dd> - * Allow all characters, that are neccessarily supported. According to the XML spec.<br> - * Given as #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] ( | [#x10000-#x10FFFF] )<br> - * (as of Aug. 2020) - * </dd> - * <dt>XML_STRICT</dt> - * <dd> - * Filter out none-supported <it>and</it> discouraged characters, according to XML spec. - * </dd> - * </dl> - * </p> - * @param characterFilter character policy to use - */ - public void setCharacterFilter(CharFilter characterFilter) { this.characterFilter = characterFilter; } - protected IndentPrinter getPrinter() { return this.out; } @@ -431,71 +409,38 @@ public class MarkupBuilder extends BuilderSupport { * have been replaced with the corresponding XML entities. 
*/ private String escapeXmlValue(String value, boolean isAttrValue) { - if (value == null) + if (value == null) { throw new IllegalArgumentException(); - return StringGroovyMethods.collectReplacements(value, new ReplacingClosure(isAttrValue, useDoubleQuotes, characterFilter)); + } + List<Function<Character, Optional<String>>> transforms = new ArrayList<>(); + transforms.add(new DefaultXmlEscapingFunction(isAttrValue, useDoubleQuotes)); + if (additionalFilters != null) { + transforms.addAll(additionalFilters); + } + return StringGroovyMethods.collectReplacements(value, transforms); } - private static class ReplacingClosure extends Closure<String> { + public static class DefaultXmlEscapingFunction implements Function<Character, Optional<String>> { private final boolean isAttrValue; - private final boolean useDoubleQuotes; - private final CharFilter characterFilter; - public ReplacingClosure(boolean isAttrValue, boolean useDoubleQuotes, CharFilter characterFilter) { - super(null); - this.isAttrValue = isAttrValue; - this.useDoubleQuotes = useDoubleQuotes; - this.characterFilter = characterFilter; - } + private final Function<Character, Optional<String>> stdFilter = new StandardXmlFilter(); + private final Function<Character, Optional<String>> attrFilter = new StandardXmlAttributeFilter(); + private final Function<Character, Optional<String>> quoteFilter; - public String doCall(Character ch) { - switch (ch) { - case 0: - if (characterFilter != CharFilter.NONE) return "\uFFFD"; - break; - case '&': - return "&amp;"; - case '<': - return "&lt;"; - case '>': - return "&gt;"; - case '\n': - if (isAttrValue) return "&#10;"; - break; - case '\r': - if (isAttrValue) return "&#13;"; - break; - case '\t': - if (isAttrValue) return "&#09;"; - break; - case '"': - // The double quote is only escaped if the value is for - // an attribute and the builder is configured to output - // attribute values inside double quotes.
- if (isAttrValue && useDoubleQuotes) return "&quot;"; - break; - case '\'': - // The apostrophe is only escaped if the value is for an - // attribute, as opposed to element content, and if the - // builder is configured to surround attribute values with - // single quotes. - if (isAttrValue && !useDoubleQuotes) return "&apos;"; - break; - } - if (characterFilter != CharFilter.NONE) { - if (Character.isSurrogate(ch) - || ch < 127 && ch != 9 && ch != 10 && ch != 12 && ch != 13) { - return "\uFFFD"; - } - } - if (characterFilter == CharFilter.XML_STRICT) { - if (Character.isISOControl(ch) || isNonCharacter(ch)) return "\uFFFD"; - } - return null; + public DefaultXmlEscapingFunction(boolean isAttrValue, boolean useDoubleQuotes) { + this.isAttrValue = isAttrValue; + this.quoteFilter = useDoubleQuotes ? new DoubleQuoteFilter() : new SingleQuoteFilter(); } - private boolean isNonCharacter(char ch) { - return 0xFDD0 <= ch && ch <= 0xFDEF || ((ch ^ 0xFFFE) == 0 || (ch ^ 0xFFFF) == 0); + public Optional<String> apply(Character ch) { + return orElse(stdFilter.apply(ch), + () -> { + if (isAttrValue) { + return orElse(attrFilter.apply(ch), () -> quoteFilter.apply(ch)); + } + return Optional.empty(); + } + ); } } diff --git a/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/AllControlToUndefined.java b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/AllControlToUndefined.java new file mode 100644 index 0000000..d85bf06 --- /dev/null +++ b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/AllControlToUndefined.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package groovy.xml.markupsupport; + +import java.util.Optional; + +public class AllControlToUndefined extends StandardControlToUndefined { + public Optional<String> apply(Character ch) { + if (Character.isISOControl(ch) || isNonCharacter(ch)) { + return Optional.of("\uFFFD"); + } + return super.apply(ch); + } + + private boolean isNonCharacter(char ch) { + return 0xFDD0 <= ch && ch <= 0xFDEF || ((ch ^ 0xFFFE) == 0 || (ch ^ 0xFFFF) == 0); + } +} diff --git a/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/DoubleQuoteFilter.java b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/DoubleQuoteFilter.java new file mode 100644 index 0000000..2545f5b --- /dev/null +++ b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/DoubleQuoteFilter.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package groovy.xml.markupsupport; + +import java.util.Optional; +import java.util.function.Function; + +public class DoubleQuoteFilter implements Function<Character, Optional<String>> { + public Optional<String> apply(Character ch) { + if (ch == '"') return Optional.of("&quot;"); + else return Optional.empty(); + } +} diff --git a/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/SingleQuoteFilter.java b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/SingleQuoteFilter.java new file mode 100644 index 0000000..fee3cce --- /dev/null +++ b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/SingleQuoteFilter.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package groovy.xml.markupsupport; + +import java.util.Optional; +import java.util.function.Function; + +public class SingleQuoteFilter implements Function<Character, Optional<String>> { + public Optional<String> apply(Character ch) { + if (ch == '\'') return Optional.of("&apos;"); + else return Optional.empty(); + } +} diff --git a/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardControlToUndefined.java b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardControlToUndefined.java new file mode 100644 index 0000000..d71f035 --- /dev/null +++ b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardControlToUndefined.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package groovy.xml.markupsupport; + +import java.util.Optional; +import java.util.function.Function; + +public class StandardControlToUndefined implements Function<Character, Optional<String>> { + public Optional<String> apply(Character ch) { + if (ch < 32 && !isXmlAllowedControl(ch)) { + return Optional.of("\uFFFD"); + } + return Optional.empty(); + } + + private boolean isXmlAllowedControl(char ch) { + return ch == 9 || ch == 10 || ch == 12 || ch == 13; + } +} diff --git a/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardXmlAttributeFilter.java b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardXmlAttributeFilter.java new file mode 100644 index 0000000..f612ef5 --- /dev/null +++ b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardXmlAttributeFilter.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package groovy.xml.markupsupport; + +import java.util.Optional; +import java.util.function.Function; + +public class StandardXmlAttributeFilter implements Function<Character, Optional<String>> { + public Optional<String> apply(Character ch) { + String result = null; + switch (ch) { + case '\n': + result = "&#10;"; + break; + case '\r': + result = "&#13;"; + break; + case '\t': + result = "&#09;"; + } + return Optional.ofNullable(result); + } +} diff --git a/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardXmlFilter.java b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardXmlFilter.java new file mode 100644 index 0000000..771964a --- /dev/null +++ b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardXmlFilter.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package groovy.xml.markupsupport; + +import java.util.Optional; +import java.util.function.Function; + +public class StandardXmlFilter implements Function<Character, Optional<String>> { + public Optional<String> apply(Character ch) { + String result = null; + switch (ch) { + case '&': + result = "&amp;"; + break; + case '<': + result = "&lt;"; + break; + case '>': + result = "&gt;"; + } + return Optional.ofNullable(result); + } +} diff --git a/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/SurrogateToUndefined.java b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/SurrogateToUndefined.java new file mode 100644 index 0000000..2e8cf8a --- /dev/null +++ b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/SurrogateToUndefined.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package groovy.xml.markupsupport; + +import java.util.Optional; +import java.util.function.Function; + +/** + * Convert surrogate chars to XML undefined character.
+ */ +public class SurrogateToUndefined implements Function<Character, Optional<String>> { + public Optional<String> apply(Character ch) { + if (Character.isSurrogate(ch)) { + return Optional.of("\uFFFD"); + } + return Optional.empty(); + } +} diff --git a/subprojects/groovy-xml/src/main/java/groovy/xml/streamingmarkupsupport/StreamingMarkupWriter.java b/subprojects/groovy-xml/src/main/java/groovy/xml/streamingmarkupsupport/StreamingMarkupWriter.java index 6f35a52..ec73b79 100644 --- a/subprojects/groovy-xml/src/main/java/groovy/xml/streamingmarkupsupport/StreamingMarkupWriter.java +++ b/subprojects/groovy-xml/src/main/java/groovy/xml/streamingmarkupsupport/StreamingMarkupWriter.java @@ -19,12 +19,18 @@ package groovy.xml.streamingmarkupsupport; import groovy.io.EncodingAwareBufferedWriter; +import groovy.xml.markupsupport.DoubleQuoteFilter; +import groovy.xml.markupsupport.SingleQuoteFilter; +import groovy.xml.markupsupport.StandardXmlAttributeFilter; +import groovy.xml.markupsupport.StandardXmlFilter; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.Writer; import java.nio.charset.Charset; import java.nio.charset.CharsetEncoder; +import java.util.Optional; +import java.util.function.Function; public class StreamingMarkupWriter extends Writer { protected final Writer writer; @@ -34,7 +40,9 @@ public class StreamingMarkupWriter extends Writer { protected boolean writingAttribute = false; protected boolean haveHighSurrogate = false; protected StringBuilder surrogatePair = new StringBuilder(2); - private boolean useDoubleQuotes; + private final Function<Character, Optional<String>> stdFilter = new StandardXmlFilter(); + private final Function<Character, Optional<String>> attrFilter = new StandardXmlAttributeFilter(); + private final Function<Character, Optional<String>> quoteFilter; private final Writer escapedWriter = new Writer() { /* (non-Javadoc) * @see java.io.Writer#close() @@ -54,15 +62,12 @@ public class StreamingMarkupWriter 
extends Writer { * @see java.io.Writer#write(int) */ public void write(final int c) throws IOException { - if (c == '<') { - StreamingMarkupWriter.this.writer.write("&lt;"); - } else if (c == '>') { - StreamingMarkupWriter.this.writer.write("&gt;"); - } else if (c == '&') { - StreamingMarkupWriter.this.writer.write("&amp;"); - } else { - StreamingMarkupWriter.this.write(c); + Optional<String> transformed = stdFilter.apply((char) c); + if (transformed.isPresent()) { + StreamingMarkupWriter.this.writer.write(transformed.get()); + return; } + StreamingMarkupWriter.this.write(c); } /* (non-Javadoc) @@ -92,7 +97,7 @@ public class StreamingMarkupWriter extends Writer { } public StreamingMarkupWriter(final Writer writer, final String encoding, boolean useDoubleQuotes) { - this.useDoubleQuotes = useDoubleQuotes; + this.quoteFilter = useDoubleQuotes ? new DoubleQuoteFilter() : new SingleQuoteFilter(); this.writer = writer; if (encoding != null) { @@ -168,19 +173,21 @@ public class StreamingMarkupWriter extends Writer { this.writer.write("&#x"); this.writer.write(Integer.toHexString(c)); this.writer.write(';'); - } else if (c == '\'' && this.writingAttribute && !useDoubleQuotes) { - this.writer.write("&apos;"); - } else if (c == '"' && this.writingAttribute && useDoubleQuotes) { - this.writer.write("&quot;"); - } else if (c == '\n' && this.writingAttribute) { - this.writer.write("&#10;"); - } else if (c == '\r' && this.writingAttribute) { - this.writer.write("&#13;"); - } else if (c == '\t' && this.writingAttribute) { - this.writer.write("&#09;"); - } else { - this.writer.write(c); + return; + } + if (this.writingAttribute) { + Optional<String> transformed = attrFilter.apply((char) c); + if (transformed.isPresent()) { + this.writer.write(transformed.get()); + return; + } + transformed = quoteFilter.apply((char) c); + if (transformed.isPresent()) { + this.writer.write(transformed.get()); + return; + } } + this.writer.write(c); } } diff --git
a/subprojects/groovy-xml/src/test/groovy/groovy/xml/MarkupBuilderIllegalCharactersSpec.groovy b/subprojects/groovy-xml/src/test/groovy/groovy/xml/MarkupBuilderIllegalCharactersSpec.groovy index d978d9c..bd5e368 100644 --- a/subprojects/groovy-xml/src/test/groovy/groovy/xml/MarkupBuilderIllegalCharactersSpec.groovy +++ b/subprojects/groovy-xml/src/test/groovy/groovy/xml/MarkupBuilderIllegalCharactersSpec.groovy @@ -18,6 +18,9 @@ */ package groovy.xml +import groovy.xml.markupsupport.AllControlToUndefined +import groovy.xml.markupsupport.StandardControlToUndefined +import groovy.xml.markupsupport.SurrogateToUndefined import spock.lang.Specification class MarkupBuilderIllegalCharactersSpec extends Specification { @@ -34,13 +37,13 @@ class MarkupBuilderIllegalCharactersSpec extends Specification { given: def writer def xml - def characterFilter = MarkupBuilder.CharFilter.XML_STRICT + def additionalFilters = [new SurrogateToUndefined(), new AllControlToUndefined()] expect: characters.each { writer = new StringWriter() xml = new MarkupBuilder(writer) - xml.characterFilter = characterFilter + xml.additionalFilters = additionalFilters def encoded = shouldFilter ? 
'\uFFFD' : it xml.tag(attr: it, it) @@ -52,12 +55,12 @@ class MarkupBuilderIllegalCharactersSpec extends Specification { where: characterRange | shouldFilter | characters - 'Null' | true | nullCharacter // Not neccessarily XML, not allowed in HTML - 'C0 control w/o whitespace' | true | c0Controls // Not neccessarily XML, not in HTML char references + 'Null' | true | nullCharacter // Not necessarily XML, not allowed in HTML + 'C0 control w/o whitespace' | true | c0Controls // Not necessarily XML, not in HTML char references 'ext control I' | true | extControl1 // Discouraged XML, not in HTML char references 'Next line NEL' | true | nextLine // Not in HTML char references 'ext control II' | true | extControl2 // Discouraged XML, not in HTML char references - 'Surrogates' | true | surrogates // Not neccessarily XML, not in HTML char references + 'Surrogates' | true | surrogates // Not necessarily XML, not in HTML char references 'Non-characters I' | true | nonCharacters1 // Discouraged XML, not in HTML char references 'Non-characters II' | true | nonCharacters2 // Discouraged XML, not in HTML char references } @@ -66,13 +69,13 @@ class MarkupBuilderIllegalCharactersSpec extends Specification { given: def writer def xml - def characterFilter = MarkupBuilder.CharFilter.XML_ALL + def additionalFilters = [new SurrogateToUndefined(), new StandardControlToUndefined()] expect: characters.each { writer = new StringWriter() xml = new MarkupBuilder(writer) - xml.characterFilter = characterFilter + xml.additionalFilters = additionalFilters def encoded = shouldFilter ? 
'\uFFFD' : it xml.tag(attr: it, it) @@ -84,12 +87,12 @@ class MarkupBuilderIllegalCharactersSpec extends Specification { where: characterRange | shouldFilter | characters - 'Null' | true | nullCharacter // Not neccessarily XML, not allowed in HTML - 'C0 control w/o whitespace' | true | c0Controls // Not neccessarily XML, not in HTML char references + 'Null' | true | nullCharacter // Not necessarily XML, not allowed in HTML + 'C0 control w/o whitespace' | true | c0Controls // Not necessarily XML, not in HTML char references 'ext control I' | false | extControl1 // Discouraged XML, not in HTML char references 'Next line NEL' | false | nextLine // Not in HTML char references 'ext control II' | false | extControl2 // Discouraged XML, not in HTML char references - 'Surrogates' | true | surrogates // Not neccessarily XML, not in HTML char references + 'Surrogates' | true | surrogates // Not necessarily XML, not in HTML char references 'Non-characters I' | false | nonCharacters1 // Discouraged XML, not in HTML char references 'Non-characters II' | false | nonCharacters2 // Discouraged XML, not in HTML char references } @@ -98,13 +101,11 @@ class MarkupBuilderIllegalCharactersSpec extends Specification { given: def writer def xml - def characterFilter = MarkupBuilder.CharFilter.NONE expect: characters.each { writer = new StringWriter() xml = new MarkupBuilder(writer) - xml.characterFilter = characterFilter def encoded = shouldFilter ? 
'\uFFFD' : it xml.tag(attr: it, it) @@ -116,12 +117,12 @@ class MarkupBuilderIllegalCharactersSpec extends Specification { where: characterRange | shouldFilter | characters - 'Null' | false | nullCharacter // Not neccessarily XML, not allowed in HTML - 'C0 control w/o whitespace' | false | c0Controls // Not neccessarily XML, not in HTML char references + 'Null' | false | nullCharacter // Not necessarily XML, not allowed in HTML + 'C0 control w/o whitespace' | false | c0Controls // Not necessarily XML, not in HTML char references 'ext control I' | false | extControl1 // Discouraged XML, not in HTML char references 'Next line NEL' | false | nextLine // Not in HTML char references 'ext control II' | false | extControl2 // Discouraged XML, not in HTML char references - 'Surrogates' | false | surrogates // Not neccessarily XML, not in HTML char references + 'Surrogates' | false | surrogates // Not necessarily XML, not in HTML char references 'Non-characters I' | false | nonCharacters1 // Discouraged XML, not in HTML char references 'Non-characters II' | false | nonCharacters2 // Discouraged XML, not in HTML char references }
