This is an automated email from the ASF dual-hosted git repository.

paulk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/groovy.git

commit 30f5dcfc610d573d5d831a6f02384c1134372fa7
Author: Paul King <[email protected]>
AuthorDate: Thu Sep 17 23:28:11 2020 +1000

    GROOVY-9745: Provide a way to filter illegal/discouraged characters when 
using MarkupBuilder (closes #1366)
---
 .../groovy/runtime/StringGroovyMethods.java        |  55 ++++++++
 .../vmplugin/v8/PluginDefaultGroovyMethods.java    |  11 ++
 .../src/main/java/groovy/xml/MarkupBuilder.java    | 141 +++++++--------------
 .../xml/markupsupport/AllControlToUndefined.java   |  34 +++++
 .../xml/markupsupport/DoubleQuoteFilter.java       |  29 +++++
 .../xml/markupsupport/SingleQuoteFilter.java       |  29 +++++
 .../markupsupport/StandardControlToUndefined.java  |  35 +++++
 .../markupsupport/StandardXmlAttributeFilter.java  |  39 ++++++
 .../xml/markupsupport/StandardXmlFilter.java       |  39 ++++++
 .../xml/markupsupport/SurrogateToUndefined.java    |  34 +++++
 .../StreamingMarkupWriter.java                     |  51 ++++----
 .../xml/MarkupBuilderIllegalCharactersSpec.groovy  |  31 ++---
 12 files changed, 393 insertions(+), 135 deletions(-)

diff --git a/src/main/java/org/codehaus/groovy/runtime/StringGroovyMethods.java 
b/src/main/java/org/codehaus/groovy/runtime/StringGroovyMethods.java
index 73faf6e..9baf1f8 100644
--- a/src/main/java/org/codehaus/groovy/runtime/StringGroovyMethods.java
+++ b/src/main/java/org/codehaus/groovy/runtime/StringGroovyMethods.java
@@ -49,8 +49,10 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.NoSuchElementException;
+import java.util.Optional;
 import java.util.Set;
 import java.util.StringTokenizer;
+import java.util.function.Function;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -678,6 +680,59 @@ public class StringGroovyMethods extends 
DefaultGroovyMethodsSupport {
     }
 
     /**
+     * Iterates through this String a character at a time collecting either the
+     * original character or a transformed replacement String.
+     * Each transform returns an {@code Optional}: either one holding the
+     * replacement String, or {@code empty()} if the character is left unchanged.
+     * <p>
+     * <pre class="groovyTestCase">
+     * import java.util.function.Function
+     * import static java.util.Optional.*
+     *
+     * Function<Character, Optional<String>> xform1 = s -> s == 'o' ? of('_O') 
: empty()
+     * Function<Character, Optional<String>> xform2 = { it == 'G' ? of('G_') : 
empty() }
+     * assert "Groovy".collectReplacements([xform1, xform2]) == 'G_r_O_Ovy'
+     * </pre>
+     *
+     * @param self the original String
+     * @param transforms one or more transforms which potentially convert a 
single character to a transformed string
+     * @return the original String if no transform applied to any character,
+     *         otherwise a new String with each affected character replaced
+     *         as determined by the first matching function in {@code transforms}.
+     *
+     * @since 3.0.6
+     */
+    public static String collectReplacements(final String self, final 
List<Function<Character, Optional<String>>> transforms) {
+        if (self == null) return self;
+
+        StringBuilder sb = null; // lazy create for edge-case efficiency
+        for (int i = 0, len = self.length(); i < len; i++) {
+            final char ch = self.charAt(i);
+            Optional<String> replacement = Optional.empty();
+            for (Function<Character, Optional<String>> next : transforms) {
+                replacement = next.apply(ch);
+                if (replacement.isPresent()) {
+                    break;
+                }
+            }
+
+            if (replacement.isPresent()) {
+                // output differs from input; we write to our local buffer
+                if (sb == null) {
+                    sb = new StringBuilder((int) (1.1 * len));
+                    sb.append(self, 0, i);
+                }
+                sb.append(replacement.get());
+            } else if (sb != null) {
+                // earlier output differs from input; we write to our local 
buffer
+                sb.append(ch);
+            }
+        }
+
+        return sb == null ? self : sb.toString();
+    }
+
+    /**
      * Processes each regex group matched substring of the given CharSequence. 
If the closure
      * parameter takes one argument, an array with all match groups is passed 
to it.
      * If the closure takes as many arguments as there are match groups, then 
each
diff --git 
a/src/main/java/org/codehaus/groovy/vmplugin/v8/PluginDefaultGroovyMethods.java 
b/src/main/java/org/codehaus/groovy/vmplugin/v8/PluginDefaultGroovyMethods.java
index 398a337..11fe6f6 100644
--- 
a/src/main/java/org/codehaus/groovy/vmplugin/v8/PluginDefaultGroovyMethods.java
+++ 
b/src/main/java/org/codehaus/groovy/vmplugin/v8/PluginDefaultGroovyMethods.java
@@ -54,6 +54,7 @@ import java.util.function.IntFunction;
 import java.util.function.IntPredicate;
 import java.util.function.LongFunction;
 import java.util.function.LongPredicate;
+import java.util.function.Supplier;
 import java.util.function.ToDoubleFunction;
 import java.util.function.ToIntFunction;
 import java.util.function.ToLongFunction;
@@ -870,4 +871,14 @@ public class PluginDefaultGroovyMethods extends 
DefaultGroovyMethodsSupport {
         }
         return DoubleStream.of(self.getAsDouble());
     }
+
+    /**
+     * Provides the behavior of JDK 9's {@code Optional.or} on JDK 8.
+     */
+    public static <T> Optional<T> orElse(Optional<T> self, Supplier<? extends 
Optional<? extends T>> supplier) {
+        if (self.isPresent()) {
+            return self;
+        }
+        return (Optional<T>) supplier.get();
+    }
 }
diff --git a/subprojects/groovy-xml/src/main/java/groovy/xml/MarkupBuilder.java 
b/subprojects/groovy-xml/src/main/java/groovy/xml/MarkupBuilder.java
index 163a4d0..f11cf55 100644
--- a/subprojects/groovy-xml/src/main/java/groovy/xml/MarkupBuilder.java
+++ b/subprojects/groovy-xml/src/main/java/groovy/xml/MarkupBuilder.java
@@ -18,16 +18,25 @@
  */
 package groovy.xml;
 
-import groovy.lang.Closure;
 import groovy.namespace.QName;
 import groovy.util.BuilderSupport;
 import groovy.util.IndentPrinter;
+import groovy.xml.markupsupport.DoubleQuoteFilter;
+import groovy.xml.markupsupport.SingleQuoteFilter;
+import groovy.xml.markupsupport.StandardXmlAttributeFilter;
+import groovy.xml.markupsupport.StandardXmlFilter;
 import org.codehaus.groovy.runtime.StringGroovyMethods;
 
 import java.io.PrintWriter;
 import java.io.Writer;
+import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
+import java.util.Optional;
+import java.util.function.Function;
+
+import static 
org.codehaus.groovy.vmplugin.v8.PluginDefaultGroovyMethods.orElse;
 
 /**
  * A helper class for creating XML or HTML markup.
@@ -72,7 +81,15 @@ public class MarkupBuilder extends BuilderSupport {
     private boolean omitEmptyAttributes = false;
     private boolean expandEmptyElements = false;
     private boolean escapeAttributes = true;
-    private CharFilter characterFilter = CharFilter.NONE;
+    private List<Function<Character, Optional<String>>> additionalFilters = 
null;
+
+    public List<Function<Character, Optional<String>>> getAdditionalFilters() {
+        return additionalFilters;
+    }
+
+    public void setAdditionalFilters(List<Function<Character, 
Optional<String>>> additionalFilters) {
+        this.additionalFilters = additionalFilters;
+    }
 
     /**
      * Returns the escapeAttributes property value.
@@ -225,45 +242,6 @@ public class MarkupBuilder extends BuilderSupport {
         this.expandEmptyElements = expandEmptyElements;
     }
 
-    /**
-     * Returns the current character filter.
-     *
-     * @return the character filter used by this builder.
-     */
-    public CharFilter getCharacterFilter() { return this.characterFilter; }
-
-    /**
-     * Set a filter to limit the characters, that can appear in attribute 
values and text nodes.
-     * <p>
-     *     Some unicode character are either not allowed, discouraged or not 
referenceable  with an escape sequence
-     *     by specification. Especially XML parsers might have trouble dealing 
with some of those characters.
-     *     Since HTML strives for closeness to XML, filtering might be helpful 
there, too, albeit to a lesser degree.
-     * </p>
-     * <p>
-     *     Examples include null bytes (0x0), control characters (0x1C "file 
separator"), surrogates or non-characters.
-     *     If a filter policy is used, characters that fail to pass will be 
replaced by 0xFFFD (&#xFFFD;) in the output.
-     * </p>
-     * <p>
-     *     Available policies are:
-     *     <dl>
-     *         <dt>NONE (Default)</dt>
-     *         <dd>No filter is applied to the output</dd>
-     *         <dt>XML_ALL</dt>
-     *         <dd>
-     *             Allow all characters, that are neccessarily supported. 
According to the XML spec.<br>
-     *             Given as #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] 
( | [#x10000-#x10FFFF] )<br>
-     *             (as of Aug. 2020)
-     *         </dd>
-     *         <dt>XML_STRICT</dt>
-     *         <dd>
-     *             Filter out none-supported <it>and</it> discouraged 
characters, according to XML spec.
-     *         </dd>
-     *     </dl>
-     * </p>
-     * @param characterFilter character policy to use
-     */
-    public void setCharacterFilter(CharFilter characterFilter) { 
this.characterFilter = characterFilter; }
-
     protected IndentPrinter getPrinter() {
         return this.out;
     }
@@ -431,71 +409,38 @@ public class MarkupBuilder extends BuilderSupport {
      *         have been replaced with the corresponding XML entities.
      */
     private String escapeXmlValue(String value, boolean isAttrValue) {
-        if (value == null)
+        if (value == null) {
             throw new IllegalArgumentException();
-        return StringGroovyMethods.collectReplacements(value, new 
ReplacingClosure(isAttrValue, useDoubleQuotes, characterFilter));
+        }
+        List<Function<Character, Optional<String>>> transforms = new 
ArrayList<>();
+        transforms.add(new DefaultXmlEscapingFunction(isAttrValue, 
useDoubleQuotes));
+        if (additionalFilters != null) {
+            transforms.addAll(additionalFilters);
+        }
+        return StringGroovyMethods.collectReplacements(value, transforms);
     }
 
-    private static class ReplacingClosure extends Closure<String> {
+    public static class DefaultXmlEscapingFunction implements 
Function<Character, Optional<String>> {
         private final boolean isAttrValue;
-        private final boolean useDoubleQuotes;
-        private final CharFilter characterFilter;
 
-        public ReplacingClosure(boolean isAttrValue, boolean useDoubleQuotes, 
CharFilter characterFilter) {
-            super(null);
-            this.isAttrValue = isAttrValue;
-            this.useDoubleQuotes = useDoubleQuotes;
-            this.characterFilter = characterFilter;
-        }
+        private final Function<Character, Optional<String>> stdFilter = new 
StandardXmlFilter();
+        private final Function<Character, Optional<String>> attrFilter = new 
StandardXmlAttributeFilter();
+        private final Function<Character, Optional<String>> quoteFilter;
 
-        public String doCall(Character ch) {
-            switch (ch) {
-                case 0:
-                    if (characterFilter != CharFilter.NONE) return "\uFFFD";
-                    break;
-                case '&':
-                    return "&amp;";
-                case '<':
-                    return "&lt;";
-                case '>':
-                    return "&gt;";
-                case '\n':
-                    if (isAttrValue) return "&#10;";
-                    break;
-                case '\r':
-                    if (isAttrValue) return "&#13;";
-                    break;
-                case '\t':
-                    if (isAttrValue) return "&#09;";
-                    break;
-                case '"':
-                    // The double quote is only escaped if the value is for
-                    // an attribute and the builder is configured to output
-                    // attribute values inside double quotes.
-                    if (isAttrValue && useDoubleQuotes) return "&quot;";
-                    break;
-                case '\'':
-                    // The apostrophe is only escaped if the value is for an
-                    // attribute, as opposed to element content, and if the
-                    // builder is configured to surround attribute values with
-                    // single quotes.
-                    if (isAttrValue && !useDoubleQuotes) return "&apos;";
-                    break;
-            }
-            if (characterFilter != CharFilter.NONE) {
-                if (Character.isSurrogate(ch)
-                        || ch < 127 && ch !=  9 && ch != 10 && ch != 12 && ch 
!= 13) {
-                    return "\uFFFD";
-                }
-            }
-            if (characterFilter == CharFilter.XML_STRICT) {
-                if (Character.isISOControl(ch) || isNonCharacter(ch))  return 
"\uFFFD";
-            }
-            return null;
+        public DefaultXmlEscapingFunction(boolean isAttrValue, boolean 
useDoubleQuotes) {
+            this.isAttrValue = isAttrValue;
+            this.quoteFilter = useDoubleQuotes ? new DoubleQuoteFilter() : new 
SingleQuoteFilter();
         }
 
-        private boolean isNonCharacter(char ch) {
-            return 0xFDD0 <= ch && ch <= 0xFDEF || ((ch ^ 0xFFFE) == 0 || (ch 
^ 0xFFFF) == 0);
+        public Optional<String> apply(Character ch) {
+            return orElse(stdFilter.apply(ch),
+                    () -> {
+                        if (isAttrValue) {
+                            return orElse(attrFilter.apply(ch), () -> 
quoteFilter.apply(ch));
+                        }
+                        return Optional.empty();
+                    }
+            );
         }
     }
 
diff --git 
a/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/AllControlToUndefined.java
 
b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/AllControlToUndefined.java
new file mode 100644
index 0000000..d85bf06
--- /dev/null
+++ 
b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/AllControlToUndefined.java
@@ -0,0 +1,34 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package groovy.xml.markupsupport;
+
+import java.util.Optional;
+
+public class AllControlToUndefined extends StandardControlToUndefined {
+    public Optional<String> apply(Character ch) {
+        if (Character.isISOControl(ch) || isNonCharacter(ch)) {
+            return Optional.of("\uFFFD");
+        }
+        return super.apply(ch);
+    }
+
+    private boolean isNonCharacter(char ch) {
+        return 0xFDD0 <= ch && ch <= 0xFDEF || ((ch ^ 0xFFFE) == 0 || (ch ^ 
0xFFFF) == 0);
+    }
+}
diff --git 
a/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/DoubleQuoteFilter.java
 
b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/DoubleQuoteFilter.java
new file mode 100644
index 0000000..2545f5b
--- /dev/null
+++ 
b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/DoubleQuoteFilter.java
@@ -0,0 +1,29 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package groovy.xml.markupsupport;
+
+import java.util.Optional;
+import java.util.function.Function;
+
+public class DoubleQuoteFilter implements Function<Character, 
Optional<String>> {
+    public Optional<String> apply(Character ch) {
+        if (ch == '"') return Optional.of("&quot;");
+        else return Optional.empty();
+    }
+}
diff --git 
a/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/SingleQuoteFilter.java
 
b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/SingleQuoteFilter.java
new file mode 100644
index 0000000..fee3cce
--- /dev/null
+++ 
b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/SingleQuoteFilter.java
@@ -0,0 +1,29 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package groovy.xml.markupsupport;
+
+import java.util.Optional;
+import java.util.function.Function;
+
+public class SingleQuoteFilter implements Function<Character, 
Optional<String>> {
+    public Optional<String> apply(Character ch) {
+        if (ch == '\'') return Optional.of("&apos;");
+        else return Optional.empty();
+    }
+}
diff --git 
a/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardControlToUndefined.java
 
b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardControlToUndefined.java
new file mode 100644
index 0000000..d71f035
--- /dev/null
+++ 
b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardControlToUndefined.java
@@ -0,0 +1,35 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package groovy.xml.markupsupport;
+
+import java.util.Optional;
+import java.util.function.Function;
+
+public class StandardControlToUndefined implements Function<Character, 
Optional<String>> {
+    public Optional<String> apply(Character ch) {
+        if (ch < 32 && !isXmlAllowedControl(ch)) {
+            return Optional.of("\uFFFD");
+        }
+        return Optional.empty();
+    }
+
+    private boolean isXmlAllowedControl(char ch) {
+        return ch ==  9 || ch == 10 || ch == 12 || ch == 13;
+    }
+}
diff --git 
a/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardXmlAttributeFilter.java
 
b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardXmlAttributeFilter.java
new file mode 100644
index 0000000..f612ef5
--- /dev/null
+++ 
b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardXmlAttributeFilter.java
@@ -0,0 +1,39 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package groovy.xml.markupsupport;
+
+import java.util.Optional;
+import java.util.function.Function;
+
+public class StandardXmlAttributeFilter implements Function<Character, 
Optional<String>> {
+    public Optional<String> apply(Character ch) {
+        String result = null;
+        switch (ch) {
+            case '\n':
+                result = "&#10;";
+                break;
+            case '\r':
+                result = "&#13;";
+                break;
+            case '\t':
+                result = "&#09;";
+        }
+        return Optional.ofNullable(result);
+    }
+}
diff --git 
a/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardXmlFilter.java
 
b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardXmlFilter.java
new file mode 100644
index 0000000..771964a
--- /dev/null
+++ 
b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/StandardXmlFilter.java
@@ -0,0 +1,39 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package groovy.xml.markupsupport;
+
+import java.util.Optional;
+import java.util.function.Function;
+
+public class StandardXmlFilter implements Function<Character, 
Optional<String>> {
+    public Optional<String> apply(Character ch) {
+        String result = null;
+        switch (ch) {
+            case '&':
+                result = "&amp;";
+                break;
+            case '<':
+                result = "&lt;";
+                break;
+            case '>':
+                result = "&gt;";
+        }
+        return Optional.ofNullable(result);
+    }
+}
diff --git 
a/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/SurrogateToUndefined.java
 
b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/SurrogateToUndefined.java
new file mode 100644
index 0000000..2e8cf8a
--- /dev/null
+++ 
b/subprojects/groovy-xml/src/main/java/groovy/xml/markupsupport/SurrogateToUndefined.java
@@ -0,0 +1,34 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing,
+ *  software distributed under the License is distributed on an
+ *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ *  KIND, either express or implied.  See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package groovy.xml.markupsupport;
+
+import java.util.Optional;
+import java.util.function.Function;
+
+/**
+ * Converts surrogate chars to the Unicode replacement character (U+FFFD).
+ */
+public class SurrogateToUndefined implements Function<Character, 
Optional<String>> {
+    public Optional<String> apply(Character ch) {
+        if (Character.isSurrogate(ch)) {
+            return Optional.of("\uFFFD");
+        }
+        return Optional.empty();
+    }
+}
diff --git 
a/subprojects/groovy-xml/src/main/java/groovy/xml/streamingmarkupsupport/StreamingMarkupWriter.java
 
b/subprojects/groovy-xml/src/main/java/groovy/xml/streamingmarkupsupport/StreamingMarkupWriter.java
index 6f35a52..ec73b79 100644
--- 
a/subprojects/groovy-xml/src/main/java/groovy/xml/streamingmarkupsupport/StreamingMarkupWriter.java
+++ 
b/subprojects/groovy-xml/src/main/java/groovy/xml/streamingmarkupsupport/StreamingMarkupWriter.java
@@ -19,12 +19,18 @@
 package groovy.xml.streamingmarkupsupport;
 
 import groovy.io.EncodingAwareBufferedWriter;
+import groovy.xml.markupsupport.DoubleQuoteFilter;
+import groovy.xml.markupsupport.SingleQuoteFilter;
+import groovy.xml.markupsupport.StandardXmlAttributeFilter;
+import groovy.xml.markupsupport.StandardXmlFilter;
 
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetEncoder;
+import java.util.Optional;
+import java.util.function.Function;
 
 public class StreamingMarkupWriter extends Writer {
     protected final Writer writer;
@@ -34,7 +40,9 @@ public class StreamingMarkupWriter extends Writer {
     protected boolean writingAttribute = false;
     protected boolean haveHighSurrogate = false;
     protected StringBuilder surrogatePair = new StringBuilder(2);
-    private boolean useDoubleQuotes;
+    private final Function<Character, Optional<String>> stdFilter = new 
StandardXmlFilter();
+    private final Function<Character, Optional<String>> attrFilter = new 
StandardXmlAttributeFilter();
+    private final Function<Character, Optional<String>> quoteFilter;
     private final Writer escapedWriter = new Writer() {
         /* (non-Javadoc)
         * @see java.io.Writer#close()
@@ -54,15 +62,12 @@ public class StreamingMarkupWriter extends Writer {
         * @see java.io.Writer#write(int)
         */
         public void write(final int c) throws IOException {
-            if (c == '<') {
-                StreamingMarkupWriter.this.writer.write("&lt;");
-            } else if (c == '>') {
-                StreamingMarkupWriter.this.writer.write("&gt;");
-            } else if (c == '&') {
-                StreamingMarkupWriter.this.writer.write("&amp;");
-            } else {
-                StreamingMarkupWriter.this.write(c);
+            Optional<String> transformed = stdFilter.apply((char) c);
+            if (transformed.isPresent()) {
+                StreamingMarkupWriter.this.writer.write(transformed.get());
+                return;
             }
+            StreamingMarkupWriter.this.write(c);
         }
 
         /* (non-Javadoc)
@@ -92,7 +97,7 @@ public class StreamingMarkupWriter extends Writer {
     }
 
     public StreamingMarkupWriter(final Writer writer, final String encoding, 
boolean useDoubleQuotes) {
-        this.useDoubleQuotes = useDoubleQuotes;
+        this.quoteFilter = useDoubleQuotes ? new DoubleQuoteFilter() : new 
SingleQuoteFilter();
         this.writer = writer;
 
         if (encoding != null) {
@@ -168,19 +173,21 @@ public class StreamingMarkupWriter extends Writer {
                 this.writer.write("&#x");
                 this.writer.write(Integer.toHexString(c));
                 this.writer.write(';');
-            } else if (c == '\'' && this.writingAttribute && !useDoubleQuotes) 
{
-                this.writer.write("&apos;");
-            } else if (c == '"' && this.writingAttribute && useDoubleQuotes) {
-                this.writer.write("&quot;");
-            } else if (c == '\n' && this.writingAttribute) {
-                this.writer.write("&#10;");
-            } else if (c == '\r' && this.writingAttribute) {
-                this.writer.write("&#13;");
-            } else if (c == '\t' && this.writingAttribute) {
-                this.writer.write("&#09;");
-            } else {
-                this.writer.write(c);
+                return;
+            }
+            if (this.writingAttribute) {
+                Optional<String> transformed = attrFilter.apply((char) c);
+                if (transformed.isPresent()) {
+                    this.writer.write(transformed.get());
+                    return;
+                }
+                transformed = quoteFilter.apply((char) c);
+                if (transformed.isPresent()) {
+                    this.writer.write(transformed.get());
+                    return;
+                }
             }
+            this.writer.write(c);
         }
     }
 
diff --git 
a/subprojects/groovy-xml/src/test/groovy/groovy/xml/MarkupBuilderIllegalCharactersSpec.groovy
 
b/subprojects/groovy-xml/src/test/groovy/groovy/xml/MarkupBuilderIllegalCharactersSpec.groovy
index d978d9c..bd5e368 100644
--- 
a/subprojects/groovy-xml/src/test/groovy/groovy/xml/MarkupBuilderIllegalCharactersSpec.groovy
+++ 
b/subprojects/groovy-xml/src/test/groovy/groovy/xml/MarkupBuilderIllegalCharactersSpec.groovy
@@ -18,6 +18,9 @@
  */
 package groovy.xml
 
+import groovy.xml.markupsupport.AllControlToUndefined
+import groovy.xml.markupsupport.StandardControlToUndefined
+import groovy.xml.markupsupport.SurrogateToUndefined
 import spock.lang.Specification
 
 class MarkupBuilderIllegalCharactersSpec extends Specification {
@@ -34,13 +37,13 @@ class MarkupBuilderIllegalCharactersSpec extends 
Specification {
         given:
         def writer
         def xml
-        def characterFilter = MarkupBuilder.CharFilter.XML_STRICT
+        def additionalFilters = [new SurrogateToUndefined(), new 
AllControlToUndefined()]
 
         expect:
         characters.each {
             writer = new StringWriter()
             xml = new MarkupBuilder(writer)
-            xml.characterFilter = characterFilter
+            xml.additionalFilters = additionalFilters
             def encoded = shouldFilter ? '\uFFFD' : it
 
             xml.tag(attr: it, it)
@@ -52,12 +55,12 @@ class MarkupBuilderIllegalCharactersSpec extends Specification {
 
         where:
         characterRange              | shouldFilter | characters
-        'Null'                      | true         | nullCharacter         // Not neccessarily XML, not allowed in HTML
-        'C0 control w/o whitespace' | true         | c0Controls            // Not neccessarily XML, not in HTML char references
+        'Null'                      | true         | nullCharacter         // Not necessarily XML, not allowed in HTML
+        'C0 control w/o whitespace' | true         | c0Controls            // Not necessarily XML, not in HTML char references
         'ext control I'             | true         | extControl1           // Discouraged XML, not in HTML char references
         'Next line NEL'             | true         | nextLine              // Not in HTML char references
         'ext control II'            | true         | extControl2           // Discouraged XML, not in HTML char references
-        'Surrogates'                | true         | surrogates            // Not neccessarily XML, not in HTML char references
+        'Surrogates'                | true         | surrogates            // Not necessarily XML, not in HTML char references
         'Non-characters I'          | true         | nonCharacters1        // Discouraged XML, not in HTML char references
         'Non-characters II'         | true         | nonCharacters2        // Discouraged XML, not in HTML char references
     }
@@ -66,13 +69,13 @@ class MarkupBuilderIllegalCharactersSpec extends Specification {
         given:
         def writer
         def xml
-        def characterFilter = MarkupBuilder.CharFilter.XML_ALL
+        def additionalFilters = [new SurrogateToUndefined(), new StandardControlToUndefined()]
 
         expect:
         characters.each {
             writer = new StringWriter()
             xml = new MarkupBuilder(writer)
-            xml.characterFilter = characterFilter
+            xml.additionalFilters = additionalFilters
             def encoded = shouldFilter ? '\uFFFD' : it
 
             xml.tag(attr: it, it)
@@ -84,12 +87,12 @@ class MarkupBuilderIllegalCharactersSpec extends Specification {
 
         where:
         characterRange              | shouldFilter | characters
-        'Null'                      | true         | nullCharacter         // Not neccessarily XML, not allowed in HTML
-        'C0 control w/o whitespace' | true         | c0Controls            // Not neccessarily XML, not in HTML char references
+        'Null'                      | true         | nullCharacter         // Not necessarily XML, not allowed in HTML
+        'C0 control w/o whitespace' | true         | c0Controls            // Not necessarily XML, not in HTML char references
         'ext control I'             | false        | extControl1           // Discouraged XML, not in HTML char references
         'Next line NEL'             | false        | nextLine              // Not in HTML char references
         'ext control II'            | false        | extControl2           // Discouraged XML, not in HTML char references
-        'Surrogates'                | true         | surrogates            // Not neccessarily XML, not in HTML char references
+        'Surrogates'                | true         | surrogates            // Not necessarily XML, not in HTML char references
         'Non-characters I'          | false        | nonCharacters1        // Discouraged XML, not in HTML char references
         'Non-characters II'         | false        | nonCharacters2        // Discouraged XML, not in HTML char references
     }
@@ -98,13 +101,11 @@ class MarkupBuilderIllegalCharactersSpec extends Specification {
         given:
         def writer
         def xml
-        def characterFilter = MarkupBuilder.CharFilter.NONE
 
         expect:
         characters.each {
             writer = new StringWriter()
             xml = new MarkupBuilder(writer)
-            xml.characterFilter = characterFilter
             def encoded = shouldFilter ? '\uFFFD' : it
 
             xml.tag(attr: it, it)
@@ -116,12 +117,12 @@ class MarkupBuilderIllegalCharactersSpec extends Specification {
 
         where:
         characterRange              | shouldFilter | characters
-        'Null'                      | false        | nullCharacter         // Not neccessarily XML, not allowed in HTML
-        'C0 control w/o whitespace' | false        | c0Controls            // Not neccessarily XML, not in HTML char references
+        'Null'                      | false        | nullCharacter         // Not necessarily XML, not allowed in HTML
+        'C0 control w/o whitespace' | false        | c0Controls            // Not necessarily XML, not in HTML char references
         'ext control I'             | false        | extControl1           // Discouraged XML, not in HTML char references
         'Next line NEL'             | false        | nextLine              // Not in HTML char references
         'ext control II'            | false        | extControl2           // Discouraged XML, not in HTML char references
-        'Surrogates'                | false        | surrogates            // Not neccessarily XML, not in HTML char references
+        'Surrogates'                | false        | surrogates            // Not necessarily XML, not in HTML char references
         'Non-characters I'          | false        | nonCharacters1        // Discouraged XML, not in HTML char references
         'Non-characters II'         | false        | nonCharacters2        // Discouraged XML, not in HTML char references
     }

Reply via email to