This is an automated email from the ASF dual-hosted git repository.

rjung pushed a commit to branch 9.0.x
in repository https://gitbox.apache.org/repos/asf/tomcat.git


The following commit(s) were added to refs/heads/9.0.x by this push:
     new 275a1b1c1f Improve performance of escaping in AccessLogValve roughly 
by a factor of two.
275a1b1c1f is described below

commit 275a1b1c1fadd996987be075304640393298dde8
Author: Rainer Jung <rainer.j...@kippdata.de>
AuthorDate: Thu Apr 27 09:53:58 2023 +0200

    Improve performance of escaping in AccessLogValve roughly by a factor of 
two.
    
    - this is in the hot code path
    - move most of the special cases behind the typical case of characters
      which do not need escaping
    - as long as we encounter characters that do not need escaping,
      don't output them char by char but instead remember where this
      char sequence started and output them all at once when the next
      char which needs escaping is encountered (or at the end of the input).
---
 .../catalina/valves/AbstractAccessLogValve.java    | 93 +++++++++++++++-------
 .../valves/TestAbstractAccessLogValveEscape.java   |  9 ++-
 webapps/docs/changelog.xml                         |  4 +
 3 files changed, 77 insertions(+), 29 deletions(-)

diff --git a/java/org/apache/catalina/valves/AbstractAccessLogValve.java 
b/java/org/apache/catalina/valves/AbstractAccessLogValve.java
index c285fd7d66..567d5c1f1c 100644
--- a/java/org/apache/catalina/valves/AbstractAccessLogValve.java
+++ b/java/org/apache/catalina/valves/AbstractAccessLogValve.java
@@ -1858,37 +1858,74 @@ public abstract class AbstractAccessLogValve extends 
ValveBase implements Access
             return;
         }
 
-        for (char c : input.toCharArray()) {
-            switch (c) {
-                // " and \
-                case '\\':
-                    dest.append("\\\\");
-                    break;
-                case '\"':
-                    dest.append("\\\"");
-                    break;
-                // Standard C escapes for whitespace (not all standard C 
escapes)
-                case '\f':
-                    dest.append("\\f");
-                    break;
-                case '\n':
-                    dest.append("\\n");
-                    break;
-                case '\r':
-                    dest.append("\\r");
-                    break;
-                case '\t':
-                    dest.append("\\t");
-                    break;
-                default:
-                    // Control, delete (127) or above 127
-                    if (c < 32 || c > 126) {
+        int len = input.length();
+        // As long as we don't encounter chars that need escaping,
+        // we only remember start and length of that string part.
+        // "next" is the start of the string part containing these chars,
+        // "current - 1" is its end. So writing from "next" with length
+        // "current - next" writes that part.
+        // We write that part whenever we find a character to escape and the
+        // unchanged and unwritten string part is not empty.
+        int next = 0;
+        char c;
+        for (int current = 0; current < len; current++) {
+            c = input.charAt(current);
+            // Fast path
+            if (c >= 32 && c < 127) {
+                // special case " and \
+                switch (c) {
+                    case '\\': // dec 92
+                        // Write unchanged string parts
+                        if (current > next) {
+                            dest.write(input, next, current - next);
+                        }
+                        next = current + 1;
+                        dest.append("\\\\");
+                        break;
+                    case '\"': // dec 34
+                        // Write unchanged string parts
+                        if (current > next) {
+                            dest.write(input, next, current - next);
+                        }
+                        next = current + 1;
+                        dest.append("\\\"");
+                        break;
+                    // Don't output individual unchanged chars,
+                    // write the sub string only when the first char to encode
+                    // is encountered plus at the end.
+                    default:
+                }
+            // Control (0-31), delete (127) or above 127
+            } else {
+                // Write unchanged string parts
+                if (current > next) {
+                    dest.write(input, next, current - next);
+                }
+                next = current + 1;
+                switch (c) {
+                    // Standard escapes for some control chars
+                    case '\f': // dec 12
+                        dest.append("\\f");
+                        break;
+                    case '\n': // dec 10
+                        dest.append("\\n");
+                        break;
+                    case '\r': // dec 13
+                        dest.append("\\r");
+                        break;
+                    case '\t': // dec 09
+                        dest.append("\\t");
+                        break;
+                    // Unicode escape \\uXXXX
+                    default:
                         dest.append("\\u");
                         dest.append(HexUtils.toHexString(c));
-                    } else {
-                        dest.append(c);
-                    }
+                }
             }
         }
+        // Write remaining unchanged string parts
+        if (len > next) {
+            dest.write(input, next, len - next);
+        }
     }
 }
diff --git 
a/test/org/apache/catalina/valves/TestAbstractAccessLogValveEscape.java 
b/test/org/apache/catalina/valves/TestAbstractAccessLogValveEscape.java
index b0e0f3fabe..d6f5bc47ff 100644
--- a/test/org/apache/catalina/valves/TestAbstractAccessLogValveEscape.java
+++ b/test/org/apache/catalina/valves/TestAbstractAccessLogValveEscape.java
@@ -40,12 +40,19 @@ public class TestAbstractAccessLogValveEscape {
         parameters.add(new String[] { null, "-" });
         parameters.add(new String[] { "", "-" });
         parameters.add(new String[] { "ok", "ok" });
+        parameters.add(new String[] { "o\fk", "o\\fk" });
+        parameters.add(new String[] { "o\nk", "o\\nk" });
+        parameters.add(new String[] { "o\rk", "o\\rk" });
         parameters.add(new String[] { "o\tk", "o\\tk" });
+        parameters.add(new String[] { "o\"k", "o\\\"k" });
+        parameters.add(new String[] { "o\\k", "o\\\\k" });
         parameters.add(new String[] { "o\u0002k", "o\\u0002k" });
         parameters.add(new String[] { "o\u007fk", "o\\u007fk" });
         parameters.add(new String[] { "o\u0080k", "o\\u0080k" });
         parameters.add(new String[] { "o\u00ffk", "o\\u00ffk" });
-        parameters.add(new String[] { "o\"k", "o\\\"k" });
+        parameters.add(new String[] { "o\u8765k", "o\\u8765k" });
+        parameters.add(new String[] { "12345\u0002\u00036\t789\"", 
"12345\\u0002\\u00036\\t789\\\"" });
+        parameters.add(new String[] { "\u0002\u00036\t789\"12345", 
"\\u0002\\u00036\\t789\\\"12345" });
 
         return parameters;
     }
diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml
index 72870aa8df..78dceb480e 100644
--- a/webapps/docs/changelog.xml
+++ b/webapps/docs/changelog.xml
@@ -120,6 +120,10 @@
         Change output of vertical tab in <code>AccessLogValve</code> from
        <code>\v</code> to <code>\u000b</code>. (rjung)
       </update>
+      <update>
+        Improve performance of escaping in <code>AccessLogValve</code>
+       roughly by a factor of two. (rjung)
+      </update>
     </changelog>
   </subsection>
   <subsection name="Coyote">


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
For additional commands, e-mail: dev-h...@tomcat.apache.org

Reply via email to