Author: markt Date: Wed Sep 11 13:45:36 2013 New Revision: 1521837 URL: http://svn.apache.org/r1521837 Log: Profiling indicates that UTF-8 encoding is the bottleneck. Harmony's encoder is slightly faster than Java 7's so use Harmony's.
// Added: tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Encoder.java
// Modified: tomcat/trunk/java/org/apache/tomcat/websocket/WsRemoteEndpointImplBase.java
//
// Added: tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Encoder.java
// URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Encoder.java?rev=1521837&view=auto
// ==============================================================================
// --- tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Encoder.java (added)
// +++ tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Encoder.java Wed Sep 11 13:45:36 2013
// @@ -0,0 +1,230 @@
//
// The patch body is shown below as plain Java (diff '+' markers removed).
// Package declared by the patch: org.apache.tomcat.util.buf

/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.StandardCharsets;

/**
 * Encodes characters as bytes using UTF-8. Extracted from Apache Harmony.
 * <p>
 * Two code paths are provided: a fast path that works directly on the backing
 * arrays when both buffers expose one, and a generic path that goes through
 * the buffer accessors. Both paths leave the buffer positions just after the
 * last character that was completely encoded.
 */
public class Utf8Encoder extends CharsetEncoder {

    /** First UTF-16 code unit of the surrogate range (start of high surrogates). */
    private static final int MIN_HIGH_SURROGATE = 0xD800;

    /** First UTF-16 low-surrogate code unit. */
    private static final int MIN_LOW_SURROGATE = 0xDC00;

    /** Last UTF-16 code unit of the surrogate range (end of low surrogates). */
    private static final int MAX_LOW_SURROGATE = 0xDFFF;

    /** First code point that needs a surrogate pair in UTF-16. */
    private static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;

    /**
     * Creates an encoder for UTF-8 that advertises an average of 1.1 and a
     * maximum of 4 bytes per input character.
     */
    public Utf8Encoder() {
        super(StandardCharsets.UTF_8, 1.1f, 4.0f);
    }

    /**
     * Encodes as many characters as possible from {@code in} into {@code out}.
     *
     * @param in  the characters to encode
     * @param out the buffer that receives the UTF-8 bytes
     * @return {@link CoderResult#UNDERFLOW} when the input is exhausted (or
     *         ends with a high surrogate that still needs its partner),
     *         {@link CoderResult#OVERFLOW} when the next character does not
     *         fit into {@code out}, or a malformed-input result of length 1
     *         for an unpaired surrogate
     */
    @Override
    protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
        if (in.hasArray() && out.hasArray()) {
            return encodeHasArray(in, out);
        }
        return encodeNotHasArray(in, out);
    }

    /**
     * Fast path operating directly on the backing arrays of both buffers.
     * All indexes are absolute array indexes, i.e. they include the buffers'
     * {@code arrayOffset()}, so sliced buffers are handled correctly.
     */
    private CoderResult encodeHasArray(CharBuffer in, ByteBuffer out) {
        final char[] cArr = in.array();
        final byte[] bArr = out.array();
        final int inOffset = in.arrayOffset();
        final int outOffset = out.arrayOffset();
        final int inEnd = inOffset + in.limit();
        final int outEnd = outOffset + out.limit();
        int x = inOffset + in.position();
        int outPos = outOffset + out.position();
        CoderResult result = CoderResult.UNDERFLOW;

        while (x < inEnd) {
            final int jchar = cArr[x];

            if (jchar <= 0x7F) {
                if (outEnd - outPos < 1) {
                    result = CoderResult.OVERFLOW;
                    break;
                }
                bArr[outPos++] = (byte) jchar;
                x++;
            } else if (jchar <= 0x7FF) {
                if (outEnd - outPos < 2) {
                    result = CoderResult.OVERFLOW;
                    break;
                }
                bArr[outPos++] = (byte) (0xC0 | (jchar >> 6));
                bArr[outPos++] = (byte) (0x80 | (jchar & 0x3F));
                x++;
            } else if (jchar >= MIN_HIGH_SURROGATE && jchar <= MAX_LOW_SURROGATE) {
                // The input has to hold one more char to complete the pair.
                if (inEnd - x < 2) {
                    break;
                }
                if (outEnd - outPos < 4) {
                    result = CoderResult.OVERFLOW;
                    break;
                }
                // A pair must not start with a low surrogate.
                if (jchar >= MIN_LOW_SURROGATE) {
                    result = CoderResult.malformedForLength(1);
                    break;
                }
                final int jchar2 = cArr[x + 1];
                // The second char must be a low surrogate (0xDC00-0xDFFF).
                if (jchar2 < MIN_LOW_SURROGATE || jchar2 > MAX_LOW_SURROGATE) {
                    result = CoderResult.malformedForLength(1);
                    break;
                }
                final int n = toCodePoint(jchar, jchar2);
                bArr[outPos++] = (byte) (0xF0 | (n >> 18));
                bArr[outPos++] = (byte) (0x80 | ((n >> 12) & 0x3F));
                bArr[outPos++] = (byte) (0x80 | ((n >> 6) & 0x3F));
                bArr[outPos++] = (byte) (0x80 | (n & 0x3F));
                x += 2;
            } else {
                if (outEnd - outPos < 3) {
                    result = CoderResult.OVERFLOW;
                    break;
                }
                bArr[outPos++] = (byte) (0xE0 | (jchar >> 12));
                bArr[outPos++] = (byte) (0x80 | ((jchar >> 6) & 0x3F));
                bArr[outPos++] = (byte) (0x80 | (jchar & 0x3F));
                x++;
            }
        }

        // Translate the absolute array indexes back into buffer positions.
        in.position(x - inOffset);
        out.position(outPos - outOffset);
        return result;
    }

    /**
     * Generic path for buffers without an accessible backing array (direct,
     * read-only or String-backed buffers). Input is read with absolute gets so
     * the input position only moves past characters that were fully encoded.
     */
    private CoderResult encodeNotHasArray(CharBuffer in, ByteBuffer out) {
        final int limit = in.limit();
        int pos = in.position();
        int outRemaining = out.remaining();
        try {
            while (pos < limit) {
                final int jchar = in.get(pos);

                if (jchar <= 0x7F) {
                    if (outRemaining < 1) {
                        return CoderResult.OVERFLOW;
                    }
                    out.put((byte) jchar);
                    outRemaining--;
                    pos++;
                } else if (jchar <= 0x7FF) {
                    if (outRemaining < 2) {
                        return CoderResult.OVERFLOW;
                    }
                    out.put((byte) (0xC0 | (jchar >> 6)));
                    out.put((byte) (0x80 | (jchar & 0x3F)));
                    outRemaining -= 2;
                    pos++;
                } else if (jchar >= MIN_HIGH_SURROGATE && jchar <= MAX_LOW_SURROGATE) {
                    // The input has to hold one more char to complete the pair.
                    if (limit - pos < 2) {
                        return CoderResult.UNDERFLOW;
                    }
                    if (outRemaining < 4) {
                        return CoderResult.OVERFLOW;
                    }
                    // A pair must not start with a low surrogate.
                    if (jchar >= MIN_LOW_SURROGATE) {
                        return CoderResult.malformedForLength(1);
                    }
                    final int jchar2 = in.get(pos + 1);
                    // The second char must be a low surrogate (0xDC00-0xDFFF).
                    if (jchar2 < MIN_LOW_SURROGATE || jchar2 > MAX_LOW_SURROGATE) {
                        return CoderResult.malformedForLength(1);
                    }
                    final int n = toCodePoint(jchar, jchar2);
                    out.put((byte) (0xF0 | (n >> 18)));
                    out.put((byte) (0x80 | ((n >> 12) & 0x3F)));
                    out.put((byte) (0x80 | ((n >> 6) & 0x3F)));
                    out.put((byte) (0x80 | (n & 0x3F)));
                    outRemaining -= 4;
                    pos += 2;
                } else {
                    if (outRemaining < 3) {
                        return CoderResult.OVERFLOW;
                    }
                    out.put((byte) (0xE0 | (jchar >> 12)));
                    out.put((byte) (0x80 | ((jchar >> 6) & 0x3F)));
                    out.put((byte) (0x80 | (jchar & 0x3F)));
                    outRemaining -= 3;
                    pos++;
                }
            }
            return CoderResult.UNDERFLOW;
        } finally {
            // Also runs when a put fails, so the input position always
            // reflects exactly the characters that were encoded.
            in.position(pos);
        }
    }

    /**
     * Combines a validated surrogate pair into its Unicode scalar value:
     * {@code (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000}.
     */
    private static int toCodePoint(int high, int low) {
        return ((high - MIN_HIGH_SURROGATE) << 10) + (low - MIN_LOW_SURROGATE)
                + MIN_SUPPLEMENTARY_CODE_POINT;
    }
}

// \ No newline at end of file
//
// Modified: tomcat/trunk/java/org/apache/tomcat/websocket/WsRemoteEndpointImplBase.java
// URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/tomcat/websocket/WsRemoteEndpointImplBase.java?rev=1521837&r1=1521836&r2=1521837&view=diff
// ==============================================================================
// --- tomcat/trunk/java/org/apache/tomcat/websocket/WsRemoteEndpointImplBase.java (original)
// +++ tomcat/trunk/java/org/apache/tomcat/websocket/WsRemoteEndpointImplBase.java Wed Sep 11 13:45:36 2013
// @@ -23,7 +23,6 @@
//  import java.nio.ByteBuffer;
//  import java.nio.CharBuffer;
//  import java.nio.charset.CharsetEncoder;
//  import java.nio.charset.CoderResult;
// -import java.nio.charset.StandardCharsets;
//  import java.util.ArrayDeque;
//  import java.util.ArrayList;
import java.util.List; @@ -45,6 +44,7 @@ import javax.websocket.SendResult; import org.apache.juli.logging.Log; import org.apache.juli.logging.LogFactory; +import org.apache.tomcat.util.buf.Utf8Encoder; import org.apache.tomcat.util.res.StringManager; public abstract class WsRemoteEndpointImplBase implements RemoteEndpoint { @@ -75,7 +75,7 @@ public abstract class WsRemoteEndpointIm // Max size of WebSocket header is 14 bytes private final ByteBuffer headerBuffer = ByteBuffer.allocate(14); private final ByteBuffer outputBuffer = ByteBuffer.allocate(8192); - private final CharsetEncoder encoder = StandardCharsets.UTF_8.newEncoder(); + private final CharsetEncoder encoder = new Utf8Encoder(); private final ByteBuffer encoderBuffer = ByteBuffer.allocate(8192); private final AtomicBoolean batchingAllowed = new AtomicBoolean(false); private volatile long sendTimeout = -1; --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org