Author: markt
Date: Wed Sep 11 13:45:36 2013
New Revision: 1521837
URL: http://svn.apache.org/r1521837
Log:
Profiling indicates that UTF-8 encoding is the bottleneck. Harmony's
encoder is slightly faster than Java 7's so use Harmony's.
Added:
tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Encoder.java
Modified:
tomcat/trunk/java/org/apache/tomcat/websocket/WsRemoteEndpointImplBase.java
Added: tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Encoder.java
URL:
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Encoder.java?rev=1521837&view=auto
==============================================================================
--- tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Encoder.java (added)
+++ tomcat/trunk/java/org/apache/tomcat/util/buf/Utf8Encoder.java Wed Sep 11
13:45:36 2013
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tomcat.util.buf;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Encodes characters as bytes using UTF-8. Extracted from Apache Harmony.
+ */
public class Utf8Encoder extends CharsetEncoder {

    /**
     * Creates a UTF-8 encoder that reports an average of 1.1 and a maximum of
     * 4.0 bytes per input char to {@link CharsetEncoder}.
     */
    public Utf8Encoder() {
        super(StandardCharsets.UTF_8, 1.1f, 4.0f);
    }

    /**
     * Encodes as many chars as possible from {@code in} into {@code out}.
     * Uses direct array access when both buffers expose a backing array and
     * falls back to the buffer accessors otherwise.
     *
     * @param in  the chars to encode; its position is advanced past every char
     *            that was fully encoded
     * @param out the destination for the UTF-8 bytes
     * @return {@link CoderResult#UNDERFLOW} when the input is exhausted or ends
     *         with a high surrogate that still needs its partner,
     *         {@link CoderResult#OVERFLOW} when the next char does not fit in
     *         {@code out}, or a malformed-input result of length 1 positioned
     *         at an unpaired surrogate
     */
    @Override
    protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
        if (in.hasArray() && out.hasArray()) {
            return encodeHasArray(in, out);
        }
        return encodeNotHasArray(in, out);
    }

    /**
     * Array-backed fast path. Works on absolute indexes into the backing
     * arrays and writes the buffer positions back once, on exit.
     */
    private CoderResult encodeHasArray(CharBuffer in, ByteBuffer out) {
        final char[] cArr = in.array();
        final byte[] bArr = out.array();
        // A sliced or offset-wrapped buffer does not start at array index 0,
        // so every index into the backing array must include arrayOffset().
        final int inOffset = in.arrayOffset();
        final int outOffset = out.arrayOffset();
        final int inEnd = inOffset + in.limit();
        final int outEnd = outOffset + out.limit();
        int inPos = inOffset + in.position();
        int outPos = outOffset + out.position();
        CoderResult result = CoderResult.UNDERFLOW;

        while (inPos < inEnd) {
            final char c = cArr[inPos];

            if (c <= 0x7F) {
                if (outEnd - outPos < 1) {
                    result = CoderResult.OVERFLOW;
                    break;
                }
                bArr[outPos++] = (byte) c;
                inPos++;

            } else if (c <= 0x7FF) {
                if (outEnd - outPos < 2) {
                    result = CoderResult.OVERFLOW;
                    break;
                }
                bArr[outPos++] = (byte) (0xC0 | (c >> 6));
                bArr[outPos++] = (byte) (0x80 | (c & 0x3F));
                inPos++;

            } else if (Character.isSurrogate(c)) {
                // A pair must start with a high surrogate.
                if (Character.isLowSurrogate(c)) {
                    result = CoderResult.malformedForLength(1);
                    break;
                }
                // The input has to hold one more char to complete the pair.
                if (inEnd - inPos < 2) {
                    result = CoderResult.UNDERFLOW;
                    break;
                }
                final char low = cArr[inPos + 1];
                // The partner must lie in 0xDC00-0xDFFF; checking only the
                // lower bound would accept chars above the surrogate range.
                if (!Character.isLowSurrogate(low)) {
                    result = CoderResult.malformedForLength(1);
                    break;
                }
                if (outEnd - outPos < 4) {
                    result = CoderResult.OVERFLOW;
                    break;
                }
                // n = (c - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000
                final int n = Character.toCodePoint(c, low);
                bArr[outPos++] = (byte) (0xF0 | (n >> 18));
                bArr[outPos++] = (byte) (0x80 | ((n >> 12) & 0x3F));
                bArr[outPos++] = (byte) (0x80 | ((n >> 6) & 0x3F));
                bArr[outPos++] = (byte) (0x80 | (n & 0x3F));
                inPos += 2;

            } else {
                if (outEnd - outPos < 3) {
                    result = CoderResult.OVERFLOW;
                    break;
                }
                bArr[outPos++] = (byte) (0xE0 | (c >> 12));
                bArr[outPos++] = (byte) (0x80 | ((c >> 6) & 0x3F));
                bArr[outPos++] = (byte) (0x80 | (c & 0x3F));
                inPos++;
            }
        }

        // Convert the absolute array indexes back to buffer positions.
        in.position(inPos - inOffset);
        out.position(outPos - outOffset);
        return result;
    }

    /**
     * Generic path for buffers without an accessible backing array. Input is
     * read with absolute gets so that nothing is consumed until the bytes for
     * a char (or surrogate pair) have been written; the input position is
     * committed in the {@code finally} block.
     */
    private CoderResult encodeNotHasArray(CharBuffer in, ByteBuffer out) {
        final int limit = in.limit();
        int pos = in.position();
        try {
            while (pos < limit) {
                final char c = in.get(pos);

                if (c <= 0x7F) {
                    if (out.remaining() < 1) {
                        return CoderResult.OVERFLOW;
                    }
                    out.put((byte) c);
                    pos++;

                } else if (c <= 0x7FF) {
                    if (out.remaining() < 2) {
                        return CoderResult.OVERFLOW;
                    }
                    out.put((byte) (0xC0 | (c >> 6)));
                    out.put((byte) (0x80 | (c & 0x3F)));
                    pos++;

                } else if (Character.isSurrogate(c)) {
                    // A pair must start with a high surrogate.
                    if (Character.isLowSurrogate(c)) {
                        return CoderResult.malformedForLength(1);
                    }
                    // The input has to hold one more char to complete the pair.
                    if (limit - pos < 2) {
                        return CoderResult.UNDERFLOW;
                    }
                    final char low = in.get(pos + 1);
                    // The partner must lie in 0xDC00-0xDFFF; checking only the
                    // lower bound would accept chars above the surrogate range.
                    if (!Character.isLowSurrogate(low)) {
                        return CoderResult.malformedForLength(1);
                    }
                    if (out.remaining() < 4) {
                        return CoderResult.OVERFLOW;
                    }
                    // n = (c - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000
                    final int n = Character.toCodePoint(c, low);
                    out.put((byte) (0xF0 | (n >> 18)));
                    out.put((byte) (0x80 | ((n >> 12) & 0x3F)));
                    out.put((byte) (0x80 | ((n >> 6) & 0x3F)));
                    out.put((byte) (0x80 | (n & 0x3F)));
                    pos += 2;

                } else {
                    if (out.remaining() < 3) {
                        return CoderResult.OVERFLOW;
                    }
                    out.put((byte) (0xE0 | (c >> 12)));
                    out.put((byte) (0x80 | ((c >> 6) & 0x3F)));
                    out.put((byte) (0x80 | (c & 0x3F)));
                    pos++;
                }
            }
            return CoderResult.UNDERFLOW;
        } finally {
            // Report exactly the chars whose bytes were written to out.
            in.position(pos);
        }
    }

}
\ No newline at end of file
Modified:
tomcat/trunk/java/org/apache/tomcat/websocket/WsRemoteEndpointImplBase.java
URL:
http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/tomcat/websocket/WsRemoteEndpointImplBase.java?rev=1521837&r1=1521836&r2=1521837&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/tomcat/websocket/WsRemoteEndpointImplBase.java
(original)
+++ tomcat/trunk/java/org/apache/tomcat/websocket/WsRemoteEndpointImplBase.java
Wed Sep 11 13:45:36 2013
@@ -23,7 +23,6 @@ import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
-import java.nio.charset.StandardCharsets;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.List;
@@ -45,6 +44,7 @@ import javax.websocket.SendResult;
import org.apache.juli.logging.Log;
import org.apache.juli.logging.LogFactory;
+import org.apache.tomcat.util.buf.Utf8Encoder;
import org.apache.tomcat.util.res.StringManager;
public abstract class WsRemoteEndpointImplBase implements RemoteEndpoint {
@@ -75,7 +75,7 @@ public abstract class WsRemoteEndpointIm
// Max size of WebSocket header is 14 bytes
private final ByteBuffer headerBuffer = ByteBuffer.allocate(14);
private final ByteBuffer outputBuffer = ByteBuffer.allocate(8192);
- private final CharsetEncoder encoder = StandardCharsets.UTF_8.newEncoder();
+ private final CharsetEncoder encoder = new Utf8Encoder();
private final ByteBuffer encoderBuffer = ByteBuffer.allocate(8192);
private final AtomicBoolean batchingAllowed = new AtomicBoolean(false);
private volatile long sendTimeout = -1;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]