Hi Sherman,

Am 11.01.2013 06:47, schrieb Xueming Shen:
(1) sp++ at Ln#159 probably can be moved into ln#155? the local sp here should
     not change the real "sp" after "break+return".
(2) maybe the "overflowflag" can just be replaced by "CoderResult cr", with 
init value
     of CoderResult.UNDERFLOW;",  then "cr = CoderResult.OVERFLOW" at ln#182, 
and
     simply "return cr" at ln#193, without another "if".

I've enhanced your suggestions, see more below...
Additionally, some part of encodeArrayLoop(...) maybe could be moved into a separate method, to be reused by encode(char[] src, int sp, int len, byte[] dst).
Some of the re-engineering could be adapted to the Decoder methods.

I'm surprised we only get 1.6% boost.  Maybe it is worth measuring the 
performance
of some "small" buf/array encoding? I'm a little concerned that the overhead may
slow down the small size buf/array encoding. There is a simple benchmark for 
String
en/decoding at test/sun/nio/cs/StrCodingBenchmark.

I think we should balance 4 cases in rating the performance:
a) few loops, small buf/array
b) few loops, big buf/array
c) many loops, small buf/array
d) many loops, big buf/array
In a), b) the loop surrounding code will not be JIT-compiled, so should be 
optimized for interpreter.
In c) d) the loop surrounding code *may be* JIT-compiled and consequtively inline the inner loop, should be verified. In b), d) the small inner loop, as demonstrated below, will be JIT-compiled, regardless if moved into separate method.

-Ulf

1) Check for (sp >= sl) is superfluous.
======================================
private static int copyISOs(
        char[] sa, int sp, byte[] da, int dp, int len) {
    int i = 0;
    for (; i < len; i++) {
        char c = sa[sp++];
        // if (c & '\uFF00' != 0) // needs bug 6935994 to be fixed, would be 
fastest
        if ((byte)(c >> 8) != 0) // temporary replacement, fast byte-length 
operation on x86
            break;
        da[dp++] = (byte)c;
    }
    return i;
}

private CoderResult encodeArrayLoop(
        CharBuffer src, ByteBuffer dst) {
    char[] sa = src.array();
    int soff = src.arrayOffset();
    int sp = soff + src.position();
    int sr = src.remaining();
    int sl = sp + sr;
    byte[] da = dst.array();
    int doff = dst.arrayOffset();
    int dp = doff + dst.position();
    int dr = dst.remaining();
    CoderResult cr;
    if (dr < sr) {
        sr = dr;
        cr = CoderResult.OVERFLOW;
    } else
        cr = CoderResult.UNDERFLOW;
    try {
        int ret = copyISOs(sa, sp, da, dp, sr);
        sp = sp + ret;
        dp = dp + ret;
        if (ret != sr) {
            if (sgp.parse(sa[sp], sa, sp, sl) < 0)
                return sgp.error();
            return sgp.unmappableResult();
        }
        return cr;
    } finally {
        src.position(sp - soff);
        dst.position(dp - doff);
    }
}

2) Avoids sp, dp to be recalculated; shorter surrounding code -> best chance to 
become JIT-compiled.
======================================
// while inlinig, JIT will erase the surrounding int[] p
private static boolean copyISOs(
        char[] sa, byte[] da, final int[] p, int sl) {
    for (int sp = p[0], dp = p[1]; sp < sl; sp++) {
        char c = sa[sp];
        // if (c & '\uFF00' != 0) // needs bug 6935994 to be fixed, would be 
fastest
        if ((byte)(c >> 8) != 0) // temporary replacement, fast byte-length 
operation on x86
            return false;
        da[dp++] = (byte)c;
    }
    return true;
}

private CoderResult encodeArrayLoop(
        CharBuffer src, ByteBuffer dst) {
    char[] sa = src.array();
    int soff = src.arrayOffset();
    int sp = soff + src.position();
    int sr = src.remaining();
    byte[] da = dst.array();
    int doff = dst.arrayOffset();
    int dp = doff + dst.position();
    int dr = dst.remaining();
    CoderResult cr;
    if (dr < sr) {
        sr = dr;
        cr = CoderResult.OVERFLOW;
    } else
        cr = CoderResult.UNDERFLOW;
    try {
        int sl = sp + sr;
        final int[] p = { sp, dp };
        if (!copyISOs(sa, da, p, sl)) {
            if (sgp.parse(sa[sp], sa, sp, sl) < 0)
                return sgp.error();
            return sgp.unmappableResult();
        }
        return cr;
    } finally {
        src.position(sp - soff);
        dst.position(dp - doff);
    }
}

3) No more needs try...finally block.
======================================
private CoderResult encodeArrayLoop(
        CharBuffer src, ByteBuffer dst) {
    char[] sa = src.array();
    int soff = src.arrayOffset();
    int sp = soff + src.position();
    int sr = src.remaining();
    byte[] da = dst.array();
    int doff = dst.arrayOffset();
    int dp = doff + dst.position();
    int dr = dst.remaining();
    CoderResult cr;
    if (dr < sr) {
        sr = dr;
        cr = CoderResult.OVERFLOW;
    } else
        cr = CoderResult.UNDERFLOW;
    int sl = sp + sr;
    for (; sp < sl; sp++) {
        char c = sa[sp];
        // if (c & '\uFF00' != 0) { // needs bug 6935994 to be fixed, would be 
fastest
        if ((byte)(c >> 8) != 0) { // temporary replacement, fast byte-length 
operation on x86
            cr = null;
            break;
        }
        da[dp++] = (byte)c;
    }
    src.position(sp - soff);
    dst.position(dp - doff);
    return result(cr, sa, sp, sl);
}

// if adapted, maybe could also be reused in encodeBufferLoop()
private static CoderResult result(CoderResult cr, byte[] sa, int sp, int sl) {
    return cr != null ? cr :
        sgp.parse(sa[sp], sa, sp, sl) < 0
            ? sgp.error();
            : sgp.unmappableResult();
}

Reply via email to