On Jan 11, 2013, at 10:19 AM, Ulf Zibis <ulf.zi...@cosoco.de> wrote:

> Hi Sherman,
> 
> Am 11.01.2013 06:47, schrieb Xueming Shen:
>> (1) sp++ at Ln#159 probably can be moved into ln#155? the local sp here 
>> should
>>     not change the real "sp" after "break+return".
>> (2) maybe the "overflowflag" can just be replaced by "CoderResult cr", with 
>> init value
>>     of CoderResult.UNDERFLOW;",  then "cr = CoderResult.OVERFLOW" at ln#182, 
>> and
>>     simply "return cr" at ln#193, without another "if".
> 
> I've enhanced your suggestions, see more below...
> Additionally, some part of encodeArrayLoop(...) maybe could be moved into a 
> separate method, to be reused by encode(char[] src, int sp, int len, byte[] 
> dst).
> Some of the re-engineering could be adapted to the Decoder methods.
> 
>> I'm surprised we only get 1.6% boost.  Maybe it is worth measuring the 
>> performance
>> of some "small" buf/array encoding? I'm a little concerned that the overhead 
>> may
>> slow down the small size buf/array encoding. There is a simple benchmark for 
>> String
>> en/decoding at test/sun/nio/cs/StrCodingBenchmark.
> 
> I think we should balance 4 cases in rating the performance:
> a) few loops, small buf/array
> b) few loops, big buf/array
> c) many loops, small buf/array
> d) many loops, big buf/array
> In a), b) the loop surrounding code will not be JIT-compiled, so should be 
> optimized for interpreter.
> In c) d) the loop surrounding code *may be* JIT-compiled and consequtively 
> inline the inner loop, should be verified.
> In b), d) the small inner loop, as demonstrated below, will be JIT-compiled, 
> regardless if moved into separate method.

But you guys noticed that sentence in the initial review request, right?

"Move encoding loop into separate method for which VM will use intrinsic on 
x86."

Just wanted to make sure ;-)

-- Chris

> 
> -Ulf
> 
> 1) Check for (sp >= sl) is superfluous.
> ======================================
> private static int copyISOs(
>        char[] sa, int sp, byte[] da, int dp, int len) {
>    int i = 0;
>    for (; i < len; i++) {
>        char c = sa[sp++];
>        // if (c & '\uFF00' != 0) // needs bug 6935994 to be fixed, would be 
> fastest
>        if ((byte)(c >> 8) != 0) // temporary replacement, fast byte-length 
> operation on x86
>            break;
>        da[dp++] = (byte)c;
>    }
>    return i;
> }
> 
> private CoderResult encodeArrayLoop(
>        CharBuffer src, ByteBuffer dst) {
>    char[] sa = src.array();
>    int soff = src.arrayOffset();
>    int sp = soff + src.position();
>    int sr = src.remaining();
>    int sl = sp + sr;
>    byte[] da = dst.array();
>    int doff = dst.arrayOffset();
>    int dp = doff + dst.position();
>    int dr = dst.remaining();
>    CoderResult cr;
>    if (dr < sr) {
>        sr = dr;
>        cr = CoderResult.OVERFLOW;
>    } else
>        cr = CoderResult.UNDERFLOW;
>    try {
>        int ret = copyISOs(sa, sp, da, dp, sr);
>        sp = sp + ret;
>        dp = dp + ret;
>        if (ret != sr) {
>            if (sgp.parse(sa[sp], sa, sp, sl) < 0)
>                return sgp.error();
>            return sgp.unmappableResult();
>        }
>        return cr;
>    } finally {
>        src.position(sp - soff);
>        dst.position(dp - doff);
>    }
> }
> 
> 2) Avoids sp, dp to be recalculated; shorter surrounding code -> best chance 
> to become JIT-compiled.
> ======================================
> // while inlinig, JIT will erase the surrounding int[] p
> private static boolean copyISOs(
>        char[] sa, byte[] da, final int[] p, int sl) {
>    for (int sp = p[0], dp = p[1]; sp < sl; sp++) {
>        char c = sa[sp];
>        // if (c & '\uFF00' != 0) // needs bug 6935994 to be fixed, would be 
> fastest
>        if ((byte)(c >> 8) != 0) // temporary replacement, fast byte-length 
> operation on x86
>            return false;
>        da[dp++] = (byte)c;
>    }
>    return true;
> }
> 
> private CoderResult encodeArrayLoop(
>        CharBuffer src, ByteBuffer dst) {
>    char[] sa = src.array();
>    int soff = src.arrayOffset();
>    int sp = soff + src.position();
>    int sr = src.remaining();
>    byte[] da = dst.array();
>    int doff = dst.arrayOffset();
>    int dp = doff + dst.position();
>    int dr = dst.remaining();
>    CoderResult cr;
>    if (dr < sr) {
>        sr = dr;
>        cr = CoderResult.OVERFLOW;
>    } else
>        cr = CoderResult.UNDERFLOW;
>    try {
>        int sl = sp + sr;
>        final int[] p = { sp, dp };
>        if (!copyISOs(sa, da, p, sl)) {
>            if (sgp.parse(sa[sp], sa, sp, sl) < 0)
>                return sgp.error();
>            return sgp.unmappableResult();
>        }
>        return cr;
>    } finally {
>        src.position(sp - soff);
>        dst.position(dp - doff);
>    }
> }
> 
> 3) No more needs try...finally block.
> ======================================
> private CoderResult encodeArrayLoop(
>        CharBuffer src, ByteBuffer dst) {
>    char[] sa = src.array();
>    int soff = src.arrayOffset();
>    int sp = soff + src.position();
>    int sr = src.remaining();
>    byte[] da = dst.array();
>    int doff = dst.arrayOffset();
>    int dp = doff + dst.position();
>    int dr = dst.remaining();
>    CoderResult cr;
>    if (dr < sr) {
>        sr = dr;
>        cr = CoderResult.OVERFLOW;
>    } else
>        cr = CoderResult.UNDERFLOW;
>    int sl = sp + sr;
>    for (; sp < sl; sp++) {
>        char c = sa[sp];
>        // if (c & '\uFF00' != 0) { // needs bug 6935994 to be fixed, would be 
> fastest
>        if ((byte)(c >> 8) != 0) { // temporary replacement, fast byte-length 
> operation on x86
>            cr = null;
>            break;
>        }
>        da[dp++] = (byte)c;
>    }
>    src.position(sp - soff);
>    dst.position(dp - doff);
>    return result(cr, sa, sp, sl);
> }
> 
> // if adapted, maybe could also be reused in encodeBufferLoop()
> private static CoderResult result(CoderResult cr, byte[] sa, int sp, int sl) {
>    return cr != null ? cr :
>        sgp.parse(sa[sp], sa, sp, sl) < 0
>            ? sgp.error();
>            : sgp.unmappableResult();
> }
> 

Reply via email to