On Mon, 10 Jun 2024 12:12:58 GMT, Shaojin Wen <d...@openjdk.org> wrote:
> After PR https://github.com/openjdk/jdk/pull/16245, C2 optimizes stores into > primitive arrays by combining values into larger stores. > > This PR rewrites the code of appendNull and append(boolean) methods so that > these two methods can be optimized by C2. # 1. Compare with the master branch 1. master (`a6fc2f8`) https://github.com/wenshao/jdk/tree/upstream_master_a6fc2f8 2. current (`5e815`) https://github.com/wenshao/jdk/tree/optim_str_builder_append_202406 # 2. Benchmark Commands make test TEST="micro:java.lang.StringBuilders.toStringCharWithBool8" make test TEST="micro:java.lang.StringBuilders.toStringCharWithNull8" # 3. Benchmark Numbers The performance numbers on a MacBook Pro M1 Pro are as follows: -Benchmark Mode Cnt Score Error Units #master (a6fc2f8) -StringBuilders.toStringCharWithBool8Latin1 avgt 15 7.371 ± 0.003 ns/op -StringBuilders.toStringCharWithBool8Utf16 avgt 15 9.613 ± 0.018 ns/op -StringBuilders.toStringCharWithNull8Latin1 avgt 15 7.071 ± 0.003 ns/op -StringBuilders.toStringCharWithNull8Utf16 avgt 15 9.296 ± 0.016 ns/op +Benchmark Mode Cnt Score Error Units #current (5e815) +StringBuilders.toStringCharWithBool8Latin1 avgt 15 6.515 ± 0.121 ns/op +11.61% +StringBuilders.toStringCharWithBool8Utf16 avgt 15 8.654 ± 0.035 ns/op +9.97% +StringBuilders.toStringCharWithNull8Latin1 avgt 15 5.550 ± 0.010 ns/op +21.51% +StringBuilders.toStringCharWithNull8Utf16 avgt 15 8.108 ± 0.041 ns/op +12.77% # 1. Compare with unsafe branch 1. current (`5e815`) https://github.com/wenshao/jdk/tree/optim_str_builder_append_202406 2. unsafe (`adc220`) https://github.com/wenshao/jdk/tree/optim_str_builder_append_202406_unsafe I think the performance of the Unsafe branch may be the best data for the C2 optimizer. @eme64, can you help me see if C2 can do it? # 2. Benchmark Commands make test TEST="micro:java.lang.StringBuilders.toStringCharWithBool8" make test TEST="micro:java.lang.StringBuilders.toStringCharWithNull8" # 3.
Implementation of Unsafe Branch
/**
 * Experimental variant of the string builder's "null" / boolean appends that
 * writes the first four characters of each literal with one wide Unsafe store
 * (an int for the Latin-1 coder, a long for the UTF-16 coder) instead of
 * storing the characters one at a time.
 */
class AbstractStringBuilder {
    static final Unsafe UNSAFE = Unsafe.getUnsafe();

    // Four-character literals packed into a single int (Latin-1) or long
    // (UTF-16). "FALS" holds only the first four characters of "false"; the
    // trailing 'e' is stored separately in append(boolean).
    static final int NULL_LATIN1;
    static final int TRUE_LATIN1;
    static final int FALS_LATIN1;
    static final long NULL_UTF16;
    static final long TRUE_UTF16;
    static final long FALS_UTF16;

    // The packed constants are obtained by reading scratch arrays back through
    // UNSAFE.getInt/getLong rather than being written as numeric literals, so
    // they come out in whatever layout the matching putInt/putLong stores
    // below will write.
    static {
        byte[] bytes4 = new byte[4];
        byte[] bytes8 = new byte[8];

        // "null"
        bytes4[0] = 'n';
        bytes4[1] = 'u';
        bytes4[2] = 'l';
        bytes4[3] = 'l';
        NULL_LATIN1 = UNSAFE.getInt(bytes4, Unsafe.ARRAY_BYTE_BASE_OFFSET);
        // presumably widens the four Latin-1 bytes into four two-byte chars
        // in bytes8 -- helper is not shown here, confirm
        StringUTF16.inflate(bytes4, 0, bytes8, 0, 4);
        NULL_UTF16 = UNSAFE.getLong(bytes8, Unsafe.ARRAY_BYTE_BASE_OFFSET);

        // "true" (the scratch arrays are reused)
        bytes4[0] = 't';
        bytes4[1] = 'r';
        bytes4[2] = 'u';
        bytes4[3] = 'e';
        TRUE_LATIN1 = UNSAFE.getInt(bytes4, Unsafe.ARRAY_BYTE_BASE_OFFSET);
        StringUTF16.inflate(bytes4, 0, bytes8, 0, 4);
        TRUE_UTF16 = UNSAFE.getLong(bytes8, Unsafe.ARRAY_BYTE_BASE_OFFSET);

        // "fals" -- first four characters of "false"
        bytes4[0] = 'f';
        bytes4[1] = 'a';
        bytes4[2] = 'l';
        bytes4[3] = 's';
        FALS_LATIN1 = UNSAFE.getInt(bytes4, Unsafe.ARRAY_BYTE_BASE_OFFSET);
        StringUTF16.inflate(bytes4, 0, bytes8, 0, 4);
        FALS_UTF16 = UNSAFE.getLong(bytes8, Unsafe.ARRAY_BYTE_BASE_OFFSET);
    }

    /**
     * Appends the four characters "null" using a single wide store.
     *
     * @return this builder
     */
    private AbstractStringBuilder appendNull() {
        // `count` here is still the field: the local of the same name is
        // only declared on the next statement.
        ensureCapacityInternal(count + 4);
        int count = this.count;
        // Read after the capacity call -- presumably because growing may
        // replace this.value; confirm against ensureCapacityInternal.
        byte[] val = this.value;
        // NOTE(review): base + count (or base + 2*count) is an arbitrary byte
        // offset, so these int/long stores need not be naturally aligned --
        // confirm plain putInt/putLong is acceptable on every target platform.
        if (isLatin1()) {
            UNSAFE.putInt(
                    val,
                    Unsafe.ARRAY_BYTE_BASE_OFFSET + count,
                    NULL_LATIN1);
        } else {
            // count << 1: presumably two bytes per char in the UTF-16 coder
            UNSAFE.putLong(
                    val,
                    Unsafe.ARRAY_BYTE_BASE_OFFSET + (count << 1),
                    NULL_UTF16);
        }
        this.count = count + 4;
        return this;
    }

    /**
     * Appends "true" or "false": the first four characters go out as one wide
     * store, and for {@code false} the fifth character 'e' is stored
     * separately.
     *
     * @param b the boolean whose text form is appended
     * @return this builder
     */
    public AbstractStringBuilder append(boolean b) {
        int count = this.count;
        // "true" needs 4 characters, "false" needs 5
        int spaceNeeded = count + (b ? 4 : 5);
        ensureCapacityInternal(spaceNeeded);
        byte[] val = this.value;
        if (isLatin1()) {
            UNSAFE.putInt(
                    val,
                    Unsafe.ARRAY_BYTE_BASE_OFFSET + count,
                    b ? TRUE_LATIN1 : FALS_LATIN1);
            if (!b) {
                // fifth character of "false"
                val[count + 4] = 'e';
            }
        } else {
            UNSAFE.putLong(
                    val,
                    Unsafe.ARRAY_BYTE_BASE_OFFSET + (count << 1),
                    b ? TRUE_UTF16 : FALS_UTF16);
            if (!b) {
                // fifth character of "false"; index is presumably in chars,
                // not bytes -- confirm against StringUTF16.putChar
                StringUTF16.putChar(val, count + 4, 'e');
            }
        }
        this.count = spaceNeeded;
        return this;
    }
}
# 4.
Benchmark Numbers The performance numbers on a MacBook Pro M1 Pro are as follows: -Benchmark Mode Cnt Score Error Units # unsafe (adc220) -StringBuilders.toStringCharWithBool8Latin1 avgt 15 6.415 ± 0.061 ns/op -StringBuilders.toStringCharWithBool8Utf16 avgt 15 7.307 ± 0.013 ns/op -StringBuilders.toStringCharWithNull8Latin1 avgt 15 5.443 ± 0.011 ns/op -StringBuilders.toStringCharWithNull8Utf16 avgt 15 6.944 ± 0.102 ns/op +Benchmark Mode Cnt Score Error Units #current (5e815) +StringBuilders.toStringCharWithBool8Latin1 avgt 15 6.515 ± 0.121 ns/op -1.55% +StringBuilders.toStringCharWithBool8Utf16 avgt 15 8.654 ± 0.035 ns/op -18.44% +StringBuilders.toStringCharWithNull8Latin1 avgt 15 5.550 ± 0.010 ns/op -1.96% +StringBuilders.toStringCharWithNull8Utf16 avgt 15 8.108 ± 0.041 ns/op -16.76% ------------- PR Comment: https://git.openjdk.org/jdk/pull/19626#issuecomment-2158201904 PR Comment: https://git.openjdk.org/jdk/pull/19626#issuecomment-2158296234