On Mon, 15 Mar 2021 21:47:28 GMT, Сергей Цыпанов <github.com+10835776+stsypa...@openjdk.org> wrote:
>> Hello, >> >> as of now `java.util.StringJoiner` still uses `char[]` as a storage for >> joined Strings. >> >> This applies for the cases when all joined Strings as well as delimiter, >> prefix and suffix contain only ASCII symbols. >> >> As a result when `StringJoiner.toString()` is called `byte[]` stored in >> Strings is inflated in order to fill in `char[]` and after that `char[]` is >> compressed when constructor of String is called: >> String delimiter = this.delimiter; >> char[] chars = new char[this.len + addLen]; >> int k = getChars(this.prefix, chars, 0); >> if (size > 0) { >> k += getChars(elts[0], chars, k); // inflate byte[] >> >> for(int i = 1; i < size; ++i) { >> k += getChars(delimiter, chars, k); >> k += getChars(elts[i], chars, k); >> } >> } >> >> k += getChars(this.suffix, chars, k); >> return new String(chars); // compress char[] -> byte[] >> This can be improved by utilizing new method `String.getBytes(byte[], int, >> int, byte, int)` [introduced](https://github.com/openjdk/jdk/pull/402) in >> [JDK-8224986](https://bugs.openjdk.java.net/browse/JDK-8254082) >> covering both cases when resulting String is Latin1 or UTF-16 >> >> I've prepared a patch along with benchmark proving that this change is >> correct and brings improvement. >> >> @BenchmarkMode(Mode.AverageTime) >> @OutputTimeUnit(TimeUnit.NANOSECONDS) >> @Fork(jvmArgsAppend = {"-Xms2g", "-Xmx2g"}) >> public class StringJoinerBenchmark { >> >> @Benchmark >> public String stringJoiner(Data data) { >> String[] stringArray = data.stringArray; >> return Joiner.joinWithStringJoiner(stringArray); >> } >> >> @State(Scope.Thread) >> public static class Data { >> >> @Param({"latin", "cyrillic", "mixed"}) >> private String mode; >> >> @Param({"8", "32", "64"}) >> private int length; >> >> @Param({"5", "10", "100"}) >> private int count; >> >> private String[] stringArray; >> >> @Setup >> public void setup() { >> RandomStringGenerator generator = new RandomStringGenerator(); >> >> stringArray = new String[count]; >> >> for (int i = 0; i < count; i++) { >> String alphabet = getAlphabet(i, mode); >> stringArray[i] = generator.randomString(alphabet, length); >> } >> } >> >> private static String getAlphabet(int index, String mode) { >> var latin = "abcdefghijklmnopqrstuvwxyz"; //English >> var cyrillic = "абвгдеёжзиклмнопрстуфхцчшщьыъэюя"; // Russian >> >> String alphabet; >> switch (mode) { >> case "mixed" -> alphabet = index % 2 == 0 ? cyrillic : latin; >> case "latin" -> alphabet = latin; >> case "cyrillic" -> alphabet = cyrillic; >> default -> throw new RuntimeException("Illegal mode " + mode); >> } >> return alphabet; >> } >> } >> } >> >> public class Joiner { >> >> public static String joinWithStringJoiner(String[] stringArray) { >> StringJoiner joiner = new StringJoiner(",", "[", "]"); >> for (String str : stringArray) { >> joiner.add(str); >> } >> return joiner.toString(); >> } >> } >> >> >> (count) (length) (mode) >> Java 14 patched Units >> stringJoiner 5 8 latin 78.836 >> ± 0.208 67.546 ± 0.500 ns/op >> stringJoiner 5 32 latin 92.877 >> ± 0.422 66.760 ± 0.498 ns/op >> stringJoiner 5 64 latin 115.423 >> ± 0.883 73.224 ± 0.289 ns/op >> stringJoiner 10 8 latin 152.587 >> ± 0.429 161.427 ± 0.635 ns/op >> stringJoiner 10 32 latin 189.998 >> ± 0.478 164.099 ± 0.963 ns/op >> stringJoiner 10 64 latin 238.679 >> ± 1.419 176.825 ± 0.533 ns/op >> stringJoiner 100 8 latin 1215.612 >> ± 17.413 1541.802 ± 126.166 ns/op >> stringJoiner 100 32 latin 1699.998 >> ± 28.407 1563.341 ± 4.439 ns/op >> stringJoiner 100 64 latin 2289.388 >> ± 45.319 2215.931 ± 137.583 ns/op >> stringJoiner 5 8 cyrillic 96.692 >> ± 0.947 80.946 ± 0.371 ns/op >> stringJoiner 5 32 cyrillic 107.806 >> ± 0.429 84.717 ± 0.541 ns/op >> stringJoiner 5 64 cyrillic 150.762 >> ± 2.267 96.214 ± 1.251 ns/op >> stringJoiner 10 8 cyrillic 190.570 >> ± 0.381 182.754 ± 0.678 ns/op >> stringJoiner 10 32 cyrillic 240.239 >> ± 1.110 187.991 ± 1.575 ns/op >> stringJoiner 10 64 cyrillic 382.493 >> ± 2.692 219.358 ± 0.967 ns/op >> stringJoiner 100 8 cyrillic 1737.435 >> ± 16.171 1658.379 ± 56.225 ns/op >> stringJoiner 100 32 cyrillic 2321.106 >> ± 13.583 1942.028 ± 3.418 ns/op >> stringJoiner 100 64 cyrillic 3189.563 >> ± 10.135 2484.796 ± 7.572 ns/op >> stringJoiner 5 8 mixed 88.743 >> ± 0.709 76.049 ± 0.685 ns/op >> stringJoiner 5 32 mixed 103.425 >> ± 0.745 81.006 ± 0.184 ns/op >> stringJoiner 5 64 mixed 146.441 >> ± 0.763 93.071 ± 0.858 ns/op >> stringJoiner 10 8 mixed 169.262 >> ± 0.482 158.890 ± 0.598 ns/op >> stringJoiner 10 32 mixed 218.566 >> ± 0.945 170.415 ± 1.172 ns/op >> stringJoiner 10 64 mixed 361.412 >> ± 1.546 200.374 ± 0.683 ns/op >> stringJoiner 100 8 mixed 1594.544 >> ± 5.761 1476.689 ± 2.004 ns/op >> stringJoiner 100 32 mixed 2221.333 >> ± 28.320 1879.071 ± 52.729 ns/op >> stringJoiner 100 64 mixed 3115.786 >> ± 22.485 2400.644 ± 4.401 ns/op >> >> stringJoiner:·gc.alloc.rate.norm 5 8 latin 248.023 >> ± 0.001 136.015 ± 0.001 B/op >> stringJoiner:·gc.alloc.rate.norm 5 32 latin 608.045 >> ± 0.001 256.022 ± 0.001 B/op >> stringJoiner:·gc.alloc.rate.norm 5 64 latin 1088.067 >> ± 0.002 416.030 ± 0.001 B/op >> stringJoiner:·gc.alloc.rate.norm 10 8 latin 464.044 >> ± 0.001 264.029 ± 0.001 B/op >> stringJoiner:·gc.alloc.rate.norm 10 32 latin 1184.085 >> ± 0.003 504.044 ± 0.001 B/op >> stringJoiner:·gc.alloc.rate.norm 10 64 latin 2144.123 >> ± 0.005 824.065 ± 0.001 B/op >> stringJoiner:·gc.alloc.rate.norm 100 8 latin 3872.367 >> ± 8.002 2024.269 ± 8.016 B/op >> stringJoiner:·gc.alloc.rate.norm 100 32 latin 11048.803 >> ± 7.988 4416.393 ± 0.004 B/op >> stringJoiner:·gc.alloc.rate.norm 100 64 latin 20657.194 >> ± 9.775 7640.642 ± 9.819 B/op >> stringJoiner:·gc.alloc.rate.norm 5 8 cyrillic 360.029 >> ± 0.001 184.020 ± 0.001 B/op >> stringJoiner:·gc.alloc.rate.norm 5 32 cyrillic 960.060 >> ± 0.002 424.032 ± 0.001 B/op >> stringJoiner:·gc.alloc.rate.norm 5 64 cyrillic 1760.088 >> ± 0.003 744.043 ± 0.001 B/op >> stringJoiner:·gc.alloc.rate.norm 10 8 cyrillic 664.063 >> ± 0.001 352.038 ± 0.001 B/op >> stringJoiner:·gc.alloc.rate.norm 10 32 cyrillic 1864.128 >> ± 0.003 832.067 ± 0.001 B/op >> stringJoiner:·gc.alloc.rate.norm 10 64 cyrillic 3464.206 >> ± 0.007 1472.094 ± 0.001 B/op >> stringJoiner:·gc.alloc.rate.norm 100 8 cyrillic 5704.541 >> ± 0.006 2928.317 ± 8.002 B/op >> stringJoiner:·gc.alloc.rate.norm 100 32 cyrillic 17705.090 >> ± 0.045 7720.658 ± 0.007 B/op >> stringJoiner:·gc.alloc.rate.norm 100 64 cyrillic 33689.855 >> ± 9.786 14121.042 ± 0.015 B/op >> stringJoiner:·gc.alloc.rate.norm 5 8 mixed 360.028 >> ± 0.001 184.016 ± 0.001 B/op >> stringJoiner:·gc.alloc.rate.norm 5 32 mixed 960.059 >> ± 0.002 424.031 ± 0.001 B/op >> stringJoiner:·gc.alloc.rate.norm 5 64 mixed 1760.088 >> ± 0.003 744.041 ± 0.001 B/op >> stringJoiner:·gc.alloc.rate.norm 10 8 mixed 664.052 >> ± 0.001 352.035 ± 0.002 B/op >> stringJoiner:·gc.alloc.rate.norm 10 32 mixed 1864.116 >> ± 0.005 832.064 ± 0.001 B/op >> stringJoiner:·gc.alloc.rate.norm 10 64 mixed 3464.196 >> ± 0.006 1472.088 ± 0.002 B/op >> stringJoiner:·gc.alloc.rate.norm 100 8 mixed 5672.448 >> ± 8.002 2920.314 ± 0.006 B/op >> stringJoiner:·gc.alloc.rate.norm 100 32 mixed 17681.092 >> ± 9.824 7728.653 ± 8.008 B/op >> stringJoiner:·gc.alloc.rate.norm 100 64 mixed 33697.818 >> ± 8.011 14120.977 ± 0.034 B/op > > Сергей Цыпанов has updated the pull request incrementally with one additional > commit since the last revision: > > 8148937: (str) Update copyright year Marked as reviewed by chegar (Reviewer). ------------- PR: https://git.openjdk.java.net/jdk/pull/2627