Re: RFR: 8148937: (str) Adapt StringJoiner for Compact Strings [v3]
On Mon, 15 Mar 2021 21:47:28 GMT, Сергей Цыпанов wrote: >> Hello, >> >> As of now, `java.util.StringJoiner` still uses `char[]` as storage for >> joined Strings. >> >> This is the case even when all joined Strings, as well as the delimiter, >> prefix and suffix, contain only ASCII symbols. >> >> As a result, when `StringJoiner.toString()` is called, the `byte[]` stored in >> each String is inflated in order to fill the `char[]`, and after that the `char[]` is >> compressed back into a `byte[]` when the String constructor is called: >> String delimiter = this.delimiter; >> char[] chars = new char[this.len + addLen]; >> int k = getChars(this.prefix, chars, 0); >> if (size > 0) { >> k += getChars(elts[0], chars, k);// inflate byte[] >> >> for(int i = 1; i < size; ++i) { >> k += getChars(delimiter, chars, k); >> k += getChars(elts[i], chars, k); >> } >> } >> >> k += getChars(this.suffix, chars, k); >> return new String(chars);// compress char[] -> byte[] >> This can be improved by utilizing the new method `String.getBytes(byte[], int, >> int, byte, int)` [introduced](https://github.com/openjdk/jdk/pull/402) in >> [JDK-8254082](https://bugs.openjdk.java.net/browse/JDK-8254082), >> which covers both the case where the resulting String is Latin1 and the case where it is UTF-16. >> >> I've prepared a patch along with a benchmark demonstrating that this change is >> correct and brings an improvement. 
>> >> @BenchmarkMode(Mode.AverageTime) >> @OutputTimeUnit(TimeUnit.NANOSECONDS) >> @Fork(jvmArgsAppend = {"-Xms2g", "-Xmx2g"}) >> public class StringJoinerBenchmark { >> >> @Benchmark >> public String stringJoiner(Data data) { >> String[] stringArray = data.stringArray; >> return Joiner.joinWithStringJoiner(stringArray); >> } >> >> @State(Scope.Thread) >> public static class Data { >> >> @Param({"latin", "cyrillic", "mixed"}) >> private String mode; >> >> @Param({"8", "32", "64"}) >> private int length; >> >> @Param({"5", "10", "100"}) >> private int count; >> >> private String[] stringArray; >> >> @Setup >> public void setup() { >> RandomStringGenerator generator = new RandomStringGenerator(); >> >> stringArray = new String[count]; >> >> for (int i = 0; i < count; i++) { >> String alphabet = getAlphabet(i, mode); >> stringArray[i] = generator.randomString(alphabet, length); >> } >> } >> >> private static String getAlphabet(int index, String mode) { >> var latin = "abcdefghijklmnopqrstuvwxyz"; //English >> var cyrillic = "абвгдеёжзиклмнопрстуфхцчшщьыъэюя"; // Russian >> >> String alphabet; >> switch (mode) { >> case "mixed" -> alphabet = index % 2 == 0 ? 
cyrillic : latin; >> case "latin" -> alphabet = latin; >> case "cyrillic" -> alphabet = cyrillic; >> default -> throw new RuntimeException("Illegal mode " + mode); >> } >> return alphabet; >> } >> } >> } >> >> public class Joiner { >> >> public static String joinWithStringJoiner(String[] stringArray) { >> StringJoiner joiner = new StringJoiner(",", "[", "]"); >> for (String str : stringArray) { >> joiner.add(str); >> } >> return joiner.toString(); >> } >> } >> >> >> (count) (length) (mode) >> Java 14 patched Units >> stringJoiner 5 8 latin 78.836 >> ± 0.208 67.546 ± 0.500 ns/op >> stringJoiner 5 32 latin 92.877 >> ± 0.422 66.760 ± 0.498 ns/op >> stringJoiner 5 64 latin 115.423 >> ± 0.883 73.224 ± 0.289 ns/op >> stringJoiner 10 8 latin 152.587 >> ± 0.429 161.427 ± 0.635 ns/op >> stringJoiner 10 32 latin 189.998 >> ± 0.478 164.099 ± 0.963 ns/op >> stringJoiner 10 64 latin 238.679 >> ± 1.419 176.825 ± 0.533 ns/op >> stringJoiner 100 8 latin 1215.612 >> ± 17.413 1541.802 ± 126.166 ns/op >> stringJoiner 100 32 latin 1699.998 >> ± 28.407 1563.341 ± 4.439 ns/op >> stringJoiner 100 64 latin 2289.388 >> ± 45.319 2215.931 ± 137.583 ns/op >> stringJoiner 5 8 cyrillic 96.692 >> ± 0.947 80.946 ± 0.371 ns/op >> stringJoiner 5 32 cyrillic 107.806 >> ± 0.429 84.717 ± 0.541 ns/op >> stringJoiner 5 64 cyrillic 150.762 >> ± 2.267 96.214 ± 1.251 ns/op >> stringJoiner 10 8 cyrillic 190.57
Re: RFR: 8148937: (str) Adapt StringJoiner for Compact Strings [v3]
On Mon, 15 Mar 2021 21:47:28 GMT, Сергей Цыпанов wrote: >> Hello, >> >> As of now, `java.util.StringJoiner` still uses `char[]` as storage for >> joined Strings. >> >> This is the case even when all joined Strings, as well as the delimiter, >> prefix and suffix, contain only ASCII symbols. >> >> As a result, when `StringJoiner.toString()` is called, the `byte[]` stored in >> each String is inflated in order to fill the `char[]`, and after that the `char[]` is >> compressed back into a `byte[]` when the String constructor is called: >> String delimiter = this.delimiter; >> char[] chars = new char[this.len + addLen]; >> int k = getChars(this.prefix, chars, 0); >> if (size > 0) { >> k += getChars(elts[0], chars, k);// inflate byte[] >> >> for(int i = 1; i < size; ++i) { >> k += getChars(delimiter, chars, k); >> k += getChars(elts[i], chars, k); >> } >> } >> >> k += getChars(this.suffix, chars, k); >> return new String(chars);// compress char[] -> byte[] >> This can be improved by utilizing the new method `String.getBytes(byte[], int, >> int, byte, int)` [introduced](https://github.com/openjdk/jdk/pull/402) in >> [JDK-8254082](https://bugs.openjdk.java.net/browse/JDK-8254082), >> which covers both the case where the resulting String is Latin1 and the case where it is UTF-16. >> >> I've prepared a patch along with a benchmark demonstrating that this change is >> correct and brings an improvement. 
>> >> @BenchmarkMode(Mode.AverageTime) >> @OutputTimeUnit(TimeUnit.NANOSECONDS) >> @Fork(jvmArgsAppend = {"-Xms2g", "-Xmx2g"}) >> public class StringJoinerBenchmark { >> >> @Benchmark >> public String stringJoiner(Data data) { >> String[] stringArray = data.stringArray; >> return Joiner.joinWithStringJoiner(stringArray); >> } >> >> @State(Scope.Thread) >> public static class Data { >> >> @Param({"latin", "cyrillic", "mixed"}) >> private String mode; >> >> @Param({"8", "32", "64"}) >> private int length; >> >> @Param({"5", "10", "100"}) >> private int count; >> >> private String[] stringArray; >> >> @Setup >> public void setup() { >> RandomStringGenerator generator = new RandomStringGenerator(); >> >> stringArray = new String[count]; >> >> for (int i = 0; i < count; i++) { >> String alphabet = getAlphabet(i, mode); >> stringArray[i] = generator.randomString(alphabet, length); >> } >> } >> >> private static String getAlphabet(int index, String mode) { >> var latin = "abcdefghijklmnopqrstuvwxyz"; //English >> var cyrillic = "абвгдеёжзиклмнопрстуфхцчшщьыъэюя"; // Russian >> >> String alphabet; >> switch (mode) { >> case "mixed" -> alphabet = index % 2 == 0 ? 
cyrillic : latin; >> case "latin" -> alphabet = latin; >> case "cyrillic" -> alphabet = cyrillic; >> default -> throw new RuntimeException("Illegal mode " + mode); >> } >> return alphabet; >> } >> } >> } >> >> public class Joiner { >> >> public static String joinWithStringJoiner(String[] stringArray) { >> StringJoiner joiner = new StringJoiner(",", "[", "]"); >> for (String str : stringArray) { >> joiner.add(str); >> } >> return joiner.toString(); >> } >> } >> >> >> (count) (length) (mode) >> Java 14 patched Units >> stringJoiner 5 8 latin 78.836 >> ± 0.208 67.546 ± 0.500 ns/op >> stringJoiner 5 32 latin 92.877 >> ± 0.422 66.760 ± 0.498 ns/op >> stringJoiner 5 64 latin 115.423 >> ± 0.883 73.224 ± 0.289 ns/op >> stringJoiner 10 8 latin 152.587 >> ± 0.429 161.427 ± 0.635 ns/op >> stringJoiner 10 32 latin 189.998 >> ± 0.478 164.099 ± 0.963 ns/op >> stringJoiner 10 64 latin 238.679 >> ± 1.419 176.825 ± 0.533 ns/op >> stringJoiner 100 8 latin 1215.612 >> ± 17.413 1541.802 ± 126.166 ns/op >> stringJoiner 100 32 latin 1699.998 >> ± 28.407 1563.341 ± 4.439 ns/op >> stringJoiner 100 64 latin 2289.388 >> ± 45.319 2215.931 ± 137.583 ns/op >> stringJoiner 5 8 cyrillic 96.692 >> ± 0.947 80.946 ± 0.371 ns/op >> stringJoiner 5 32 cyrillic 107.806 >> ± 0.429 84.717 ± 0.541 ns/op >> stringJoiner 5 64 cyrillic 150.762 >> ± 2.267 96.214 ± 1.251 ns/op >> stringJoiner 10 8 cyrillic 190.57
Re: RFR: 8148937: (str) Adapt StringJoiner for Compact Strings [v3]
> Hello, > > As of now, `java.util.StringJoiner` still uses `char[]` as storage for > joined Strings. > > This is the case even when all joined Strings, as well as the delimiter, > prefix and suffix, contain only ASCII symbols. > > As a result, when `StringJoiner.toString()` is called, the `byte[]` stored in > each String is inflated in order to fill the `char[]`, and after that the `char[]` is > compressed back into a `byte[]` when the String constructor is called: > String delimiter = this.delimiter; > char[] chars = new char[this.len + addLen]; > int k = getChars(this.prefix, chars, 0); > if (size > 0) { > k += getChars(elts[0], chars, k);// inflate byte[] > > for(int i = 1; i < size; ++i) { > k += getChars(delimiter, chars, k); > k += getChars(elts[i], chars, k); > } > } > > k += getChars(this.suffix, chars, k); > return new String(chars);// compress char[] -> byte[] > This can be improved by utilizing the new method `String.getBytes(byte[], int, > int, byte, int)` [introduced](https://github.com/openjdk/jdk/pull/402) in > [JDK-8254082](https://bugs.openjdk.java.net/browse/JDK-8254082), > which covers both the case where the resulting String is Latin1 and the case where it is UTF-16. > > I've prepared a patch along with a benchmark demonstrating that this change is > correct and brings an improvement. 
> > @BenchmarkMode(Mode.AverageTime) > @OutputTimeUnit(TimeUnit.NANOSECONDS) > @Fork(jvmArgsAppend = {"-Xms2g", "-Xmx2g"}) > public class StringJoinerBenchmark { > > @Benchmark > public String stringJoiner(Data data) { > String[] stringArray = data.stringArray; > return Joiner.joinWithStringJoiner(stringArray); > } > > @State(Scope.Thread) > public static class Data { > > @Param({"latin", "cyrillic", "mixed"}) > private String mode; > > @Param({"8", "32", "64"}) > private int length; > > @Param({"5", "10", "100"}) > private int count; > > private String[] stringArray; > > @Setup > public void setup() { > RandomStringGenerator generator = new RandomStringGenerator(); > > stringArray = new String[count]; > > for (int i = 0; i < count; i++) { > String alphabet = getAlphabet(i, mode); > stringArray[i] = generator.randomString(alphabet, length); > } > } > > private static String getAlphabet(int index, String mode) { > var latin = "abcdefghijklmnopqrstuvwxyz"; //English > var cyrillic = "абвгдеёжзиклмнопрстуфхцчшщьыъэюя"; // Russian > > String alphabet; > switch (mode) { > case "mixed" -> alphabet = index % 2 == 0 ? 
cyrillic : latin; > case "latin" -> alphabet = latin; > case "cyrillic" -> alphabet = cyrillic; > default -> throw new RuntimeException("Illegal mode " + mode); > } > return alphabet; > } > } > } > > public class Joiner { > > public static String joinWithStringJoiner(String[] stringArray) { > StringJoiner joiner = new StringJoiner(",", "[", "]"); > for (String str : stringArray) { > joiner.add(str); > } > return joiner.toString(); > } > } > > > (count) (length) (mode) > Java 14 patched Units > stringJoiner 5 8 latin 78.836 > ± 0.208 67.546 ± 0.500 ns/op > stringJoiner 5 32 latin 92.877 > ± 0.422 66.760 ± 0.498 ns/op > stringJoiner 5 64 latin 115.423 > ± 0.883 73.224 ± 0.289 ns/op > stringJoiner 10 8 latin 152.587 > ± 0.429 161.427 ± 0.635 ns/op > stringJoiner 10 32 latin 189.998 > ± 0.478 164.099 ± 0.963 ns/op > stringJoiner 10 64 latin 238.679 > ± 1.419 176.825 ± 0.533 ns/op > stringJoiner 100 8 latin 1215.612 > ± 17.413 1541.802 ± 126.166 ns/op > stringJoiner 100 32 latin 1699.998 > ± 28.407 1563.341 ± 4.439 ns/op > stringJoiner 100 64 latin 2289.388 > ± 45.319 2215.931 ± 137.583 ns/op > stringJoiner 5 8 cyrillic 96.692 > ± 0.947 80.946 ± 0.371 ns/op > stringJoiner 5 32 cyrillic 107.806 > ± 0.429 84.717 ± 0.541 ns/op > stringJoiner 5 64 cyrillic 150.762 > ± 2.267 96.214 ± 1.251 ns/op > stringJoiner 10 8 cyrillic 190.570 > ± 0.381 182.754 ± 0.678 ns/op > stringJoiner 10 32 cyrillic 240.239 > ± 1.110 187.991 ± 1.575 ns/op > stringJ