aherbert commented on a change in pull request #565: URL: https://github.com/apache/commons-lang/pull/565#discussion_r462184805
########## File path: src/test/java/org/apache/commons/lang3/StringUtilsChompTest.java ########## @@ -0,0 +1,232 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.lang3; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.concurrent.TimeUnit; + +import static org.apache.commons.lang3.StringUtils.EMPTY; +import static org.apache.commons.lang3.StringUtils.isEmpty; +import static org.apache.commons.lang3.StringUtils.length; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Thread) +@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Fork(value = 1, jvmArgs = {"-server", "-Xms512M", "-Xmx512M"}) +public class StringUtilsChompTest { + // Different code paths with no chomp + private static final String string1 = ""; + private static final String string2 = "a"; + private static final String string3 = "aa"; + + // Single char chomp + private static final String string4 = "\r"; + private static final String string5 = "\n"; + + // Multi-char chomp + private static final String string6 = "\r\n"; + private static final String string7 = "a\n"; + private static final String string8 = "a\r"; + private static final String string9 = "a\r\n"; + + private static final String[] strings = buildStrings(); + + private static String[] buildStrings() { + String[] res = new String[128 * 128]; + for (int i = 0; i < 128; i++) { + for (int j = 0; j < 128; j++) { + StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append((char) i); + stringBuilder.append((char) j); + res[i * 128 + j] = stringBuilder.toString(); + } + } + return res; + } + + @Benchmark + public String test1_Empty_Old() { + return chompOld(string1); + } + + @Benchmark + public String test1_Empty_New() { + return chompNew(string1); + } + + @Benchmark + public String test2_No_Chomp_Old() { + return chompOld(string2); + } + + @Benchmark + public String test2_No_Chomp_New() { + return chompNew(string2); + } + + @Benchmark + public String test3_No_Chomp_Old() { + return chompOld(string3); + } + + @Benchmark + public String test3_No_Chomp_New() { + return chompNew(string3); + } + + @Benchmark + public String test4_R_Old() { + return chompOld(string4); + } + + @Benchmark + public String test4_R_New() { + return chompNew(string4); + } + + @Benchmark + public String test5_N_Old() { + return chompOld(string5); + } + + @Benchmark + public String test5_N_New() { + return chompNew(string5); + } + + @Benchmark + public String test6_R_N_Old() { + return chompOld(string6); + } + + @Benchmark + public String test6_R_N_New() { + return chompNew(string6); + } + + @Benchmark + public String test7_a_N_Old() { + return chompOld(string7); + } + + @Benchmark + public String test7_a_N_New() { + return chompNew(string7); + } + + @Benchmark + public String test8_a_N_Old() { Review comment: In your naming convention this should be renamed as `test8_a_R_Old`. Same for the new test. However the `testX` prefix has no meaning. So I would drop it and go for something more useful: ``` noChar_New/Old singleChar_New/Old multiChar_New/Old noChar_CR_New/Old noChar_LF_New/Old noChar_CR_LF_New/Old singleChar_CR_New/Old singleChar_LF_New/Old singleChar_CR_LF_New/Old randomStrings_New/Old ``` LF = Line Feed = `\n` CR = Carriage Return = `\r` Also the static strings should not be final. This is to prevent the JVM from inlining the entire routine. If the string is final then it knows the input will never change. Remove the final keyword and it cannot know the variable, so cannot optimise as much. Given the large number of strings and methods this is cleaner if you use a JMH approach using a few `@State` classes. We can define the strings to chomp with an enum. JMH will create the state for the benchmark from all enum values if you use the enum with appropriate annotations. With this approach if you want to chomp another string then you just add a new value to the enum. Here is a more flexible version: ```java /** Define the strings to chomp. */ public enum ChompString { CHAR0(""), CHAR0_CR("\r"), CHAR0_LF("\n"), CHAR0_CR_LF("\r\n"), CHAR1("a"), CHAR1_CR("a\r"), CHAR1_LF("a\n"), CHAR1_CR_LF("a\r\n"), CHAR2("ab"), CHAR2_CR("ab\r"), CHAR2_LF("ab\n"), CHAR2_CR_LF("ab\r\n"), ; /** The string data. */ final String string; /** * Create an instance. * * @param string the string data */ ChompString(String string) { this.string = string; } } /** The benchmark data to chomp. */ @State(Scope.Benchmark) public static class ChompData { /** The data to chomp. */ @Param private ChompString data; /** * Gets the data. * * @return the data */ public String getData() { return data.string; } } /** The chomp method to benchmark. */ @State(Scope.Benchmark) public static class ChompMethod { /** The method name. */ @Param({"old", "new"}) private String name; /** The method. */ private UnaryOperator<String> method; /** * Gets the method. * * @return the method */ public UnaryOperator<String> getMethod() { return method; } /** Setup the chomp method. */ @Setup public void setup() { if ("old".equals(name)) { method = StringUtilsChompTest::chompOld; } else { method = StringUtilsChompTest::chompNew; } } } /** * Benchmark a single chomp of a string. * * @param data the data * @param method the method * @return the chomped string */ @Benchmark public String singleString(ChompData data, ChompMethod method) { return method.getMethod().apply(data.getData()); } ``` Run with: `mvn test -P benchmark -Dbenchmark=StringUtilsChompTest.singleString` ``` Benchmark (data) (name) Mode Cnt Score Error Units StringUtilsChompTest.singleString CHAR0 old avgt 5 3.136 ± 0.090 ns/op StringUtilsChompTest.singleString CHAR0 new avgt 5 3.184 ± 0.067 ns/op StringUtilsChompTest.singleString CHAR0_CR old avgt 5 3.933 ± 0.308 ns/op StringUtilsChompTest.singleString CHAR0_CR new avgt 5 4.090 ± 1.764 ns/op StringUtilsChompTest.singleString CHAR0_LF old avgt 5 4.079 ± 1.656 ns/op StringUtilsChompTest.singleString CHAR0_LF new avgt 5 3.852 ± 0.172 ns/op StringUtilsChompTest.singleString CHAR0_CR_LF old avgt 5 8.591 ± 4.080 ns/op StringUtilsChompTest.singleString CHAR0_CR_LF new avgt 5 8.593 ± 0.188 ns/op StringUtilsChompTest.singleString CHAR1 old avgt 5 3.753 ± 0.146 ns/op StringUtilsChompTest.singleString CHAR1 new avgt 5 3.893 ± 0.074 ns/op StringUtilsChompTest.singleString CHAR1_CR old avgt 5 16.363 ± 4.415 ns/op StringUtilsChompTest.singleString CHAR1_CR new avgt 5 14.285 ± 5.998 ns/op StringUtilsChompTest.singleString CHAR1_LF old avgt 5 14.586 ± 0.152 ns/op StringUtilsChompTest.singleString CHAR1_LF new avgt 5 14.427 ± 0.211 ns/op StringUtilsChompTest.singleString CHAR1_CR_LF old avgt 5 14.573 ± 0.781 ns/op StringUtilsChompTest.singleString CHAR1_CR_LF new avgt 5 15.104 ± 3.933 ns/op StringUtilsChompTest.singleString CHAR2 old avgt 5 4.000 ± 0.018 ns/op StringUtilsChompTest.singleString CHAR2 new avgt 5 4.226 ± 0.124 ns/op StringUtilsChompTest.singleString CHAR2_CR old avgt 5 15.954 ± 8.352 ns/op StringUtilsChompTest.singleString CHAR2_CR new avgt 5 14.033 ± 0.145 ns/op StringUtilsChompTest.singleString CHAR2_LF old avgt 5 14.376 ± 0.097 ns/op StringUtilsChompTest.singleString CHAR2_LF new avgt 5 18.650 ± 7.231 ns/op StringUtilsChompTest.singleString CHAR2_CR_LF old avgt 5 14.535 ± 0.231 ns/op StringUtilsChompTest.singleString CHAR2_CR_LF new avgt 5 14.436 ± 0.332 ns/op ``` Here there does not appear to be much difference between new and old. Note the outlier for `CHAR2_LF, new` with the high error. This should ideally be repeated. I was running other things on my workstation when this was running. Ideally I should have closed everything down, logged out and run from a terminal login. But I would not expect much change. The new method is not worse. However from the test on random strings we know it is faster: `mvn test -P benchmark -Dbenchmark=StringUtilsChompTest.randomStrings` ``` Benchmark (name) Mode Cnt Score Error Units StringUtilsChompTest.randomStrings old avgt 5 97037.105 ± 27475.538 ns/op StringUtilsChompTest.randomStrings new avgt 5 85709.846 ± 37420.959 ns/op ``` So the code in the PR is worthwhile. ########## File path: src/test/java/org/apache/commons/lang3/StringUtilsChompTest.java ########## @@ -0,0 +1,232 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.lang3; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.concurrent.TimeUnit; + +import static org.apache.commons.lang3.StringUtils.EMPTY; +import static org.apache.commons.lang3.StringUtils.isEmpty; +import static org.apache.commons.lang3.StringUtils.length; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Thread) +@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Fork(value = 1, jvmArgs = {"-server", "-Xms512M", "-Xmx512M"}) +public class StringUtilsChompTest { + // Different code paths with no chomp + private static final String string1 = ""; + private static final String string2 = "a"; + private static final String string3 = "aa"; + + // Single char chomp + private static final String string4 = "\r"; + private static final String string5 = "\n"; + + // Multi-char chomp + private static final String string6 = "\r\n"; + private static final String string7 = "a\n"; + private static final String string8 = "a\r"; + private static final String string9 = "a\r\n"; + + private static final String[] strings = buildStrings(); + + private static String[] buildStrings() { + String[] res = new String[128 * 128]; + for (int i = 0; i < 128; i++) { + for (int j = 0; j < 128; j++) { + StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append((char) i); + stringBuilder.append((char) j); + res[i * 128 + j] = stringBuilder.toString(); + } + } + return res; + } + + @Benchmark + public String test1_Empty_Old() { + return chompOld(string1); + } + + @Benchmark + public String test1_Empty_New() { + return chompNew(string1); + } + + @Benchmark + public String test2_No_Chomp_Old() { + return chompOld(string2); + } + + @Benchmark + public String test2_No_Chomp_New() { + return chompNew(string2); + } + + @Benchmark + public String test3_No_Chomp_Old() { + return chompOld(string3); + } + + @Benchmark + public String test3_No_Chomp_New() { + return chompNew(string3); + } + + @Benchmark + public String test4_R_Old() { + return chompOld(string4); + } + + @Benchmark + public String test4_R_New() { + return chompNew(string4); + } + + @Benchmark + public String test5_N_Old() { + return chompOld(string5); + } + + @Benchmark + public String test5_N_New() { + return chompNew(string5); + } + + @Benchmark + public String test6_R_N_Old() { + return chompOld(string6); + } + + @Benchmark + public String test6_R_N_New() { + return chompNew(string6); + } + + @Benchmark + public String test7_a_N_Old() { + return chompOld(string7); + } + + @Benchmark + public String test7_a_N_New() { + return chompNew(string7); + } + + @Benchmark + public String test8_a_N_Old() { Review comment: PS. I would also drop the chompNew method from StringUtilsChompTest. To prove that the new method is faster just call StringUtils.chomp or the copied version from 3.11: ```java @Setup public void setup() { if ("3.11".equals(name)) { method = StringUtilsChompTest::chomp_3_11; } else { method = StringUtils::chomp; } } ``` Then document the method in the test: ```java /** * The chomp method from StringUtils version 3.11. * * @param str the string * @return the chomped string */ private static String chomp_3_11(final String str) { ``` If anyone cares in the future to look at more optimisation then it would be easy to extend the benchmark. ########## File path: src/test/java/org/apache/commons/lang3/StringUtilsChompTest.java ########## @@ -0,0 +1,232 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.lang3; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.concurrent.TimeUnit; + +import static org.apache.commons.lang3.StringUtils.EMPTY; +import static org.apache.commons.lang3.StringUtils.isEmpty; +import static org.apache.commons.lang3.StringUtils.length; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Thread) +@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) +@Fork(value = 1, jvmArgs = {"-server", "-Xms512M", "-Xmx512M"}) +public class StringUtilsChompTest { + // Different code paths with no chomp + private static final String string1 = ""; + private static final String string2 = "a"; + private static final String string3 = "aa"; + + // Single char chomp + private static final String string4 = "\r"; + private static final String string5 = "\n"; + + // Multi-char chomp + private static final String string6 = "\r\n"; + private static final String string7 = "a\n"; + private static final String string8 = "a\r"; + private static final String string9 = "a\r\n"; + + private static final String[] strings = buildStrings(); + + private static String[] buildStrings() { + String[] res = new String[128 * 128]; + for (int i = 0; i < 128; i++) { + for (int j = 0; j < 128; j++) { + StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append((char) i); + stringBuilder.append((char) j); + res[i * 128 + j] = stringBuilder.toString(); + } + } + return res; + } + + @Benchmark + public String test1_Empty_Old() { + return chompOld(string1); + } + + @Benchmark + public String test1_Empty_New() { + return chompNew(string1); + } + + @Benchmark + public String test2_No_Chomp_Old() { + return chompOld(string2); + } + + @Benchmark + public String test2_No_Chomp_New() { + return chompNew(string2); + } + + @Benchmark + public String test3_No_Chomp_Old() { + return chompOld(string3); + } + + @Benchmark + public String test3_No_Chomp_New() { + return chompNew(string3); + } + + @Benchmark + public String test4_R_Old() { + return chompOld(string4); + } + + @Benchmark + public String test4_R_New() { + return chompNew(string4); + } + + @Benchmark + public String test5_N_Old() { + return chompOld(string5); + } + + @Benchmark + public String test5_N_New() { + return chompNew(string5); + } + + @Benchmark + public String test6_R_N_Old() { + return chompOld(string6); + } + + @Benchmark + public String test6_R_N_New() { + return chompNew(string6); + } + + @Benchmark + public String test7_a_N_Old() { + return chompOld(string7); + } + + @Benchmark + public String test7_a_N_New() { + return chompNew(string7); + } + + @Benchmark + public String test8_a_N_Old() { Review comment: PPS. Another case we have not tried is a `null` string. Using the JMH approach you just add an enum value `NULL(null)` to `ChompString`. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org