[ https://issues.apache.org/jira/browse/LOG4J2-1274?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15179726#comment-15179726 ]
Remko Popma commented on LOG4J2-1274: ------------------------------------- Found and fixed a performance bug in TextEncoderHelper: copying chars one by one from the StringBuilder into the CharBuffer is extremely slow. This is much faster: {code} /** * Copies characters from the StringBuilder into the CharBuffer, * starting at the specified offset and ending when either all * characters have been copied or when the CharBuffer is full. * * @return the number of characters that were copied */ static int copy(final StringBuilder source, final int offset, final CharBuffer destination) { final int length = Math.min(source.length() - offset, destination.remaining()); final char[] array = destination.array(); final int start = destination.position(); source.getChars(offset, offset + length, array, start); destination.position(start + length); return length; } {code} Encoding a StringBuilder into a ByteBufferDestination went from 250 ns to 90 ns after this change. After this change the new {{Layout.encode}} method not only avoids allocation but is also a little faster than the {{Layout.toByteArray}} method (329 vs 378 ns). {code} /** * Tests Log4j2 TextEncoderHelper performance. */ // ============================== HOW TO RUN THIS TEST: ==================================== // // single thread: // java -Dfile.encoding=ISO-8859-1 -Dlog4j2.is.webapp=false -Dlog4j2.enable.threadlocals=true -jar log4j-perf/target/benchmarks.jar ".*TextEncoderHelper.*" -f 1 -wi 5 -i 10 // java -Dfile.encoding=UTF8 -Dlog4j2.is.webapp=false -Dlog4j2.enable.threadlocals=true -jar log4j-perf/target/benchmarks.jar ".*TextEncoderHelper.*" -f 1 -wi 5 -i 10 // // Usage help: // java -jar log4j-perf/target/benchmarks.jar -help // @State(Scope.Thread) public class TextEncoderHelperBenchmark { final static String STR = "AB!(%087936DZYXQWEIOP$#^~-=/><nb"; // length=32 final static String STR_TEXT = "20:01:59.9876 INFO [org.apache.logging.log4j.perf.jmh.TextEncoderHelperBenchmark] AB!(%087936DZYXQWEIOP$#^~-=/><nb"; // length=32 final static StringBuilder BUFF_TEXT = new StringBuilder(STR_TEXT); final static CharBuffer CHAR_BUFFER = CharBuffer.wrap(STR.toCharArray()); final static LogEvent EVENT = createLogEvent(); private static final Charset CHARSET_DEFAULT = Charset.defaultCharset(); private final PatternLayout PATTERN_M_C_D = PatternLayout.createLayout("%d %c %m%n", null, null, null, CHARSET_DEFAULT, false, true, null, null); private final Destination destination = new Destination(); class Destination implements ByteBufferDestination { long count = 0; ByteBuffer buffer = ByteBuffer.wrap(new byte[256*1024]); @Override public ByteBuffer getByteBuffer() { return buffer; } @Override public ByteBuffer drain(ByteBuffer buf) { buf.flip(); count += buf.limit(); buf.clear(); return buf; } } private static LogEvent createLogEvent() { final Marker marker = null; final String fqcn = "com.mycom.myproject.mypackage.MyClass"; final Level level = Level.DEBUG; final Message message = new SimpleMessage(STR); final Throwable t = null; final Map<String, String> mdc = null; final ContextStack ndc = null; final String threadName = null; final StackTraceElement location = null; final long timestamp = 12345678; return Log4jLogEvent.newBuilder() // .setLoggerName("name(ignored)") // .setMarker(marker) // .setLoggerFqcn(fqcn) // .setLevel(level) // .setMessage(message) // .setThrown(t) // .setContextMap(mdc) // .setContextStack(ndc) // .setThreadName(threadName) // .setSource(location) // .setTimeMillis(timestamp) // .build(); } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) public byte[] stringGetBytes() { return STR_TEXT.getBytes(); } private static final ThreadLocal<TextEncoderHelper> textEncoderHelper = new ThreadLocal<>(); private TextEncoderHelper getCachedTextEncoderHelper() { TextEncoderHelper result = textEncoderHelper.get(); if (result == null) { result = new TextEncoderHelper(CHARSET_DEFAULT); textEncoderHelper.set(result); } return result; } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) public long textEncoderEncode() { final TextEncoderHelper helper = getCachedTextEncoderHelper(); helper.encodeText(BUFF_TEXT, destination); return destination.count; } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) public long charBufferEncode() { final TextEncoderHelper helper = getCachedTextEncoderHelper(); CHAR_BUFFER.limit(CHAR_BUFFER.capacity()); CHAR_BUFFER.position(0); helper.encodeText(CHAR_BUFFER, destination); return destination.count; } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) public long charBufferCopyAndEncode() { final TextEncoderHelper helper = getCachedTextEncoderHelper(); CHAR_BUFFER.clear(); CHAR_BUFFER.put(STR); CHAR_BUFFER.flip(); helper.encodeText(CHAR_BUFFER, destination); return destination.count; } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) public long textHelperCopyAndEncode() { final TextEncoderHelper helper = getCachedTextEncoderHelper(); CHAR_BUFFER.clear(); copy(BUFF_TEXT, 0, CHAR_BUFFER); CHAR_BUFFER.flip(); helper.encodeText(CHAR_BUFFER, destination); return destination.count; } /** * Copies characters from the CharSequence into the CharBuffer, * starting at the specified offset and ending when either all * characters have been copied or when the CharBuffer is full. * * @return the number of characters that were copied */ private static int copy(final StringBuilder source, final int offset, final CharBuffer destination) { final int length = Math.min(source.length() - offset, destination.remaining()); final char[] array = destination.array(); final int start = destination.position(); source.getChars(offset, offset+length, array, start); destination.position(start + length); return length; } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) public long byteArrayMCD() { byte[] data = PATTERN_M_C_D.toByteArray(EVENT); ByteBuffer buff = destination.getByteBuffer(); if (buff.remaining() < data.length) { buff = destination.drain(buff); } buff.put(data); return destination.count; } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) public long serializableMCD() { String str = PATTERN_M_C_D.toSerializable(EVENT); byte[] data = str.getBytes(CHARSET_DEFAULT); ByteBuffer buff = destination.getByteBuffer(); if (buff.remaining() < data.length) { buff = destination.drain(buff); } buff.put(data); return destination.count; } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) public StringBuilder toTextMCD() { StringBuilder str = PATTERN_M_C_D.toText(EVENT); return str; } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) public String toStringMCD() { String str = PATTERN_M_C_D.toSerializable(EVENT); return str; } @Benchmark @BenchmarkMode(Mode.SampleTime) @OutputTimeUnit(TimeUnit.NANOSECONDS) public long encodeMCD() { PATTERN_M_C_D.encode(EVENT, destination); return destination.count; } } {code} > Layout improvements to enable avoiding temporary object allocation > ------------------------------------------------------------------ > > Key: LOG4J2-1274 > URL: https://issues.apache.org/jira/browse/LOG4J2-1274 > Project: Log4j 2 > Issue Type: Improvement > Components: Layouts > Affects Versions: 2.5 > Reporter: Remko Popma > Assignee: Remko Popma > Fix For: 2.6 > > > *Problem* > The current Layout API does not make it easy for implementors to avoid > creating temporary objects. Especially these methods: > {code} > byte[] toByteArray(LogEvent); > T toSerializable(LogEvent); > {code} > The byte array returned from {{toByteArray(LogEvent)}} cannot be re-used > between log events, since the caller cannot know how many bytes a partially > filled array contains. > In practice, all Layout implementations in Log4j 2 except SerializedLayout > implement the {{StringLayout}} subinterface. This means that the > {{toSerializable()}} method needs to return a new String object for every log > event. > *Forces* > I am interested in reducing or even eliminating the allocation of temporary > objects for text-based layouts. Many of these use (and re-use) a > StringBuilder to build a text representation of the current log event. Once > this text representation is built, it needs to be converted to bytes that the > Appender can consume. I am aware of two ways in the JDK to convert text to > bytes: > * the various {{String#getBytes}} methods - these all allocate a new byte > array for each invocation > * the underlying {{java.nio.charset.CharsetEncoder}} used internally by > String - especially method {{CoderResult encode(CharBuffer in, ByteBuffer > out, boolean endOfInput)}} which converts characters to bytes without object > allocation. > The last method is interesting because this gives us an opportunity to also > reduce the amount of copying by directly supplying the ByteBuffer buffer used > by RandomAccessFileAppender, or the MappedByteBuffer of the > MemoryMappedFileAppender. > The resulting API needs to support the fact that implementations may need to > call {{CharsetEncoder#encode}} multiple times: > * The ByteBuffer may not have enough remaining space to hold all the data; > {{CharsetEncoder#encode}} returns {{CoderResult.OVERFLOW}} to signal this so > the caller can consume the contents and reset/clear the buffer before > continuing. > * The CharBuffer may not be large enough to hold the full text representation > of the log event. Again, {{CharsetEncoder#encode}} may need to be invoked > multiple times. > *Proposal* > (Thinking out loud here, I'm open to suggestions.) > It may be sufficient for the layout interface to have a single additional new > method: > {code} > /** > * Formats the event suitable for display and writes the result to the > specified destination. > * > * @param event The Logging Event. > * @param destination Holds the ByteBuffer to write into. > */ > void writeTo(LogEvent e, ByteBufferDestination destination); > {code} > Appenders that want to be allocation-free need to implement the > {{ByteBufferDestination}} interface: > {code} > public interface ByteBufferDestination { > ByteBuffer getByteBuffer(); > /** > * Consumes the buffer content and returns a buffer with more available() > space > * (which may or may not be the same instance). > * <p> > * Called by the producer when the buffer becomes too full > * to write more data into it. > */ > ByteBuffer drain(ByteBuffer buf); > } > {code} > Usage: for example RandomAccessFileAppender code can look like this: > {code} > // RandomAccessFileAppender > public void append(final LogEvent event) { > getLayout().writeTo(event, (ByteBufferDestination) manager); > } > {code} > Layout implementation of the {{writeTo}} method: layouts need to know how to > convert LogEvents to text, but writing this text into the ByteBuffer can be > delegated to a helper: > {code} > // some layout > public void writeTo(LogEvent event, ByteBufferDestination destination) { > StringBuilder text = toText(event, getCachedStringBuilder()); > > TextEncoderHelper helper = getCachedHelper(); > helper.encodeWithoutAllocation(text, destination); > } > /** > * Creates a text representation of the specified log event > * and writes it into the specified StringBuilder. > * <p> > * Implementations are free to return a new StringBuilder if they can > * detect in advance that the specified StringBuilder is too small. > */ > StringBuilder toText(LogEvent e, StringBuilder destination) {} // existing > logic goes here > public String toSerializable(LogEvent event) { // factored out logic to > toText() > return toText(event, getCachedStringBuilder()).toString(); > } > {code} > Helper contains utility code for moving the text into a CharBuffer, and for > repeatedly calling {{CharsetEncoder#encode}}. > {code} > public class TextEncoderHelper { > TextEncoderHelper(Charset charset) {} // create CharsetEncoder > void encodeWithoutAllocation(StringBuilder text, BinaryDestination > destination) { > ByteBuffer byteBuf = destination.getByteBuffer(); > CharBuffer charBuf = getCachedCharBuffer(); > charBuf.reset(); > int start = 0; > int todoChars = text.length(); > do { > int copied = copy(text, start, charBuf); > start += copied; > todoChars -= copied; > boolean endOfInput = todoChars <= 0; > > charBuf.flip(); > CodeResult result; > do { > result = charsetEncoder.encode(charBuf, byteBuf, endOfInput); > if (result == CodeResult.OVERFLOW) { // byteBuf full > // destination consumes contents > // and returns byte buffer with more capacity > byteBuf = destination.drain(byteBuf); > } > } while (result == CodeResult.OVERFLOW); > } while (!endOfInput); > } > > /** > * Copies characters from the StringBuilder into the CharBuffer, > * starting at the specified offset and ending when either all > * characters have been copied or when the CharBuffer is full. > * > * @return the number of characters that were copied > */ > int copy(StringBuilder source, int offset, CharBuffer destination) {} > } > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: log4j-dev-unsubscr...@logging.apache.org For additional commands, e-mail: log4j-dev-h...@logging.apache.org