gianm commented on a change in pull request #9181: Speed up String first/last aggregators when folding isn't needed. URL: https://github.com/apache/druid/pull/9181#discussion_r367656434
########## File path: processing/src/main/java/org/apache/druid/query/aggregation/first/StringFirstLastUtils.java ########## @@ -33,23 +36,63 @@ { private static final int NULL_VALUE = -1; + /** + * Shorten "s" to "maxBytes" chars. Fast and loose because these are *chars* not *bytes*. Use + * {@link #chop(String, int)} for slower, but accurate chopping. + */ + @Nullable + public static String fastLooseChop(@Nullable final String s, final int maxBytes) + { + if (s == null || s.length() <= maxBytes) { + return s; + } else { + return s.substring(0, maxBytes); + } + } + + /** + * Shorten "s" to what could fit in "maxBytes" bytes as UTF-8. + */ @Nullable public static String chop(@Nullable final String s, final int maxBytes) { if (s == null) { return null; } else { - // Shorten firstValue to what could fit in maxBytes as UTF-8. final byte[] bytes = new byte[maxBytes]; final int len = StringUtils.toUtf8WithLimit(s, ByteBuffer.wrap(bytes)); return new String(bytes, 0, len, StandardCharsets.UTF_8); } } + /** + * Returns whether a given value selector *might* contain SerializablePairLongString objects. + */ + public static boolean selectorNeedsFoldCheck( + final BaseObjectColumnValueSelector<?> valueSelector, + @Nullable final ColumnCapabilities valueSelectorCapabilities + ) + { + if (valueSelectorCapabilities != null && valueSelectorCapabilities.getType() != ValueType.COMPLEX) { + // Known, non-complex type. + return false; + } + + if (valueSelector instanceof NilColumnValueSelector) { + // Nil column, definitely no SerializablePairLongStrings. + return false; + } + + // Check if the reported class could possibly be SerializablePairLongString. + final Class<?> clazz = valueSelector.classOfObject(); + return clazz.isAssignableFrom(SerializablePairLongString.class) Review comment: I changed it to: ```java // Check if the selector class could possibly be a SerializablePairLongString (either a superclass or subclass). ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@druid.apache.org For additional commands, e-mail: commits-h...@druid.apache.org