This is an automated email from the ASF dual-hosted git repository. cwylie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push: new dca31d466c9 minor adjustments for performance (#16714) dca31d466c9 is described below commit dca31d466c90f9bf2faa1fa0a0f6e7aab62c8fb5 Author: Clint Wylie <cwy...@apache.org> AuthorDate: Thu Jul 11 16:57:15 2024 -0700 minor adjustments for performance (#16714) changes: * switch to stop using some string.format * switch some streams to classic loops --- .../groupby/epinephelinae/GroupByQueryEngine.java | 35 +++++------ .../epinephelinae/vector/VectorGroupByEngine.java | 67 ++++++++++++---------- .../query/timeseries/TimeseriesQueryEngine.java | 3 +- .../org/apache/druid/segment/VirtualColumns.java | 7 ++- .../nested/CompressedNestedDataComplexColumn.java | 13 ++--- .../nested/NestedCommonFormatColumnSerializer.java | 2 +- .../nested/NestedDataColumnSerializerV4.java | 10 +--- 7 files changed, 73 insertions(+), 64 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java index 085e6022aab..35f09c5446d 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java @@ -211,24 +211,25 @@ public class GroupByQueryEngine final List<DimensionSpec> dimensions ) { - return dimensions - .stream() - .allMatch( - dimension -> { - if (dimension.mustDecorate()) { - // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors. - // To be safe, we must return false here. - return false; - } + for (DimensionSpec dimension : dimensions) { + if (dimension.mustDecorate()) { + // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors. + // To be safe, we must return false here. + return false; + } + + // if dimension spec type is array, skip it since we can handle array or multi-valued + if (dimension.getOutputType().isArray()) { + continue; + } - // Now check column capabilities, which must be present and explicitly not multi-valued and not arrays - final ColumnCapabilities columnCapabilities = inspector.getColumnCapabilities(dimension.getDimension()); - return dimension.getOutputType().isArray() - || (columnCapabilities != null - && columnCapabilities.hasMultipleValues().isFalse() - && !columnCapabilities.isArray() - ); - }); + // Now check column capabilities, which must be present and explicitly not multi-valued and not arrays + final ColumnCapabilities capabilities = inspector.getColumnCapabilities(dimension.getDimension()); + if (capabilities == null || capabilities.hasMultipleValues().isMaybeTrue() || capabilities.isArray()) { + return false; + } + } + return true; } private abstract static class GroupByEngineIterator<KeyType> implements Iterator<ResultRow>, Closeable diff --git a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java index 93633644a2e..75f4539e8c3 100644 --- a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java +++ b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java @@ -29,6 +29,7 @@ import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.parsers.CloseableIterator; import org.apache.druid.query.DruidProcessingConfig; import org.apache.druid.query.aggregation.AggregatorAdapters; +import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.dimension.DefaultDimensionSpec; import org.apache.druid.query.dimension.DimensionSpec; import org.apache.druid.query.filter.Filter; @@ -204,9 +205,7 @@ public class VectorGroupByEngine return adapter.canVectorize(filter, query.getVirtualColumns(), false) && canVectorizeDimensions(inspector, query.getDimensions()) && VirtualColumns.shouldVectorize(query, query.getVirtualColumns(), adapter) - && query.getAggregatorSpecs() - .stream() - .allMatch(aggregatorFactory -> aggregatorFactory.canVectorize(inspector)); + && canVectorizeAggregators(inspector, query.getAggregatorSpecs()); } private static boolean canVectorizeDimensions( @@ -214,35 +213,45 @@ public class VectorGroupByEngine final List<DimensionSpec> dimensions ) { - return dimensions - .stream() - .allMatch( - dimension -> { - if (!dimension.canVectorize()) { - return false; - } + for (DimensionSpec dimension : dimensions) { + if (!dimension.canVectorize()) { + return false; + } - if (dimension.mustDecorate()) { - // group by on multi value dimensions are not currently supported - // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors. - // To be safe, we must return false here. - return false; - } + if (dimension.mustDecorate()) { + // group by on multi value dimensions are not currently supported + // DimensionSpecs that decorate may turn singly-valued columns into multi-valued selectors. + // To be safe, we must return false here. + return false; + } - if (!dimension.getOutputType().isPrimitive()) { - // group by on arrays and complex types is not currently supported in the vector processing engine - return false; - } + if (!dimension.getOutputType().isPrimitive()) { + // group by on arrays and complex types is not currently supported in the vector processing engine + return false; + } - // Now check column capabilities. - final ColumnCapabilities columnCapabilities = inspector.getColumnCapabilities(dimension.getDimension()); - // null here currently means the column does not exist, nil columns can be vectorized - if (columnCapabilities == null) { - return true; - } - // must be single valued - return columnCapabilities.hasMultipleValues().isFalse(); - }); + // Now check column capabilities. + final ColumnCapabilities columnCapabilities = inspector.getColumnCapabilities(dimension.getDimension()); + if (columnCapabilities != null && columnCapabilities.hasMultipleValues().isMaybeTrue()) { + // null here currently means the column does not exist, nil columns can be vectorized + // multi-value columns implicit unnest is not currently supported in the vector processing engine + return false; + } + } + return true; + } + + public static boolean canVectorizeAggregators( + final ColumnInspector inspector, + final List<AggregatorFactory> aggregatorFactories + ) + { + for (AggregatorFactory aggregatorFactory : aggregatorFactories) { + if (!aggregatorFactory.canVectorize(inspector)) { + return false; + } + } + return true; } @VisibleForTesting diff --git a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java index c5e83b84e87..d8369c8c6da 100644 --- a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java +++ b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java @@ -37,6 +37,7 @@ import org.apache.druid.query.aggregation.Aggregator; import org.apache.druid.query.aggregation.AggregatorAdapters; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.groupby.epinephelinae.vector.VectorGroupByEngine; import org.apache.druid.query.vector.VectorCursorGranularizer; import org.apache.druid.segment.ColumnInspector; import org.apache.druid.segment.SegmentMissingException; @@ -103,7 +104,7 @@ public class TimeseriesQueryEngine final boolean doVectorize = query.context().getVectorize().shouldVectorize( adapter.canVectorize(filter, query.getVirtualColumns(), descending) && VirtualColumns.shouldVectorize(query, query.getVirtualColumns(), adapter) - && query.getAggregatorSpecs().stream().allMatch(aggregatorFactory -> aggregatorFactory.canVectorize(inspector)) + && VectorGroupByEngine.canVectorizeAggregators(inspector, query.getAggregatorSpecs()) ); final Sequence<Result<TimeseriesResultValue>> result; diff --git a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java index 79354a4fa2a..a32a85d16c7 100644 --- a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java +++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java @@ -266,7 +266,12 @@ public class VirtualColumns implements Cacheable public boolean canVectorize(ColumnInspector columnInspector) { final ColumnInspector inspector = wrapInspector(columnInspector); - return virtualColumns.stream().allMatch(virtualColumn -> virtualColumn.canVectorize(inspector)); + for (VirtualColumn virtualColumn : virtualColumns) { + if (!virtualColumn.canVectorize(inspector)) { + return false; + } + } + return true; } /** diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index d3869bd9ef5..7faf837db1c 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -25,6 +25,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Sets; import com.google.common.primitives.Doubles; import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.IAE; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.RE; @@ -915,13 +916,11 @@ public abstract class CompressedNestedDataComplexColumn<TStringDictionary extend ); // we should check this someday soon, but for now just read it to push the buffer position ahead int flags = dataBuffer.getInt(); - Preconditions.checkState( - flags == DictionaryEncodedColumnPartSerde.NO_FLAGS, - StringUtils.format( - "Unrecognized bits set in space reserved for future flags for field column [%s]", - field - ) - ); + if (flags != DictionaryEncodedColumnPartSerde.NO_FLAGS) { + throw DruidException.defensive( + "Unrecognized bits set in space reserved for future flags for field column [%s]", field + ); + } final Supplier<FixedIndexed<Integer>> localDictionarySupplier = FixedIndexed.read( dataBuffer, diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java index 59c7da7fd21..68e1da96756 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java @@ -123,6 +123,6 @@ public abstract class NestedCommonFormatColumnSerializer implements GenericColum */ public static String getInternalFileName(String fileNameBase, String field) { - return StringUtils.format("%s.%s", fileNameBase, field); + return fileNameBase + "." + field; } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java index efa31a13a3f..3d39b31bbbc 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java @@ -27,7 +27,6 @@ import org.apache.druid.common.config.NullHandling; import org.apache.druid.java.util.common.FileUtils; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.RE; -import org.apache.druid.java.util.common.StringUtils; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter; @@ -182,7 +181,7 @@ public class NestedDataColumnSerializerV4 implements GenericColumnSerializer<Str doubleDictionaryWriter.open(); rawWriter = new CompressedVariableSizedBlobColumnSerializer( - getInternalFileName(name, RAW_FILE_NAME), + NestedCommonFormatColumnSerializer.getInternalFileName(name, RAW_FILE_NAME), segmentWriteOutMedium, indexSpec.getJsonCompression() != null ? indexSpec.getJsonCompression() : CompressionStrategy.LZ4 ); @@ -390,14 +389,9 @@ public class NestedDataColumnSerializerV4 implements GenericColumnSerializer<Str private void writeInternal(FileSmoosher smoosher, Serializer serializer, String fileName) throws IOException { - final String internalName = getInternalFileName(name, fileName); + final String internalName = NestedCommonFormatColumnSerializer.getInternalFileName(name, fileName); try (SmooshedWriter smooshChannel = smoosher.addWithSmooshedWriter(internalName, serializer.getSerializedSize())) { serializer.writeTo(smooshChannel, smoosher); } } - - public static String getInternalFileName(String fileNameBase, String field) - { - return StringUtils.format("%s.%s", fileNameBase, field); - } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@druid.apache.org For additional commands, e-mail: commits-h...@druid.apache.org