This is an automated email from the ASF dual-hosted git repository.

cwylie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new dca31d466c9 minor adjustments for performance (#16714)
dca31d466c9 is described below

commit dca31d466c90f9bf2faa1fa0a0f6e7aab62c8fb5
Author: Clint Wylie <cwy...@apache.org>
AuthorDate: Thu Jul 11 16:57:15 2024 -0700

    minor adjustments for performance (#16714)
    
    changes:
    * switch to stop using some string.format
    * switch some streams to classic loops
---
 .../groupby/epinephelinae/GroupByQueryEngine.java  | 35 +++++------
 .../epinephelinae/vector/VectorGroupByEngine.java  | 67 ++++++++++++----------
 .../query/timeseries/TimeseriesQueryEngine.java    |  3 +-
 .../org/apache/druid/segment/VirtualColumns.java   |  7 ++-
 .../nested/CompressedNestedDataComplexColumn.java  | 13 ++---
 .../nested/NestedCommonFormatColumnSerializer.java |  2 +-
 .../nested/NestedDataColumnSerializerV4.java       | 10 +---
 7 files changed, 73 insertions(+), 64 deletions(-)

diff --git 
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java
 
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java
index 085e6022aab..35f09c5446d 100644
--- 
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java
+++ 
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/GroupByQueryEngine.java
@@ -211,24 +211,25 @@ public class GroupByQueryEngine
       final List<DimensionSpec> dimensions
   )
   {
-    return dimensions
-        .stream()
-        .allMatch(
-            dimension -> {
-              if (dimension.mustDecorate()) {
-                // DimensionSpecs that decorate may turn singly-valued columns 
into multi-valued selectors.
-                // To be safe, we must return false here.
-                return false;
-              }
+    for (DimensionSpec dimension : dimensions) {
+      if (dimension.mustDecorate()) {
+        // DimensionSpecs that decorate may turn singly-valued columns into 
multi-valued selectors.
+        // To be safe, we must return false here.
+        return false;
+      }
+
+      // if dimension spec type is array, skip it since we can handle array or 
multi-valued
+      if (dimension.getOutputType().isArray()) {
+        continue;
+      }
 
-              // Now check column capabilities, which must be present and 
explicitly not multi-valued and not arrays
-              final ColumnCapabilities columnCapabilities = 
inspector.getColumnCapabilities(dimension.getDimension());
-              return dimension.getOutputType().isArray()
-                     || (columnCapabilities != null
-                         && columnCapabilities.hasMultipleValues().isFalse()
-                         && !columnCapabilities.isArray()
-                     );
-            });
+      // Now check column capabilities, which must be present and explicitly 
not multi-valued and not arrays
+      final ColumnCapabilities capabilities = 
inspector.getColumnCapabilities(dimension.getDimension());
+      if (capabilities == null || 
capabilities.hasMultipleValues().isMaybeTrue() || capabilities.isArray()) {
+        return false;
+      }
+    }
+    return true;
   }
 
   private abstract static class GroupByEngineIterator<KeyType> implements 
Iterator<ResultRow>, Closeable
diff --git 
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
 
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
index 93633644a2e..75f4539e8c3 100644
--- 
a/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
+++ 
b/processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/VectorGroupByEngine.java
@@ -29,6 +29,7 @@ import org.apache.druid.java.util.common.io.Closer;
 import org.apache.druid.java.util.common.parsers.CloseableIterator;
 import org.apache.druid.query.DruidProcessingConfig;
 import org.apache.druid.query.aggregation.AggregatorAdapters;
+import org.apache.druid.query.aggregation.AggregatorFactory;
 import org.apache.druid.query.dimension.DefaultDimensionSpec;
 import org.apache.druid.query.dimension.DimensionSpec;
 import org.apache.druid.query.filter.Filter;
@@ -204,9 +205,7 @@ public class VectorGroupByEngine
     return adapter.canVectorize(filter, query.getVirtualColumns(), false)
            && canVectorizeDimensions(inspector, query.getDimensions())
            && VirtualColumns.shouldVectorize(query, query.getVirtualColumns(), 
adapter)
-           && query.getAggregatorSpecs()
-                   .stream()
-                   .allMatch(aggregatorFactory -> 
aggregatorFactory.canVectorize(inspector));
+           && canVectorizeAggregators(inspector, query.getAggregatorSpecs());
   }
 
   private static boolean canVectorizeDimensions(
@@ -214,35 +213,45 @@ public class VectorGroupByEngine
       final List<DimensionSpec> dimensions
   )
   {
-    return dimensions
-        .stream()
-        .allMatch(
-            dimension -> {
-              if (!dimension.canVectorize()) {
-                return false;
-              }
+    for (DimensionSpec dimension : dimensions) {
+      if (!dimension.canVectorize()) {
+        return false;
+      }
 
-              if (dimension.mustDecorate()) {
-                // group by on multi value dimensions are not currently 
supported
-                // DimensionSpecs that decorate may turn singly-valued columns 
into multi-valued selectors.
-                // To be safe, we must return false here.
-                return false;
-              }
+      if (dimension.mustDecorate()) {
+        // group by on multi value dimensions are not currently supported
+        // DimensionSpecs that decorate may turn singly-valued columns into 
multi-valued selectors.
+        // To be safe, we must return false here.
+        return false;
+      }
 
-              if (!dimension.getOutputType().isPrimitive()) {
-                // group by on arrays and complex types is not currently 
supported in the vector processing engine
-                return false;
-              }
+      if (!dimension.getOutputType().isPrimitive()) {
+        // group by on arrays and complex types is not currently supported in 
the vector processing engine
+        return false;
+      }
 
-              // Now check column capabilities.
-              final ColumnCapabilities columnCapabilities = 
inspector.getColumnCapabilities(dimension.getDimension());
-              // null here currently means the column does not exist, nil 
columns can be vectorized
-              if (columnCapabilities == null) {
-                return true;
-              }
-              // must be single valued
-              return columnCapabilities.hasMultipleValues().isFalse();
-            });
+      // Now check column capabilities.
+      final ColumnCapabilities columnCapabilities = 
inspector.getColumnCapabilities(dimension.getDimension());
+      if (columnCapabilities != null && 
columnCapabilities.hasMultipleValues().isMaybeTrue()) {
+        // null here currently means the column does not exist, nil columns 
can be vectorized
+        // multi-value columns implicit unnest is not currently supported in 
the vector processing engine
+        return false;
+      }
+    }
+    return true;
+  }
+
+  public static boolean canVectorizeAggregators(
+      final ColumnInspector inspector,
+      final List<AggregatorFactory> aggregatorFactories
+  )
+  {
+    for (AggregatorFactory aggregatorFactory : aggregatorFactories) {
+      if (!aggregatorFactory.canVectorize(inspector)) {
+        return false;
+      }
+    }
+    return true;
   }
 
   @VisibleForTesting
diff --git 
a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java
 
b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java
index c5e83b84e87..d8369c8c6da 100644
--- 
a/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java
+++ 
b/processing/src/main/java/org/apache/druid/query/timeseries/TimeseriesQueryEngine.java
@@ -37,6 +37,7 @@ import org.apache.druid.query.aggregation.Aggregator;
 import org.apache.druid.query.aggregation.AggregatorAdapters;
 import org.apache.druid.query.aggregation.AggregatorFactory;
 import org.apache.druid.query.filter.Filter;
+import org.apache.druid.query.groupby.epinephelinae.vector.VectorGroupByEngine;
 import org.apache.druid.query.vector.VectorCursorGranularizer;
 import org.apache.druid.segment.ColumnInspector;
 import org.apache.druid.segment.SegmentMissingException;
@@ -103,7 +104,7 @@ public class TimeseriesQueryEngine
     final boolean doVectorize = query.context().getVectorize().shouldVectorize(
         adapter.canVectorize(filter, query.getVirtualColumns(), descending)
         && VirtualColumns.shouldVectorize(query, query.getVirtualColumns(), 
adapter)
-        && query.getAggregatorSpecs().stream().allMatch(aggregatorFactory -> 
aggregatorFactory.canVectorize(inspector))
+        && VectorGroupByEngine.canVectorizeAggregators(inspector, 
query.getAggregatorSpecs())
     );
 
     final Sequence<Result<TimeseriesResultValue>> result;
diff --git 
a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java 
b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
index 79354a4fa2a..a32a85d16c7 100644
--- a/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
+++ b/processing/src/main/java/org/apache/druid/segment/VirtualColumns.java
@@ -266,7 +266,12 @@ public class VirtualColumns implements Cacheable
   public boolean canVectorize(ColumnInspector columnInspector)
   {
     final ColumnInspector inspector = wrapInspector(columnInspector);
-    return virtualColumns.stream().allMatch(virtualColumn -> 
virtualColumn.canVectorize(inspector));
+    for (VirtualColumn virtualColumn : virtualColumns) {
+      if (!virtualColumn.canVectorize(inspector)) {
+        return false;
+      }
+    }
+    return true;
   }
 
   /**
diff --git 
a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java
 
b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java
index d3869bd9ef5..7faf837db1c 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java
@@ -25,6 +25,7 @@ import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Sets;
 import com.google.common.primitives.Doubles;
 import org.apache.druid.collections.bitmap.ImmutableBitmap;
+import org.apache.druid.error.DruidException;
 import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.java.util.common.RE;
@@ -915,13 +916,11 @@ public abstract class 
CompressedNestedDataComplexColumn<TStringDictionary extend
       );
       // we should check this someday soon, but for now just read it to push 
the buffer position ahead
       int flags = dataBuffer.getInt();
-      Preconditions.checkState(
-          flags == DictionaryEncodedColumnPartSerde.NO_FLAGS,
-          StringUtils.format(
-              "Unrecognized bits set in space reserved for future flags for 
field column [%s]",
-              field
-          )
-      );
+      if (flags != DictionaryEncodedColumnPartSerde.NO_FLAGS) {
+        throw DruidException.defensive(
+            "Unrecognized bits set in space reserved for future flags for 
field column [%s]", field
+        );
+      }
 
       final Supplier<FixedIndexed<Integer>> localDictionarySupplier = 
FixedIndexed.read(
           dataBuffer,
diff --git 
a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java
 
b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java
index 59c7da7fd21..68e1da96756 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/nested/NestedCommonFormatColumnSerializer.java
@@ -123,6 +123,6 @@ public abstract class NestedCommonFormatColumnSerializer 
implements GenericColum
    */
   public static String getInternalFileName(String fileNameBase, String field)
   {
-    return StringUtils.format("%s.%s", fileNameBase, field);
+    return fileNameBase + "." + field;
   }
 }
diff --git 
a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java
 
b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java
index efa31a13a3f..3d39b31bbbc 100644
--- 
a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java
+++ 
b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java
@@ -27,7 +27,6 @@ import org.apache.druid.common.config.NullHandling;
 import org.apache.druid.java.util.common.FileUtils;
 import org.apache.druid.java.util.common.ISE;
 import org.apache.druid.java.util.common.RE;
-import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.io.Closer;
 import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
 import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter;
@@ -182,7 +181,7 @@ public class NestedDataColumnSerializerV4 implements 
GenericColumnSerializer<Str
     doubleDictionaryWriter.open();
 
     rawWriter = new CompressedVariableSizedBlobColumnSerializer(
-        getInternalFileName(name, RAW_FILE_NAME),
+        NestedCommonFormatColumnSerializer.getInternalFileName(name, 
RAW_FILE_NAME),
         segmentWriteOutMedium,
         indexSpec.getJsonCompression() != null ? 
indexSpec.getJsonCompression() : CompressionStrategy.LZ4
     );
@@ -390,14 +389,9 @@ public class NestedDataColumnSerializerV4 implements 
GenericColumnSerializer<Str
 
   private void writeInternal(FileSmoosher smoosher, Serializer serializer, 
String fileName) throws IOException
   {
-    final String internalName = getInternalFileName(name, fileName);
+    final String internalName = 
NestedCommonFormatColumnSerializer.getInternalFileName(name, fileName);
     try (SmooshedWriter smooshChannel = 
smoosher.addWithSmooshedWriter(internalName, serializer.getSerializedSize())) {
       serializer.writeTo(smooshChannel, smoosher);
     }
   }
-
-  public static String getInternalFileName(String fileNameBase, String field)
-  {
-    return StringUtils.format("%s.%s", fileNameBase, field);
-  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@druid.apache.org
For additional commands, e-mail: commits-h...@druid.apache.org

Reply via email to