clintropolis commented on code in PR #19460: URL: https://github.com/apache/druid/pull/19460#discussion_r3352570208
########## processing/src/main/java/org/apache/druid/segment/projections/ClusteredValueGroupsBaseTableSchema.java: ########## @@ -0,0 +1,410 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.projections; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.Lists; +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.granularity.Granularity; +import org.apache.druid.query.OrderBy; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.segment.AggregateProjectionMetadata; +import org.apache.druid.segment.Metadata; +import org.apache.druid.segment.VirtualColumn; +import org.apache.druid.segment.VirtualColumns; +import org.apache.druid.segment.column.ColumnHolder; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.segment.column.RowSignature; +import org.apache.druid.utils.CollectionUtils; + +import 
javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; + +/** + * Top-level summary for a clustered base table whose groups are identified by discrete clustering-value tuples. Each + * tuple group is internally stored as a separate table without storing the cluster columns, which are pulled into this + * metadata. This is optimizing for use cases which typically only need to read from a single group via filters present + * on a query. Cluster groups nest inside as {@link #getClusterGroups()}; their column data live in the V10 segment + * file under dictionary-id-tuple prefixes ({@code __base$<id0>_<id1>...<idK>/<col>}), where the ids index into + * {@link #getClusteringDictionaries()}. + */ +public class ClusteredValueGroupsBaseTableSchema implements BaseTableProjectionSchema +{ + public static final String TYPE_NAME = "clustered-value-groups-base-table"; + + private final VirtualColumns virtualColumns; + private final List<String> columnNames; + private final AggregatorFactory[] aggregators; + private final List<OrderBy> ordering; + private final RowSignature clusteringColumns; + private final List<String> sharedColumns; + private final ClusteringDictionaries clusteringDictionaries; + private final List<TableClusterGroupSpec> clusterGroups; + + // computed + private final int timeColumnPosition; + private final Granularity effectiveGranularity; + + @JsonCreator + public ClusteredValueGroupsBaseTableSchema( Review Comment: I'd prefer to keep terminology aligned with MSQ 'clustered by' since this is basically pushing that operation inside of the segment. 
Additionally, 'group' feels slightly ambiguous alongside the 'rollup' base table projection schema, which defines its own schema in terms of grouping columns and aggregators (see https://github.com/apache/druid/blob/master/processing/src/main/java/org/apache/druid/segment/projections/RollupTableProjectionSchema.java#L74). For both of these reasons, I've mostly left things as-is. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
