Jackie-Jiang commented on a change in pull request #8029: URL: https://github.com/apache/pinot/pull/8029#discussion_r830401982
########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/reduce/filter/RowMatcherFactory.java ########## @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.reduce.filter; + +import org.apache.pinot.common.request.context.FilterContext; + + +/** + * Factory for RowMatcher. + */ +public class RowMatcherFactory { + private RowMatcherFactory() { + } + + /** + * Helper method to construct a RowMatcher based on the given filter. + */ + public static RowMatcher getRowMatcher(FilterContext filter, ValueExtractorFactory valueExtractorFactory) { + switch (filter.getType()) { + case AND: + return new AndRowMatcher(filter.getChildren(), valueExtractorFactory); + case OR: + return new OrRowMatcher(filter.getChildren(), valueExtractorFactory); + case PREDICATE: Review comment: We need to handle `NOT` here. This is already fixed in #8366. If that one is merged first, we should integrate the fix here. 
########## File path: pinot-core/src/test/java/org/apache/pinot/queries/BaseQueriesTest.java ########## @@ -197,21 +198,31 @@ protected BrokerResponseNative getBrokerResponseForSqlQuery(String sqlQuery, Pla } queryOptions.put(Request.QueryOptionKey.GROUP_BY_MODE, Request.SQL); queryOptions.put(Request.QueryOptionKey.RESPONSE_FORMAT, Request.SQL); + BrokerRequest strippedBrokerRequest = GapfillUtils.stripGapfill(brokerRequest); Review comment: (MAJOR) Query options should be preserved when stripping the gapfill ########## File path: pinot-core/src/test/java/org/apache/pinot/queries/BaseQueriesTest.java ########## @@ -197,21 +198,31 @@ protected BrokerResponseNative getBrokerResponseForSqlQuery(String sqlQuery, Pla } queryOptions.put(Request.QueryOptionKey.GROUP_BY_MODE, Request.SQL); queryOptions.put(Request.QueryOptionKey.RESPONSE_FORMAT, Request.SQL); + BrokerRequest strippedBrokerRequest = GapfillUtils.stripGapfill(brokerRequest); + queryOptions = strippedBrokerRequest.getPinotQuery().getQueryOptions(); + if (queryOptions == null) { + queryOptions = new HashMap<>(); + strippedBrokerRequest.getPinotQuery().setQueryOptions(queryOptions); + } + queryOptions.put(Request.QueryOptionKey.GROUP_BY_MODE, Request.SQL); + queryOptions.put(Request.QueryOptionKey.RESPONSE_FORMAT, Request.SQL); QueryContext queryContext = BrokerRequestToQueryContextConverter.convert(brokerRequest); - return getBrokerResponse(queryContext, planMaker); + QueryContext strippedQueryContext = BrokerRequestToQueryContextConverter.convert(strippedBrokerRequest); + return getBrokerResponse(queryContext, strippedQueryContext, planMaker); } /** * Run query on multiple index segments with custom plan maker. * <p>Use this to test the whole flow from server to broker. * <p>The result should be equivalent to querying 4 identical index segments. 
*/ - private BrokerResponseNative getBrokerResponse(QueryContext queryContext, PlanMaker planMaker) { + private BrokerResponseNative getBrokerResponse( + QueryContext queryContext, QueryContext strippedQueryContext, PlanMaker planMaker) { Review comment: ```suggestion QueryContext queryContext, QueryContext serverQueryContext, PlanMaker planMaker) { ``` ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/selection/SelectionOperatorUtils.java ########## @@ -79,7 +79,6 @@ private SelectionOperatorUtils() { private static final String FLOAT_PATTERN = "#########0.0####"; private static final String DOUBLE_PATTERN = "###################0.0#########"; private static final DecimalFormatSymbols DECIMAL_FORMAT_SYMBOLS = DecimalFormatSymbols.getInstance(Locale.US); - Review comment: (minor) revert this file ########## File path: pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java ########## @@ -217,7 +218,10 @@ private BrokerResponseNative handleSQLRequest(long requestId, String query, Json requestStatistics.setErrorCode(QueryException.PQL_PARSING_ERROR_CODE); return new BrokerResponseNative(QueryException.getException(QueryException.PQL_PARSING_ERROR, e)); } - PinotQuery pinotQuery = brokerRequest.getPinotQuery(); + + BrokerRequest serverBrokerRequest = GapfillUtils.stripGapfill(brokerRequest); Review comment: (MAJOR) Let's set the query options first (some options are already set during the query compilation), then strip the gapfill. You may just set the stripped query options to be the original query options without making a copy ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/reduce/GapfillProcessor.java ########## @@ -0,0 +1,471 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.reduce; + +import com.google.common.base.Preconditions; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.FunctionContext; +import org.apache.pinot.common.response.broker.BrokerResponseNative; +import org.apache.pinot.common.response.broker.ResultTable; +import org.apache.pinot.common.utils.DataSchema; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; +import org.apache.pinot.core.common.BlockValSet; +import org.apache.pinot.core.data.table.Key; +import org.apache.pinot.core.query.aggregation.function.AggregationFunction; +import org.apache.pinot.core.query.aggregation.function.AggregationFunctionFactory; +import org.apache.pinot.core.query.aggregation.function.CountAggregationFunction; +import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder; +import org.apache.pinot.core.query.request.context.QueryContext; +import org.apache.pinot.core.util.GapfillUtils; +import org.apache.pinot.spi.data.DateTimeFormatSpec; +import org.apache.pinot.spi.data.DateTimeGranularitySpec; + + +/** + * Helper class to reduce and set gap fill results into the BrokerResponseNative + */ 
+@SuppressWarnings({"rawtypes", "unchecked"}) +public class GapfillProcessor { + private final QueryContext _queryContext; + + private final int _limitForAggregatedResult; + private final DateTimeGranularitySpec _gapfillDateTimeGranularity; + private final DateTimeGranularitySpec _postGapfillDateTimeGranularity; + private final DateTimeFormatSpec _dateTimeFormatter; + private final long _startMs; + private final long _endMs; + private final long _gapfillTimeBucketSize; + private final long _postGapfillTimeBucketSize; + private final int _numOfTimeBuckets; + private final List<Integer> _groupByKeyIndexes; + private final Set<Key> _groupByKeys; + private final Map<Key, Object[]> _previousByGroupKey; + private final Map<String, ExpressionContext> _fillExpressions; + private final List<ExpressionContext> _timeSeries; + private final GapfillUtils.GapfillType _gapfillType; + private int _limitForGapfilledResult; + private boolean[] _isGroupBySelections; + private final int _timeBucketColumnIndex; + private int[] _sourceColumnIndexForResultSchema = null; + private final int _aggregationSize; + + GapfillProcessor(QueryContext queryContext, GapfillUtils.GapfillType gapfillType) { + _queryContext = queryContext; + _gapfillType = gapfillType; + _limitForAggregatedResult = queryContext.getLimit(); + if (_gapfillType == GapfillUtils.GapfillType.AGGREGATE_GAP_FILL + || _gapfillType == GapfillUtils.GapfillType.GAP_FILL) { + _limitForGapfilledResult = queryContext.getLimit(); + } else { + _limitForGapfilledResult = queryContext.getSubquery().getLimit(); + } + + ExpressionContext gapFillSelection = GapfillUtils.getGapfillExpressionContext(queryContext, _gapfillType); + _timeBucketColumnIndex = GapfillUtils.findTimeBucketColumnIndex(queryContext, _gapfillType); + + List<ExpressionContext> args = gapFillSelection.getFunction().getArguments(); + + _dateTimeFormatter = new DateTimeFormatSpec(args.get(1).getLiteral()); + _gapfillDateTimeGranularity = new 
DateTimeGranularitySpec(args.get(4).getLiteral()); + _postGapfillDateTimeGranularity = new DateTimeGranularitySpec(args.get(5).getLiteral()); Review comment: Trying to understand when we need to use different granularity for gapfill and post-gapfill. Does this align with the gapfill definition? ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/reduce/GapfillProcessor.java ########## @@ -0,0 +1,471 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.pinot.core.query.reduce; + +import com.google.common.base.Preconditions; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.FunctionContext; +import org.apache.pinot.common.response.broker.BrokerResponseNative; +import org.apache.pinot.common.response.broker.ResultTable; +import org.apache.pinot.common.utils.DataSchema; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; +import org.apache.pinot.core.common.BlockValSet; +import org.apache.pinot.core.data.table.Key; +import org.apache.pinot.core.query.aggregation.function.AggregationFunction; +import org.apache.pinot.core.query.aggregation.function.AggregationFunctionFactory; +import org.apache.pinot.core.query.aggregation.function.CountAggregationFunction; +import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder; +import org.apache.pinot.core.query.request.context.QueryContext; +import org.apache.pinot.core.util.GapfillUtils; +import org.apache.pinot.spi.data.DateTimeFormatSpec; +import org.apache.pinot.spi.data.DateTimeGranularitySpec; + + +/** + * Helper class to reduce and set gap fill results into the BrokerResponseNative + */ +@SuppressWarnings({"rawtypes", "unchecked"}) +public class GapfillProcessor { + private final QueryContext _queryContext; + + private final int _limitForAggregatedResult; + private final DateTimeGranularitySpec _gapfillDateTimeGranularity; + private final DateTimeGranularitySpec _postGapfillDateTimeGranularity; + private final DateTimeFormatSpec _dateTimeFormatter; + private final long _startMs; + private final long _endMs; + private final long _gapfillTimeBucketSize; + private final long _postGapfillTimeBucketSize; + private final int _numOfTimeBuckets; + private final List<Integer> 
_groupByKeyIndexes; + private final Set<Key> _groupByKeys; + private final Map<Key, Object[]> _previousByGroupKey; + private final Map<String, ExpressionContext> _fillExpressions; + private final List<ExpressionContext> _timeSeries; + private final GapfillUtils.GapfillType _gapfillType; + private int _limitForGapfilledResult; + private boolean[] _isGroupBySelections; + private final int _timeBucketColumnIndex; + private int[] _sourceColumnIndexForResultSchema = null; + private final int _aggregationSize; + + GapfillProcessor(QueryContext queryContext, GapfillUtils.GapfillType gapfillType) { + _queryContext = queryContext; + _gapfillType = gapfillType; + _limitForAggregatedResult = queryContext.getLimit(); + if (_gapfillType == GapfillUtils.GapfillType.AGGREGATE_GAP_FILL + || _gapfillType == GapfillUtils.GapfillType.GAP_FILL) { + _limitForGapfilledResult = queryContext.getLimit(); + } else { + _limitForGapfilledResult = queryContext.getSubquery().getLimit(); + } + + ExpressionContext gapFillSelection = GapfillUtils.getGapfillExpressionContext(queryContext, _gapfillType); + _timeBucketColumnIndex = GapfillUtils.findTimeBucketColumnIndex(queryContext, _gapfillType); + + List<ExpressionContext> args = gapFillSelection.getFunction().getArguments(); + + _dateTimeFormatter = new DateTimeFormatSpec(args.get(1).getLiteral()); + _gapfillDateTimeGranularity = new DateTimeGranularitySpec(args.get(4).getLiteral()); + _postGapfillDateTimeGranularity = new DateTimeGranularitySpec(args.get(5).getLiteral()); + String start = args.get(2).getLiteral(); + _startMs = truncate(_dateTimeFormatter.fromFormatToMillis(start)); + String end = args.get(3).getLiteral(); + _endMs = truncate(_dateTimeFormatter.fromFormatToMillis(end)); + _gapfillTimeBucketSize = _gapfillDateTimeGranularity.granularityToMillis(); + _postGapfillTimeBucketSize = _postGapfillDateTimeGranularity.granularityToMillis(); + _numOfTimeBuckets = (int) ((_endMs - _startMs) / _gapfillTimeBucketSize); + + _fillExpressions = 
GapfillUtils.getFillExpressions(gapFillSelection); + + _aggregationSize = (int) (_postGapfillTimeBucketSize / _gapfillTimeBucketSize); + + _previousByGroupKey = new HashMap<>(); + _groupByKeyIndexes = new ArrayList<>(); + _groupByKeys = new HashSet<>(); + + ExpressionContext timeseriesOn = GapfillUtils.getTimeSeriesOnExpressionContext(gapFillSelection); + _timeSeries = timeseriesOn.getFunction().getArguments(); + } + + private int findGapfillBucketIndex(long time) { + return (int) ((time - _startMs) / _gapfillTimeBucketSize); + } + + private void replaceColumnNameWithAlias(DataSchema dataSchema) { + QueryContext queryContext; + if (_gapfillType == GapfillUtils.GapfillType.AGGREGATE_GAP_FILL_AGGREGATE) { + queryContext = _queryContext.getSubquery().getSubquery(); + } else if (_gapfillType == GapfillUtils.GapfillType.GAP_FILL) { + queryContext = _queryContext; + } else { + queryContext = _queryContext.getSubquery(); + } + List<String> aliasList = queryContext.getAliasList(); + Map<String, String> columnNameToAliasMap = new HashMap<>(); + for (int i = 0; i < aliasList.size(); i++) { + if (aliasList.get(i) != null) { + ExpressionContext selection = queryContext.getSelectExpressions().get(i); + if (GapfillUtils.isGapfill(selection)) { + selection = selection.getFunction().getArguments().get(0); + } + columnNameToAliasMap.put(selection.toString(), aliasList.get(i)); + } + } + for (int i = 0; i < dataSchema.getColumnNames().length; i++) { + if (columnNameToAliasMap.containsKey(dataSchema.getColumnNames()[i])) { + dataSchema.getColumnNames()[i] = columnNameToAliasMap.get(dataSchema.getColumnNames()[i]); + } + } + } + + /** + * Here are three things that happen + * 1. Sort the result sets from all pinot servers based on timestamp + * 2. Gapfill the data for missing entities per time bucket + * 3. Aggregate the dataset per time bucket. 
+ */ + public void process(BrokerResponseNative brokerResponseNative) { + DataSchema dataSchema = brokerResponseNative.getResultTable().getDataSchema(); + DataSchema resultTableSchema = getResultTableDataSchema(dataSchema); + if (brokerResponseNative.getResultTable().getRows().isEmpty()) { + brokerResponseNative.setResultTable(new ResultTable(resultTableSchema, Collections.emptyList())); + return; + } + + String[] columns = dataSchema.getColumnNames(); + + Map<String, Integer> indexes = new HashMap<>(); + for (int i = 0; i < columns.length; i++) { + indexes.put(columns[i], i); + } + + _isGroupBySelections = new boolean[dataSchema.getColumnDataTypes().length]; + + // The first one argument of timeSeries is time column. The left ones are defining entity. + for (ExpressionContext entityColum : _timeSeries) { + int index = indexes.get(entityColum.getIdentifier()); + _isGroupBySelections[index] = true; + } + + for (int i = 0; i < _isGroupBySelections.length; i++) { + if (_isGroupBySelections[i]) { + _groupByKeyIndexes.add(i); + } + } + + List<Object[]>[] timeBucketedRawRows = putRawRowsIntoTimeBucket(brokerResponseNative.getResultTable().getRows()); + + replaceColumnNameWithAlias(dataSchema); + + if (_queryContext.getAggregationFunctions() == null) { + + Map<String, Integer> sourceColumnsIndexes = new HashMap<>(); + for (int i = 0; i < dataSchema.getColumnNames().length; i++) { + sourceColumnsIndexes.put(dataSchema.getColumnName(i), i); + } + _sourceColumnIndexForResultSchema = new int[resultTableSchema.getColumnNames().length]; + for (int i = 0; i < _sourceColumnIndexForResultSchema.length; i++) { + _sourceColumnIndexForResultSchema[i] = sourceColumnsIndexes.get(resultTableSchema.getColumnName(i)); + } + } + + List<Object[]> resultRows = gapFillAndAggregate(timeBucketedRawRows, resultTableSchema, dataSchema); + brokerResponseNative.setResultTable(new ResultTable(resultTableSchema, resultRows)); + } + + /** + * Constructs the DataSchema for the ResultTable. 
+ */ + private DataSchema getResultTableDataSchema(DataSchema dataSchema) { + if (_gapfillType == GapfillUtils.GapfillType.GAP_FILL) { + return dataSchema; + } + + int numOfColumns = _queryContext.getSelectExpressions().size(); + String[] columnNames = new String[numOfColumns]; + ColumnDataType[] columnDataTypes = new ColumnDataType[numOfColumns]; + for (int i = 0; i < numOfColumns; i++) { + ExpressionContext expressionContext = _queryContext.getSelectExpressions().get(i); + if (GapfillUtils.isGapfill(expressionContext)) { + expressionContext = expressionContext.getFunction().getArguments().get(0); + } + if (expressionContext.getType() != ExpressionContext.Type.FUNCTION) { + columnNames[i] = expressionContext.getIdentifier(); + columnDataTypes[i] = ColumnDataType.STRING; + } else { + FunctionContext functionContext = expressionContext.getFunction(); + AggregationFunction aggregationFunction = + AggregationFunctionFactory.getAggregationFunction(functionContext, _queryContext); + columnDataTypes[i] = aggregationFunction.getFinalResultColumnType(); + columnNames[i] = functionContext.toString(); + } + } + return new DataSchema(columnNames, columnDataTypes); + } + + private Key constructGroupKeys(Object[] row) { + Object[] groupKeys = new Object[_groupByKeyIndexes.size()]; + for (int i = 0; i < _groupByKeyIndexes.size(); i++) { + groupKeys[i] = row[_groupByKeyIndexes.get(i)]; + } + return new Key(groupKeys); + } + + private long truncate(long epoch) { + int sz = _gapfillDateTimeGranularity.getSize(); + return epoch / sz * sz; + } + + private List<Object[]> gapFillAndAggregate(List<Object[]>[] timeBucketedRawRows, + DataSchema dataSchemaForAggregatedResult, DataSchema dataSchema) { + List<Object[]> result = new ArrayList<>(); + + GapfillFilterHandler postGapfillFilterHandler = null; + if (_queryContext.getSubquery() != null && _queryContext.getFilter() != null) { + postGapfillFilterHandler = new GapfillFilterHandler(_queryContext.getFilter(), dataSchema); + } + 
GapfillFilterHandler postAggregateHavingFilterHandler = null; + if (_queryContext.getHavingFilter() != null) { + postAggregateHavingFilterHandler = + new GapfillFilterHandler(_queryContext.getHavingFilter(), dataSchemaForAggregatedResult); + } + long start = _startMs; + ColumnDataType[] resultColumnDataTypes = dataSchema.getColumnDataTypes(); + List<Object[]> bucketedResult = new ArrayList<>(); + for (long time = _startMs; time < _endMs; time += _gapfillTimeBucketSize) { + int index = findGapfillBucketIndex(time); + gapfill(time, bucketedResult, timeBucketedRawRows[index], dataSchema, postGapfillFilterHandler); + if (_queryContext.getAggregationFunctions() == null) { + for (Object [] row : bucketedResult) { + Object[] resultRow = new Object[_sourceColumnIndexForResultSchema.length]; + for (int i = 0; i < _sourceColumnIndexForResultSchema.length; i++) { + resultRow[i] = row[_sourceColumnIndexForResultSchema[i]]; + } + result.add(resultRow); + } + bucketedResult = new ArrayList<>(); Review comment: Unnecessary allocation ```suggestion bucketedResult.clear(); ``` ########## File path: pinot-core/src/test/java/org/apache/pinot/queries/BaseQueriesTest.java ########## @@ -197,21 +198,31 @@ protected BrokerResponseNative getBrokerResponseForSqlQuery(String sqlQuery, Pla } queryOptions.put(Request.QueryOptionKey.GROUP_BY_MODE, Request.SQL); queryOptions.put(Request.QueryOptionKey.RESPONSE_FORMAT, Request.SQL); + BrokerRequest strippedBrokerRequest = GapfillUtils.stripGapfill(brokerRequest); Review comment: (minor) rename it to `serverBrokerRequest` ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/reduce/GapfillProcessor.java ########## @@ -0,0 +1,471 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.reduce; + +import com.google.common.base.Preconditions; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.FunctionContext; +import org.apache.pinot.common.response.broker.BrokerResponseNative; +import org.apache.pinot.common.response.broker.ResultTable; +import org.apache.pinot.common.utils.DataSchema; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; +import org.apache.pinot.core.common.BlockValSet; +import org.apache.pinot.core.data.table.Key; +import org.apache.pinot.core.query.aggregation.function.AggregationFunction; +import org.apache.pinot.core.query.aggregation.function.AggregationFunctionFactory; +import org.apache.pinot.core.query.aggregation.function.CountAggregationFunction; +import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder; +import org.apache.pinot.core.query.request.context.QueryContext; +import org.apache.pinot.core.util.GapfillUtils; +import org.apache.pinot.spi.data.DateTimeFormatSpec; +import org.apache.pinot.spi.data.DateTimeGranularitySpec; + + +/** + * Helper class to reduce and set gap fill results into the BrokerResponseNative + */ 
+@SuppressWarnings({"rawtypes", "unchecked"}) +public class GapfillProcessor { + private final QueryContext _queryContext; + + private final int _limitForAggregatedResult; + private final DateTimeGranularitySpec _gapfillDateTimeGranularity; + private final DateTimeGranularitySpec _postGapfillDateTimeGranularity; + private final DateTimeFormatSpec _dateTimeFormatter; + private final long _startMs; + private final long _endMs; + private final long _gapfillTimeBucketSize; + private final long _postGapfillTimeBucketSize; + private final int _numOfTimeBuckets; + private final List<Integer> _groupByKeyIndexes; + private final Set<Key> _groupByKeys; + private final Map<Key, Object[]> _previousByGroupKey; + private final Map<String, ExpressionContext> _fillExpressions; + private final List<ExpressionContext> _timeSeries; + private final GapfillUtils.GapfillType _gapfillType; + private int _limitForGapfilledResult; + private boolean[] _isGroupBySelections; + private final int _timeBucketColumnIndex; + private int[] _sourceColumnIndexForResultSchema = null; + private final int _aggregationSize; + + GapfillProcessor(QueryContext queryContext, GapfillUtils.GapfillType gapfillType) { + _queryContext = queryContext; + _gapfillType = gapfillType; + _limitForAggregatedResult = queryContext.getLimit(); + if (_gapfillType == GapfillUtils.GapfillType.AGGREGATE_GAP_FILL + || _gapfillType == GapfillUtils.GapfillType.GAP_FILL) { + _limitForGapfilledResult = queryContext.getLimit(); + } else { + _limitForGapfilledResult = queryContext.getSubquery().getLimit(); + } + + ExpressionContext gapFillSelection = GapfillUtils.getGapfillExpressionContext(queryContext, _gapfillType); + _timeBucketColumnIndex = GapfillUtils.findTimeBucketColumnIndex(queryContext, _gapfillType); + + List<ExpressionContext> args = gapFillSelection.getFunction().getArguments(); + + _dateTimeFormatter = new DateTimeFormatSpec(args.get(1).getLiteral()); + _gapfillDateTimeGranularity = new 
DateTimeGranularitySpec(args.get(4).getLiteral()); + _postGapfillDateTimeGranularity = new DateTimeGranularitySpec(args.get(5).getLiteral()); + String start = args.get(2).getLiteral(); + _startMs = truncate(_dateTimeFormatter.fromFormatToMillis(start)); + String end = args.get(3).getLiteral(); + _endMs = truncate(_dateTimeFormatter.fromFormatToMillis(end)); + _gapfillTimeBucketSize = _gapfillDateTimeGranularity.granularityToMillis(); + _postGapfillTimeBucketSize = _postGapfillDateTimeGranularity.granularityToMillis(); + _numOfTimeBuckets = (int) ((_endMs - _startMs) / _gapfillTimeBucketSize); + + _fillExpressions = GapfillUtils.getFillExpressions(gapFillSelection); + + _aggregationSize = (int) (_postGapfillTimeBucketSize / _gapfillTimeBucketSize); + + _previousByGroupKey = new HashMap<>(); + _groupByKeyIndexes = new ArrayList<>(); + _groupByKeys = new HashSet<>(); + + ExpressionContext timeseriesOn = GapfillUtils.getTimeSeriesOnExpressionContext(gapFillSelection); + _timeSeries = timeseriesOn.getFunction().getArguments(); + } + + private int findGapfillBucketIndex(long time) { + return (int) ((time - _startMs) / _gapfillTimeBucketSize); + } + + private void replaceColumnNameWithAlias(DataSchema dataSchema) { + QueryContext queryContext; + if (_gapfillType == GapfillUtils.GapfillType.AGGREGATE_GAP_FILL_AGGREGATE) { + queryContext = _queryContext.getSubquery().getSubquery(); + } else if (_gapfillType == GapfillUtils.GapfillType.GAP_FILL) { + queryContext = _queryContext; + } else { + queryContext = _queryContext.getSubquery(); + } + List<String> aliasList = queryContext.getAliasList(); + Map<String, String> columnNameToAliasMap = new HashMap<>(); + for (int i = 0; i < aliasList.size(); i++) { + if (aliasList.get(i) != null) { + ExpressionContext selection = queryContext.getSelectExpressions().get(i); + if (GapfillUtils.isGapfill(selection)) { + selection = selection.getFunction().getArguments().get(0); + } + columnNameToAliasMap.put(selection.toString(), 
aliasList.get(i)); + } + } + for (int i = 0; i < dataSchema.getColumnNames().length; i++) { + if (columnNameToAliasMap.containsKey(dataSchema.getColumnNames()[i])) { + dataSchema.getColumnNames()[i] = columnNameToAliasMap.get(dataSchema.getColumnNames()[i]); + } + } + } + + /** + * Here are three things that happen + * 1. Sort the result sets from all pinot servers based on timestamp + * 2. Gapfill the data for missing entities per time bucket + * 3. Aggregate the dataset per time bucket. + */ + public void process(BrokerResponseNative brokerResponseNative) { + DataSchema dataSchema = brokerResponseNative.getResultTable().getDataSchema(); + DataSchema resultTableSchema = getResultTableDataSchema(dataSchema); + if (brokerResponseNative.getResultTable().getRows().isEmpty()) { + brokerResponseNative.setResultTable(new ResultTable(resultTableSchema, Collections.emptyList())); + return; + } + + String[] columns = dataSchema.getColumnNames(); + + Map<String, Integer> indexes = new HashMap<>(); + for (int i = 0; i < columns.length; i++) { + indexes.put(columns[i], i); + } + + _isGroupBySelections = new boolean[dataSchema.getColumnDataTypes().length]; + + // The first one argument of timeSeries is time column. The left ones are defining entity. 
+ for (ExpressionContext entityColum : _timeSeries) { + int index = indexes.get(entityColum.getIdentifier()); + _isGroupBySelections[index] = true; + } + + for (int i = 0; i < _isGroupBySelections.length; i++) { + if (_isGroupBySelections[i]) { + _groupByKeyIndexes.add(i); + } + } + + List<Object[]>[] timeBucketedRawRows = putRawRowsIntoTimeBucket(brokerResponseNative.getResultTable().getRows()); + + replaceColumnNameWithAlias(dataSchema); + + if (_queryContext.getAggregationFunctions() == null) { + + Map<String, Integer> sourceColumnsIndexes = new HashMap<>(); + for (int i = 0; i < dataSchema.getColumnNames().length; i++) { + sourceColumnsIndexes.put(dataSchema.getColumnName(i), i); + } + _sourceColumnIndexForResultSchema = new int[resultTableSchema.getColumnNames().length]; + for (int i = 0; i < _sourceColumnIndexForResultSchema.length; i++) { + _sourceColumnIndexForResultSchema[i] = sourceColumnsIndexes.get(resultTableSchema.getColumnName(i)); + } + } + + List<Object[]> resultRows = gapFillAndAggregate(timeBucketedRawRows, resultTableSchema, dataSchema); + brokerResponseNative.setResultTable(new ResultTable(resultTableSchema, resultRows)); + } + + /** + * Constructs the DataSchema for the ResultTable. 
+ */ + private DataSchema getResultTableDataSchema(DataSchema dataSchema) { + if (_gapfillType == GapfillUtils.GapfillType.GAP_FILL) { + return dataSchema; + } + + int numOfColumns = _queryContext.getSelectExpressions().size(); + String[] columnNames = new String[numOfColumns]; + ColumnDataType[] columnDataTypes = new ColumnDataType[numOfColumns]; + for (int i = 0; i < numOfColumns; i++) { + ExpressionContext expressionContext = _queryContext.getSelectExpressions().get(i); + if (GapfillUtils.isGapfill(expressionContext)) { + expressionContext = expressionContext.getFunction().getArguments().get(0); + } + if (expressionContext.getType() != ExpressionContext.Type.FUNCTION) { + columnNames[i] = expressionContext.getIdentifier(); + columnDataTypes[i] = ColumnDataType.STRING; + } else { + FunctionContext functionContext = expressionContext.getFunction(); + AggregationFunction aggregationFunction = + AggregationFunctionFactory.getAggregationFunction(functionContext, _queryContext); + columnDataTypes[i] = aggregationFunction.getFinalResultColumnType(); + columnNames[i] = functionContext.toString(); + } + } + return new DataSchema(columnNames, columnDataTypes); + } + + private Key constructGroupKeys(Object[] row) { + Object[] groupKeys = new Object[_groupByKeyIndexes.size()]; + for (int i = 0; i < _groupByKeyIndexes.size(); i++) { + groupKeys[i] = row[_groupByKeyIndexes.get(i)]; + } + return new Key(groupKeys); + } + + private long truncate(long epoch) { + int sz = _gapfillDateTimeGranularity.getSize(); + return epoch / sz * sz; + } + + private List<Object[]> gapFillAndAggregate(List<Object[]>[] timeBucketedRawRows, + DataSchema dataSchemaForAggregatedResult, DataSchema dataSchema) { + List<Object[]> result = new ArrayList<>(); + + GapfillFilterHandler postGapfillFilterHandler = null; + if (_queryContext.getSubquery() != null && _queryContext.getFilter() != null) { + postGapfillFilterHandler = new GapfillFilterHandler(_queryContext.getFilter(), dataSchema); + } + 
GapfillFilterHandler postAggregateHavingFilterHandler = null; + if (_queryContext.getHavingFilter() != null) { + postAggregateHavingFilterHandler = + new GapfillFilterHandler(_queryContext.getHavingFilter(), dataSchemaForAggregatedResult); + } + long start = _startMs; + ColumnDataType[] resultColumnDataTypes = dataSchema.getColumnDataTypes(); + List<Object[]> bucketedResult = new ArrayList<>(); + for (long time = _startMs; time < _endMs; time += _gapfillTimeBucketSize) { + int index = findGapfillBucketIndex(time); + gapfill(time, bucketedResult, timeBucketedRawRows[index], dataSchema, postGapfillFilterHandler); + if (_queryContext.getAggregationFunctions() == null) { + for (Object [] row : bucketedResult) { + Object[] resultRow = new Object[_sourceColumnIndexForResultSchema.length]; + for (int i = 0; i < _sourceColumnIndexForResultSchema.length; i++) { + resultRow[i] = row[_sourceColumnIndexForResultSchema[i]]; + } + result.add(resultRow); + } + bucketedResult = new ArrayList<>(); + } else if (index % _aggregationSize == _aggregationSize - 1 && bucketedResult.size() > 0) { Review comment: Can `bucketedResult` ever be empty? If it is empty, do we need to update the `start`? 
########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/reduce/BrokerReduceService.java ########## @@ -103,11 +104,23 @@ public BrokerResponseNative reduceOnDataTable(BrokerRequest brokerRequest, return brokerResponseNative; } - QueryContext queryContext = BrokerRequestToQueryContextConverter.convert(brokerRequest); - DataTableReducer dataTableReducer = ResultReducerFactory.getResultReducer(queryContext); + QueryContext serverQueryContext = BrokerRequestToQueryContextConverter.convert(serverBrokerRequest); + DataTableReducer dataTableReducer = ResultReducerFactory.getResultReducer(serverQueryContext); dataTableReducer.reduceAndSetResults(rawTableName, cachedDataSchema, dataTableMap, brokerResponseNative, new DataTableReducerContext(_reduceExecutorService, _maxReduceThreadsPerQuery, reduceTimeOutMs, _groupByTrimThreshold), brokerMetrics); + QueryContext queryContext; + if (brokerRequest == serverBrokerRequest) { + queryContext = serverQueryContext; + } else { + queryContext = BrokerRequestToQueryContextConverter.convert(brokerRequest); + } + + GapfillUtils.GapfillType gapfillType = GapfillUtils.getGapfillType(queryContext); + if (gapfillType != null) { + GapfillProcessor gapfillProcessor = new GapfillProcessor(queryContext, gapfillType); + gapfillProcessor.process(brokerResponseNative); + } Review comment: No need to check gapfill type when server request is the same as broker request ```suggestion if (brokerRequest == serverBrokerRequest) { queryContext = serverQueryContext; } else { queryContext = BrokerRequestToQueryContextConverter.convert(brokerRequest); GapfillUtils.GapfillType gapfillType = GapfillUtils.getGapfillType(queryContext); if (gapfillType != null) { GapfillProcessor gapfillProcessor = new GapfillProcessor(queryContext, gapfillType); gapfillProcessor.process(brokerResponseNative); } } ``` ########## File path: pinot-core/src/test/java/org/apache/pinot/queries/BaseQueriesTest.java ########## @@ -197,21 +198,31 @@ protected 
BrokerResponseNative getBrokerResponseForSqlQuery(String sqlQuery, Pla } queryOptions.put(Request.QueryOptionKey.GROUP_BY_MODE, Request.SQL); queryOptions.put(Request.QueryOptionKey.RESPONSE_FORMAT, Request.SQL); + BrokerRequest strippedBrokerRequest = GapfillUtils.stripGapfill(brokerRequest); + queryOptions = strippedBrokerRequest.getPinotQuery().getQueryOptions(); + if (queryOptions == null) { + queryOptions = new HashMap<>(); + strippedBrokerRequest.getPinotQuery().setQueryOptions(queryOptions); + } + queryOptions.put(Request.QueryOptionKey.GROUP_BY_MODE, Request.SQL); + queryOptions.put(Request.QueryOptionKey.RESPONSE_FORMAT, Request.SQL); QueryContext queryContext = BrokerRequestToQueryContextConverter.convert(brokerRequest); - return getBrokerResponse(queryContext, planMaker); + QueryContext strippedQueryContext = BrokerRequestToQueryContextConverter.convert(strippedBrokerRequest); Review comment: Let's compare the reference before converting the `strippedBrokerRequest`, and rename it to `serverQueryContext`. Same for `getBrokerResponseForOptimizedSqlQuery` ########## File path: pinot-core/src/main/java/org/apache/pinot/core/query/reduce/GapfillProcessor.java ########## @@ -0,0 +1,471 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.reduce; + +import com.google.common.base.Preconditions; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.FunctionContext; +import org.apache.pinot.common.response.broker.BrokerResponseNative; +import org.apache.pinot.common.response.broker.ResultTable; +import org.apache.pinot.common.utils.DataSchema; +import org.apache.pinot.common.utils.DataSchema.ColumnDataType; +import org.apache.pinot.core.common.BlockValSet; +import org.apache.pinot.core.data.table.Key; +import org.apache.pinot.core.query.aggregation.function.AggregationFunction; +import org.apache.pinot.core.query.aggregation.function.AggregationFunctionFactory; +import org.apache.pinot.core.query.aggregation.function.CountAggregationFunction; +import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder; +import org.apache.pinot.core.query.request.context.QueryContext; +import org.apache.pinot.core.util.GapfillUtils; +import org.apache.pinot.spi.data.DateTimeFormatSpec; +import org.apache.pinot.spi.data.DateTimeGranularitySpec; + + +/** + * Helper class to reduce and set gap fill results into the BrokerResponseNative + */ +@SuppressWarnings({"rawtypes", "unchecked"}) +public class GapfillProcessor { + private final QueryContext _queryContext; + + private final int _limitForAggregatedResult; + private final DateTimeGranularitySpec _gapfillDateTimeGranularity; + private final DateTimeGranularitySpec _postGapfillDateTimeGranularity; + private final DateTimeFormatSpec _dateTimeFormatter; + private final long _startMs; + private final long _endMs; + private final long _gapfillTimeBucketSize; + private final 
long _postGapfillTimeBucketSize; + private final int _numOfTimeBuckets; + private final List<Integer> _groupByKeyIndexes; + private final Set<Key> _groupByKeys; + private final Map<Key, Object[]> _previousByGroupKey; + private final Map<String, ExpressionContext> _fillExpressions; + private final List<ExpressionContext> _timeSeries; + private final GapfillUtils.GapfillType _gapfillType; + private int _limitForGapfilledResult; + private boolean[] _isGroupBySelections; + private final int _timeBucketColumnIndex; + private int[] _sourceColumnIndexForResultSchema = null; + private final int _aggregationSize; + + GapfillProcessor(QueryContext queryContext, GapfillUtils.GapfillType gapfillType) { + _queryContext = queryContext; + _gapfillType = gapfillType; + _limitForAggregatedResult = queryContext.getLimit(); + if (_gapfillType == GapfillUtils.GapfillType.AGGREGATE_GAP_FILL + || _gapfillType == GapfillUtils.GapfillType.GAP_FILL) { + _limitForGapfilledResult = queryContext.getLimit(); + } else { + _limitForGapfilledResult = queryContext.getSubquery().getLimit(); + } + + ExpressionContext gapFillSelection = GapfillUtils.getGapfillExpressionContext(queryContext, _gapfillType); + _timeBucketColumnIndex = GapfillUtils.findTimeBucketColumnIndex(queryContext, _gapfillType); + + List<ExpressionContext> args = gapFillSelection.getFunction().getArguments(); + + _dateTimeFormatter = new DateTimeFormatSpec(args.get(1).getLiteral()); + _gapfillDateTimeGranularity = new DateTimeGranularitySpec(args.get(4).getLiteral()); + _postGapfillDateTimeGranularity = new DateTimeGranularitySpec(args.get(5).getLiteral()); + String start = args.get(2).getLiteral(); + _startMs = truncate(_dateTimeFormatter.fromFormatToMillis(start)); + String end = args.get(3).getLiteral(); + _endMs = truncate(_dateTimeFormatter.fromFormatToMillis(end)); + _gapfillTimeBucketSize = _gapfillDateTimeGranularity.granularityToMillis(); + _postGapfillTimeBucketSize = 
_postGapfillDateTimeGranularity.granularityToMillis(); + _numOfTimeBuckets = (int) ((_endMs - _startMs) / _gapfillTimeBucketSize); + + _fillExpressions = GapfillUtils.getFillExpressions(gapFillSelection); + + _aggregationSize = (int) (_postGapfillTimeBucketSize / _gapfillTimeBucketSize); + + _previousByGroupKey = new HashMap<>(); + _groupByKeyIndexes = new ArrayList<>(); + _groupByKeys = new HashSet<>(); + + ExpressionContext timeseriesOn = GapfillUtils.getTimeSeriesOnExpressionContext(gapFillSelection); + _timeSeries = timeseriesOn.getFunction().getArguments(); + } + + private int findGapfillBucketIndex(long time) { + return (int) ((time - _startMs) / _gapfillTimeBucketSize); + } + + private void replaceColumnNameWithAlias(DataSchema dataSchema) { + QueryContext queryContext; + if (_gapfillType == GapfillUtils.GapfillType.AGGREGATE_GAP_FILL_AGGREGATE) { + queryContext = _queryContext.getSubquery().getSubquery(); + } else if (_gapfillType == GapfillUtils.GapfillType.GAP_FILL) { + queryContext = _queryContext; + } else { + queryContext = _queryContext.getSubquery(); + } + List<String> aliasList = queryContext.getAliasList(); + Map<String, String> columnNameToAliasMap = new HashMap<>(); + for (int i = 0; i < aliasList.size(); i++) { + if (aliasList.get(i) != null) { + ExpressionContext selection = queryContext.getSelectExpressions().get(i); + if (GapfillUtils.isGapfill(selection)) { + selection = selection.getFunction().getArguments().get(0); + } + columnNameToAliasMap.put(selection.toString(), aliasList.get(i)); + } + } + for (int i = 0; i < dataSchema.getColumnNames().length; i++) { + if (columnNameToAliasMap.containsKey(dataSchema.getColumnNames()[i])) { + dataSchema.getColumnNames()[i] = columnNameToAliasMap.get(dataSchema.getColumnNames()[i]); + } + } + } + + /** + * Here are three things that happen + * 1. Sort the result sets from all pinot servers based on timestamp + * 2. Gapfill the data for missing entities per time bucket + * 3. 
Aggregate the dataset per time bucket. + */ + public void process(BrokerResponseNative brokerResponseNative) { + DataSchema dataSchema = brokerResponseNative.getResultTable().getDataSchema(); + DataSchema resultTableSchema = getResultTableDataSchema(dataSchema); + if (brokerResponseNative.getResultTable().getRows().isEmpty()) { + brokerResponseNative.setResultTable(new ResultTable(resultTableSchema, Collections.emptyList())); + return; + } + + String[] columns = dataSchema.getColumnNames(); + + Map<String, Integer> indexes = new HashMap<>(); + for (int i = 0; i < columns.length; i++) { + indexes.put(columns[i], i); + } + + _isGroupBySelections = new boolean[dataSchema.getColumnDataTypes().length]; + + // The first one argument of timeSeries is time column. The left ones are defining entity. + for (ExpressionContext entityColum : _timeSeries) { + int index = indexes.get(entityColum.getIdentifier()); + _isGroupBySelections[index] = true; + } + + for (int i = 0; i < _isGroupBySelections.length; i++) { + if (_isGroupBySelections[i]) { + _groupByKeyIndexes.add(i); + } + } + + List<Object[]>[] timeBucketedRawRows = putRawRowsIntoTimeBucket(brokerResponseNative.getResultTable().getRows()); + + replaceColumnNameWithAlias(dataSchema); + + if (_queryContext.getAggregationFunctions() == null) { + + Map<String, Integer> sourceColumnsIndexes = new HashMap<>(); + for (int i = 0; i < dataSchema.getColumnNames().length; i++) { + sourceColumnsIndexes.put(dataSchema.getColumnName(i), i); + } + _sourceColumnIndexForResultSchema = new int[resultTableSchema.getColumnNames().length]; + for (int i = 0; i < _sourceColumnIndexForResultSchema.length; i++) { + _sourceColumnIndexForResultSchema[i] = sourceColumnsIndexes.get(resultTableSchema.getColumnName(i)); + } + } + + List<Object[]> resultRows = gapFillAndAggregate(timeBucketedRawRows, resultTableSchema, dataSchema); + brokerResponseNative.setResultTable(new ResultTable(resultTableSchema, resultRows)); + } + + /** + * Constructs the 
DataSchema for the ResultTable. + */ + private DataSchema getResultTableDataSchema(DataSchema dataSchema) { + if (_gapfillType == GapfillUtils.GapfillType.GAP_FILL) { + return dataSchema; + } + + int numOfColumns = _queryContext.getSelectExpressions().size(); + String[] columnNames = new String[numOfColumns]; + ColumnDataType[] columnDataTypes = new ColumnDataType[numOfColumns]; + for (int i = 0; i < numOfColumns; i++) { + ExpressionContext expressionContext = _queryContext.getSelectExpressions().get(i); + if (GapfillUtils.isGapfill(expressionContext)) { + expressionContext = expressionContext.getFunction().getArguments().get(0); + } + if (expressionContext.getType() != ExpressionContext.Type.FUNCTION) { + columnNames[i] = expressionContext.getIdentifier(); + columnDataTypes[i] = ColumnDataType.STRING; + } else { + FunctionContext functionContext = expressionContext.getFunction(); + AggregationFunction aggregationFunction = + AggregationFunctionFactory.getAggregationFunction(functionContext, _queryContext); + columnDataTypes[i] = aggregationFunction.getFinalResultColumnType(); + columnNames[i] = functionContext.toString(); + } + } + return new DataSchema(columnNames, columnDataTypes); + } + + private Key constructGroupKeys(Object[] row) { + Object[] groupKeys = new Object[_groupByKeyIndexes.size()]; + for (int i = 0; i < _groupByKeyIndexes.size(); i++) { + groupKeys[i] = row[_groupByKeyIndexes.get(i)]; + } + return new Key(groupKeys); + } + + private long truncate(long epoch) { + int sz = _gapfillDateTimeGranularity.getSize(); + return epoch / sz * sz; + } + + private List<Object[]> gapFillAndAggregate(List<Object[]>[] timeBucketedRawRows, + DataSchema dataSchemaForAggregatedResult, DataSchema dataSchema) { + List<Object[]> result = new ArrayList<>(); + + GapfillFilterHandler postGapfillFilterHandler = null; + if (_queryContext.getSubquery() != null && _queryContext.getFilter() != null) { + postGapfillFilterHandler = new 
GapfillFilterHandler(_queryContext.getFilter(), dataSchema); + } + GapfillFilterHandler postAggregateHavingFilterHandler = null; + if (_queryContext.getHavingFilter() != null) { + postAggregateHavingFilterHandler = + new GapfillFilterHandler(_queryContext.getHavingFilter(), dataSchemaForAggregatedResult); + } + long start = _startMs; + ColumnDataType[] resultColumnDataTypes = dataSchema.getColumnDataTypes(); + List<Object[]> bucketedResult = new ArrayList<>(); + for (long time = _startMs; time < _endMs; time += _gapfillTimeBucketSize) { + int index = findGapfillBucketIndex(time); + gapfill(time, bucketedResult, timeBucketedRawRows[index], dataSchema, postGapfillFilterHandler); + if (_queryContext.getAggregationFunctions() == null) { + for (Object [] row : bucketedResult) { + Object[] resultRow = new Object[_sourceColumnIndexForResultSchema.length]; + for (int i = 0; i < _sourceColumnIndexForResultSchema.length; i++) { + resultRow[i] = row[_sourceColumnIndexForResultSchema[i]]; + } + result.add(resultRow); + } + bucketedResult = new ArrayList<>(); + } else if (index % _aggregationSize == _aggregationSize - 1 && bucketedResult.size() > 0) { + Object timeCol; + if (resultColumnDataTypes[_timeBucketColumnIndex] == ColumnDataType.LONG) { + timeCol = Long.valueOf(_dateTimeFormatter.fromMillisToFormat(start)); + } else { + timeCol = _dateTimeFormatter.fromMillisToFormat(start); + } + List<Object[]> aggregatedRows = aggregateGapfilledData(timeCol, bucketedResult, dataSchema); + for (Object[] aggregatedRow : aggregatedRows) { + if (postAggregateHavingFilterHandler == null || postAggregateHavingFilterHandler.isMatch(aggregatedRow)) { + result.add(aggregatedRow); + } + if (result.size() >= _limitForAggregatedResult) { + return result; + } + } + bucketedResult = new ArrayList<>(); Review comment: ```suggestion bucketedResult.clear(); ``` -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
