jihaozh commented on a change in pull request #3830: [TE] Presto Connector backend and front end
URL: https://github.com/apache/incubator-pinot/pull/3830#discussion_r256602090
########## File path: thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/datasource/presto/SqlUtils.java ########## @@ -0,0 +1,386 @@ +package org.apache.pinot.thirdeye.datasource.presto; + +import com.google.common.base.Joiner; +import com.google.common.base.Predicate; +import com.google.common.collect.Collections2; +import com.google.common.collect.Multimap; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ExecutionException; +import javax.annotation.Nullable; +import org.apache.commons.lang3.StringUtils; +import org.apache.pinot.thirdeye.common.time.TimeGranularity; +import org.apache.pinot.thirdeye.common.time.TimeSpec; +import org.apache.pinot.thirdeye.constant.MetricAggFunction; +import org.apache.pinot.thirdeye.datalayer.dto.MetricConfigDTO; +import org.apache.pinot.thirdeye.datasource.MetricFunction; +import org.apache.pinot.thirdeye.datasource.ThirdEyeRequest; +import org.apache.pinot.thirdeye.datasource.pinot.PqlUtils; +import org.apache.pinot.thirdeye.util.ThirdEyeUtils; +import org.joda.time.DateTime; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class SqlUtils { + + private static final Joiner AND = Joiner.on(" AND "); + private static final Joiner COMMA = Joiner.on(", "); + + private static final String PREFIX_NOT_EQUALS = "!"; + private static final String PREFIX_LESS_THAN = "<"; + private static final String PREFIX_LESS_THAN_EQUALS = "<="; + private static final String PREFIX_GREATER_THAN = ">"; + private static final String PREFIX_GREATER_THAN_EQUALS = ">="; + + private static final String OPERATOR_EQUALS = "IN"; + private static final String OPERATOR_NOT_EQUALS = "NOT IN"; + private static final 
String OPERATOR_LESS_THAN = "<"; + private static final String OPERATOR_LESS_THAN_EQUALS = "<="; + private static final String OPERATOR_GREATER_THAN = ">"; + private static final String OPERATOR_GREATER_THAN_EQUALS = ">="; + + private static final Logger LOGGER = LoggerFactory.getLogger(PqlUtils.class); + private static final int DEFAULT_LIMIT = 100000; + private static final String PERCENTILE_TDIGEST_PREFIX = "percentileTDigest"; + + + /** + * Returns sql to calculate the sum of all raw metrics required for <tt>request</tt>, grouped by + * time within the requested date range. </br> + * Due to the summation, all metric column values can be assumed to be doubles. + * @throws ExecutionException + */ + public static String getSql(ThirdEyeRequest request, MetricFunction metricFunction, + Multimap<String, String> filterSet, TimeSpec dataTimeSpec) { + return getSql(metricFunction, request.getStartTimeInclusive(), request.getEndTimeExclusive(), filterSet, + request.getGroupBy(), request.getGroupByTimeGranularity(), dataTimeSpec, request.getLimit()); + } + + + private static String getSql(MetricFunction metricFunction, DateTime startTime, + DateTime endTimeExclusive, Multimap<String, String> filterSet, List<String> groupBy, + TimeGranularity timeGranularity, TimeSpec dataTimeSpec, int limit) { + + MetricConfigDTO metricConfig = ThirdEyeUtils.getMetricConfigFromId(metricFunction.getMetricId()); + String dataset = metricFunction.getDataset(); + + StringBuilder sb = new StringBuilder(); + + + String selectionClause = getSelectionClause(metricConfig, metricFunction, groupBy, timeGranularity, dataTimeSpec); + + String tableName = ThirdEyeUtils.computePrestoTableName(dataset); + + sb.append("SELECT ").append(selectionClause).append(" FROM ").append(tableName); + String betweenClause = getBetweenClause(startTime, endTimeExclusive, dataTimeSpec, dataset); + String datePartitionClause = getDatePartitionClause(startTime); + sb.append(" WHERE ") + .append(datePartitionClause) + 
.append(" AND ") + .append(betweenClause); + + String dimensionWhereClause = getDimensionWhereClause(filterSet); + if (StringUtils.isNotBlank(dimensionWhereClause)) { + sb.append(" AND ").append(dimensionWhereClause); + } + + if (limit <= 0) { + limit = DEFAULT_LIMIT; + } + + String groupByClause = getDimensionGroupByClause(groupBy, timeGranularity, dataTimeSpec); + if (StringUtils.isNotBlank(groupByClause)) { + sb.append(" ").append(groupByClause); + sb.append(" LIMIT " + limit); + } + + return sb.toString(); + } + + public static String getMaxDataTimeSQL(String timeColumn, String tableName) { + return "SELECT MAX(" + timeColumn + ") FROM " + tableName + " WHERE datepartition >= daysago(1)"; + } + + public static String getDimensionFiltersSQL(String dimension, String tableName) { + return "SELECT DISTINCT(" + dimension + ") FROM " + tableName + " WHERE datepartition >= daysago(1)"; + } + + private static String getSelectionClause(MetricConfigDTO metricConfig, MetricFunction metricFunction, List<String> groupByKeys, TimeGranularity granularity, TimeSpec dateTimeSpec) { + StringBuilder builder = new StringBuilder(); + + if (granularity != null) { + String timeFormat = dateTimeSpec.getFormat(); + // Epoch case + if (timeFormat == null || TimeSpec.SINCE_EPOCH_FORMAT.equals(timeFormat)) { + builder.append(dateTimeSpec.getColumnName()).append(", "); + } else { //timeFormat case + builder.append(dateTimeSpec.getColumnName()).append(", "); + } + } + + for (String groupByKey: groupByKeys) { + builder.append(groupByKey).append(", "); + } + + String metricName = null; + if (metricFunction.getMetricName().equals("*")) { + metricName = "*"; + } else { + metricName = metricConfig.getName(); + } + builder.append(convertAggFunction(metricFunction.getFunctionName())).append("(").append(metricName).append(")"); + + return builder.toString(); + } + + static String getBetweenClause(DateTime start, DateTime endExclusive, TimeSpec timeFieldSpec, String dataset) { + TimeGranularity 
dataGranularity = timeFieldSpec.getDataGranularity(); + long dataGranularityMillis = dataGranularity.toMillis(); + + String timeField = timeFieldSpec.getColumnName(); + String timeFormat = timeFieldSpec.getFormat(); + + // epoch case + if (timeFormat == null || TimeSpec.SINCE_EPOCH_FORMAT.equals(timeFormat)) { + long startUnits = (long) Math.ceil(start.getMillis() / (double) dataGranularityMillis); + long endUnits = (long) Math.ceil(endExclusive.getMillis() / (double) dataGranularityMillis); + + // point query Review comment: not a 'point query' ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@pinot.apache.org For additional commands, e-mail: dev-h...@pinot.apache.org