Github user amansinha100 commented on a diff in the pull request:
https://github.com/apache/drill/pull/637#discussion_r86566263
--- Diff:
exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java
---
@@ -0,0 +1,282 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.expr.stat;
+
+import com.google.common.base.Preconditions;
+import org.apache.drill.common.exceptions.DrillRuntimeException;
+import org.apache.drill.common.expression.FunctionHolderExpression;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.common.expression.fn.CastFunctions;
+import org.apache.drill.common.expression.fn.FuncHolder;
+import org.apache.drill.common.expression.visitors.AbstractExprVisitor;
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.Types;
+import org.apache.drill.exec.expr.DrillSimpleFunc;
+import org.apache.drill.exec.expr.fn.DrillSimpleFuncHolder;
+import org.apache.drill.exec.expr.fn.interpreter.InterpreterEvaluator;
+import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.Float4Holder;
+import org.apache.drill.exec.expr.holders.Float8Holder;
+import org.apache.drill.exec.expr.holders.IntHolder;
+import org.apache.drill.exec.expr.holders.ValueHolder;
+import org.apache.drill.exec.store.parquet.stat.ColumnStatistics;
+import org.apache.drill.exec.vector.ValueHolderHelper;
+import org.apache.parquet.column.statistics.DoubleStatistics;
+import org.apache.parquet.column.statistics.FloatStatistics;
+import org.apache.parquet.column.statistics.IntStatistics;
+import org.apache.parquet.column.statistics.LongStatistics;
+import org.apache.parquet.column.statistics.Statistics;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+public class RangeExprEvaluator extends AbstractExprVisitor<Statistics,
Void, RuntimeException> {
+ static final Logger logger =
LoggerFactory.getLogger(RangeExprEvaluator.class);
+
+ private final Map<SchemaPath, ColumnStatistics> columnStatMap;
+ private final long rowCount;
+
+ public RangeExprEvaluator(final Map<SchemaPath, ColumnStatistics>
columnStatMap, long rowCount) {
+ this.columnStatMap = columnStatMap;
+ this.rowCount = rowCount;
+ }
+
+ public long getRowCount() {
+ return this.rowCount;
+ }
+
+ @Override
+ public Statistics visitUnknown(LogicalExpression e, Void value) throws
RuntimeException {
+ if (e instanceof TypedFieldExpr) {
+ TypedFieldExpr fieldExpr = (TypedFieldExpr) e;
+ final ColumnStatistics columnStatistics =
columnStatMap.get(fieldExpr.getPath());
+ if (columnStatistics != null) {
+ return columnStatistics.getStatistics();
+ } else {
+ // field does not exist.
+
Preconditions.checkArgument(fieldExpr.getMajorType().equals(Types.OPTIONAL_INT));
+ IntStatistics intStatistics = new IntStatistics();
+ intStatistics.setNumNulls(rowCount); // all values are nulls
+ return intStatistics;
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public Statistics visitIntConstant(ValueExpressions.IntExpression expr,
Void value) throws RuntimeException {
+ return getStatistics(expr.getInt());
+ }
+
+ @Override
+ public Statistics visitLongConstant(ValueExpressions.LongExpression
expr, Void value) throws RuntimeException {
+ return getStatistics(expr.getLong());
+ }
+
+ @Override
+ public Statistics visitFloatConstant(ValueExpressions.FloatExpression
expr, Void value) throws RuntimeException {
+ return getStatistics(expr.getFloat());
+ }
+
+ @Override
+ public Statistics visitDoubleConstant(ValueExpressions.DoubleExpression
expr, Void value) throws RuntimeException {
+ return getStatistics(expr.getDouble());
+ }
+
+ @Override
+ public Statistics visitDateConstant(ValueExpressions.DateExpression
expr, Void value) throws RuntimeException {
+ long dateInMillis = expr.getDate();
+ return getStatistics(dateInMillis);
+ }
+
+ @Override
+ public Statistics
visitTimeStampConstant(ValueExpressions.TimeStampExpression tsExpr, Void value)
throws RuntimeException {
+ long tsInMillis = tsExpr.getTimeStamp();
+ return getStatistics(tsInMillis);
+ }
+
+ @Override
+ public Statistics visitTimeConstant(ValueExpressions.TimeExpression
timeExpr, Void value) throws RuntimeException {
+ int milliSeconds = timeExpr.getTime();
+ return getStatistics(milliSeconds);
+ }
+
+ @Override
+ public Statistics visitFunctionHolderExpression(FunctionHolderExpression
holderExpr, Void value) throws RuntimeException {
+ FuncHolder funcHolder = holderExpr.getHolder();
+
+ if (! (funcHolder instanceof DrillSimpleFuncHolder)) {
+ // Only Drill function is allowed.
+ return null;
+ }
+
+ final String funcName = ((DrillSimpleFuncHolder)
funcHolder).getRegisteredNames()[0];
+
+ if (CastFunctions.isCastFunction(funcName)) {
+ Statistics stat = holderExpr.args.get(0).accept(this, null);
+ if (stat != null && ! stat.isEmpty()) {
+ return evalCastFunc(holderExpr, stat);
+ }
+ }
+ return null;
+ }
+
+ private IntStatistics getStatistics(int value) {
+ return getStatistics(value, value);
+ }
+
+ private IntStatistics getStatistics(int min, int max) {
+ final IntStatistics intStatistics = new IntStatistics();
+ intStatistics.setMinMax(min, max);
+ return intStatistics;
+ }
+
+ private LongStatistics getStatistics(long value) {
+ return getStatistics(value, value);
+ }
+
+ private LongStatistics getStatistics(long min, long max) {
+ final LongStatistics longStatistics = new LongStatistics();
+ longStatistics.setMinMax(min, max);
+ return longStatistics;
+ }
+
+ private DoubleStatistics getStatistics(double value) {
+ return getStatistics(value, value);
+ }
+
+ private DoubleStatistics getStatistics(double min, double max) {
+ final DoubleStatistics doubleStatistics = new DoubleStatistics();
+ doubleStatistics.setMinMax(min, max);
+ return doubleStatistics;
+ }
+
+ private FloatStatistics getStatistics(float value) {
+ return getStatistics(value, value);
+ }
+
+ private FloatStatistics getStatistics(float min, float max) {
+ final FloatStatistics floatStatistics = new FloatStatistics();
+ floatStatistics.setMinMax(min, max);
+ return floatStatistics;
+ }
+
+// private int convertDrillDateValue(long dateInMillis) {
+// // Specific for date column created by Drill CTAS prior fix for
DRILL-4203.
+// // Apply the same shift as in ParquetOutputRecordWriter.java for
data value.
+// final int intValue = (int)
(DateTimeUtils.toJulianDayNumber(dateInMillis) + JULIAN_DAY_EPOC);
+// return intValue;
+// }
+
+ private Statistics evalCastFunc(FunctionHolderExpression holderExpr,
Statistics input) {
+ try {
+ DrillSimpleFuncHolder funcHolder = (DrillSimpleFuncHolder)
holderExpr.getHolder();
+
+ DrillSimpleFunc interpreter = funcHolder.createInterpreter();
+
+ final ValueHolder minHolder, maxHolder;
+
+ TypeProtos.MinorType srcType =
holderExpr.args.get(0).getMajorType().getMinorType();
+ TypeProtos.MinorType destType =
holderExpr.getMajorType().getMinorType();
+
+ if (srcType.equals(destType)) {
+ // same type cast ==> NoOp.
+ return input;
+ } else if (!CAST_FUNC.containsKey(srcType) ||
!CAST_FUNC.get(srcType).contains(destType)) {
+ return null; // cast func between srcType and destType is NOT
allowed.
+ }
+
+ switch (srcType) {
+ case INT :
+ minHolder =
ValueHolderHelper.getIntHolder(((IntStatistics)input).getMin());
+ maxHolder =
ValueHolderHelper.getIntHolder(((IntStatistics)input).getMax());
+ break;
+ case BIGINT:
+ minHolder =
ValueHolderHelper.getBigIntHolder(((LongStatistics)input).getMin());
+ maxHolder =
ValueHolderHelper.getBigIntHolder(((LongStatistics)input).getMax());
+ break;
+ case FLOAT4:
+ minHolder =
ValueHolderHelper.getFloat4Holder(((FloatStatistics)input).getMin());
+ maxHolder =
ValueHolderHelper.getFloat4Holder(((FloatStatistics)input).getMax());
+ break;
+ case FLOAT8:
+ minHolder =
ValueHolderHelper.getFloat8Holder(((DoubleStatistics)input).getMin());
+ maxHolder =
ValueHolderHelper.getFloat8Holder(((DoubleStatistics)input).getMax());
+ break;
+ default:
+ return null;
+ }
+
+ final ValueHolder[] args1 = {minHolder};
+ final ValueHolder[] args2 = {maxHolder};
+
+ final ValueHolder minFuncHolder =
InterpreterEvaluator.evaluateFunction(interpreter, args1, holderExpr.getName());
+ final ValueHolder maxFuncHolder =
InterpreterEvaluator.evaluateFunction(interpreter, args2, holderExpr.getName());
+
+ switch (destType) {
+ //TODO : need handle # of nulls.
+ case INT:
+ return getStatistics( ((IntHolder)minFuncHolder).value,
((IntHolder)maxFuncHolder).value);
+ case BIGINT:
+ return getStatistics( ((BigIntHolder)minFuncHolder).value,
((BigIntHolder)maxFuncHolder).value);
+ case FLOAT4:
+ return getStatistics( ((Float4Holder)minFuncHolder).value,
((Float4Holder)maxFuncHolder).value);
+ case FLOAT8:
+ return getStatistics( ((Float8Holder)minFuncHolder).value,
((Float8Holder)maxFuncHolder).value);
+ default:
+ return null;
+ }
+ } catch (Exception e) {
+ throw new DrillRuntimeException("Error in evaluating function of " +
holderExpr.getName() );
+ }
+ }
+
+ static Map<TypeProtos.MinorType, Set<TypeProtos.MinorType>> CAST_FUNC =
new HashMap<>();
--- End diff --
Right, the date/timestamp cast would be quite useful for row-group pruning,
but it's OK to restrict for now. Will we document the types of casts that will
be supported for such pruning?
Also, I just noticed that this is populating a static HashMap (CAST_FUNC).
This won't be thread-safe.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---