Repository: hive Updated Branches: refs/heads/branch-1.2 43b1b3d7a -> 2f5bf19a0
HIVE-10286: SARGs: Type Safety via PredicateLeaf.type (Prasanth Jayachandran reviewed by Gopal V) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2f5bf19a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2f5bf19a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2f5bf19a Branch: refs/heads/branch-1.2 Commit: 2f5bf19a0458d2f0194cfcbb82d923fdee1c20f3 Parents: 43b1b3d Author: Prasanth Jayachandran <j.prasant...@gmail.com> Authored: Fri May 1 12:06:36 2015 -0700 Committer: Prasanth Jayachandran <j.prasant...@gmail.com> Committed: Fri May 1 12:06:36 2015 -0700 ---------------------------------------------------------------------- .../hive/ql/io/orc/ColumnStatisticsImpl.java | 17 +- .../hive/ql/io/orc/DateColumnStatistics.java | 6 +- .../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 222 +++++++++++-------- .../hive/ql/io/sarg/SearchArgumentImpl.java | 85 ++----- .../hive/ql/io/orc/TestColumnStatistics.java | 20 +- .../hive/ql/io/orc/TestRecordReaderImpl.java | 170 ++++++++++++-- .../hive/ql/io/sarg/TestSearchArgumentImpl.java | 104 +++------ .../hadoop/hive/ql/io/sarg/PredicateLeaf.java | 19 +- 8 files changed, 367 insertions(+), 276 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/2f5bf19a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java index 9c2c9c2..7cfbd81 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java @@ -17,6 +17,9 @@ */ package org.apache.hadoop.hive.ql.io.orc; +import java.sql.Date; +import java.sql.Timestamp; + import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -24,8 +27,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; -import java.sql.Timestamp; - class ColumnStatisticsImpl implements ColumnStatistics { private static final class BooleanStatisticsImpl extends ColumnStatisticsImpl @@ -697,15 +698,15 @@ class ColumnStatisticsImpl implements ColumnStatistics { private transient final DateWritable maxDate = new DateWritable(); @Override - public DateWritable getMinimum() { + public Date getMinimum() { minDate.set(minimum); - return minDate; + return minDate.get(); } @Override - public DateWritable getMaximum() { + public Date getMaximum() { maxDate.set(maximum); - return maxDate; + return maxDate.get(); } @Override @@ -713,9 +714,9 @@ class ColumnStatisticsImpl implements ColumnStatistics { StringBuilder buf = new StringBuilder(super.toString()); if (getNumberOfValues() != 0) { buf.append(" min: "); - buf.append(minimum); + buf.append(getMinimum()); buf.append(" max: "); - buf.append(maximum); + buf.append(getMaximum()); } return buf.toString(); } http://git-wip-us.apache.org/repos/asf/hive/blob/2f5bf19a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java index 03cdeef..ae3fe31 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/DateColumnStatistics.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hive.ql.io.orc; -import org.apache.hadoop.hive.serde2.io.DateWritable; +import java.util.Date; /** * Statistics for DATE columns. @@ -27,11 +27,11 @@ public interface DateColumnStatistics extends ColumnStatistics { * Get the minimum value for the column. * @return minimum value */ - DateWritable getMinimum(); + Date getMinimum(); /** * Get the maximum value for the column. * @return maximum value */ - DateWritable getMaximum(); + Date getMaximum(); } http://git-wip-us.apache.org/repos/asf/hive/blob/2f5bf19a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index 9e7ac4b..a5a5943 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -30,7 +30,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -60,7 +59,6 @@ import org.apache.hadoop.io.Text; class RecordReaderImpl implements RecordReader { static final Log LOG = LogFactory.getLog(RecordReaderImpl.class); - private static final boolean isLogTraceEnabled = LOG.isTraceEnabled(); private static final boolean isLogDebugEnabled = LOG.isDebugEnabled(); private final Path path; @@ -280,9 +278,9 @@ class RecordReaderImpl implements RecordReader { return ((TimestampColumnStatistics) index).getMaximum(); } else if (index instanceof BooleanColumnStatistics) { if (((BooleanColumnStatistics)index).getTrueCount()!=0) { - return "true"; + return Boolean.TRUE; } else { - return "false"; + return Boolean.FALSE; } } else { return null; @@ -310,9 +308,9 @@ class RecordReaderImpl implements RecordReader { return ((TimestampColumnStatistics) index).getMinimum(); } else if (index instanceof BooleanColumnStatistics) { if (((BooleanColumnStatistics)index).getFalseCount()!=0) { - return "false"; + return Boolean.FALSE; } else { - return "true"; + return Boolean.TRUE; } } else { return null; @@ -367,18 +365,12 @@ class RecordReaderImpl implements RecordReader { } TruthValue result; - // Predicate object and stats object can be one of the following base types - // LONG, DOUBLE, STRING, DATE, DECIMAL - // Out of these DATE is not implicitly convertible to other types and rest - // others are implicitly convertible. In cases where DATE cannot be converted - // the stats object is converted to text and comparison is performed. - // When STRINGs are converted to other base types, NumberFormat exception - // can occur in which case TruthValue.YES_NO_NULL value is returned try { - Object baseObj = predicate.getLiteral(PredicateLeaf.FileFormat.ORC); - Object minValue = getConvertedStatsObj(min, baseObj); - Object maxValue = getConvertedStatsObj(max, baseObj); - Object predObj = getBaseObjectForComparison(baseObj, minValue); + // Predicate object and stats objects are converted to the type of the predicate object. + Object baseObj = predicate.getLiteral(); + Object minValue = getBaseObjectForComparison(predicate.getType(), min); + Object maxValue = getBaseObjectForComparison(predicate.getType(), max); + Object predObj = getBaseObjectForComparison(predicate.getType(), baseObj); result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull); if (bloomFilter != null && result != TruthValue.NO_NULL && result != TruthValue.NO) { @@ -390,7 +382,11 @@ class RecordReaderImpl implements RecordReader { LOG.warn("Exception when evaluating predicate. Skipping ORC PPD." + " Exception: " + ExceptionUtils.getStackTrace(e)); } - result = hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO; + if (predicate.getOperator().equals(PredicateLeaf.Operator.NULL_SAFE_EQUALS) || !hasNull) { + result = TruthValue.YES_NO; + } else { + result = TruthValue.YES_NO_NULL; + } } return result; } @@ -440,8 +436,8 @@ class RecordReaderImpl implements RecordReader { if (minValue.equals(maxValue)) { // for a single value, look through to see if that value is in the // set - for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) { - predObj = getBaseObjectForComparison(arg, minValue); + for (Object arg : predicate.getLiteralList()) { + predObj = getBaseObjectForComparison(predicate.getType(), arg); loc = compareToRange((Comparable) predObj, minValue, maxValue); if (loc == Location.MIN) { return hasNull ? TruthValue.YES_NULL : TruthValue.YES; @@ -450,8 +446,8 @@ class RecordReaderImpl implements RecordReader { return hasNull ? TruthValue.NO_NULL : TruthValue.NO; } else { // are all of the values outside of the range? - for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) { - predObj = getBaseObjectForComparison(arg, minValue); + for (Object arg : predicate.getLiteralList()) { + predObj = getBaseObjectForComparison(predicate.getType(), arg); loc = compareToRange((Comparable) predObj, minValue, maxValue); if (loc == Location.MIN || loc == Location.MIDDLE || loc == Location.MAX) { @@ -461,12 +457,12 @@ class RecordReaderImpl implements RecordReader { return hasNull ? TruthValue.NO_NULL : TruthValue.NO; } case BETWEEN: - List<Object> args = predicate.getLiteralList(PredicateLeaf.FileFormat.ORC); - Object predObj1 = getBaseObjectForComparison(args.get(0), minValue); + List<Object> args = predicate.getLiteralList(); + Object predObj1 = getBaseObjectForComparison(predicate.getType(), args.get(0)); loc = compareToRange((Comparable) predObj1, minValue, maxValue); if (loc == Location.BEFORE || loc == Location.MIN) { - Object predObj2 = getBaseObjectForComparison(args.get(1), minValue); + Object predObj2 = getBaseObjectForComparison(predicate.getType(), args.get(1)); Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue); if (loc2 == Location.AFTER || loc2 == Location.MAX) { @@ -489,8 +485,8 @@ class RecordReaderImpl implements RecordReader { } } - private static TruthValue evaluatePredicateBloomFilter(PredicateLeaf predicate, Object predObj, - BloomFilterIO bloomFilter, boolean hasNull) { + private static TruthValue evaluatePredicateBloomFilter(PredicateLeaf predicate, + final Object predObj, BloomFilterIO bloomFilter, boolean hasNull) { switch (predicate.getOperator()) { case NULL_SAFE_EQUALS: // null safe equals does not return *_NULL variant. So set hasNull to false @@ -498,9 +494,10 @@ class RecordReaderImpl implements RecordReader { case EQUALS: return checkInBloomFilter(bloomFilter, predObj, hasNull); case IN: - for (Object arg : predicate.getLiteralList(PredicateLeaf.FileFormat.ORC)) { + for (Object arg : predicate.getLiteralList()) { // if atleast one value in IN list exist in bloom filter, qualify the row group/stripe - TruthValue result = checkInBloomFilter(bloomFilter, arg, hasNull); + Object predObjItem = getBaseObjectForComparison(predicate.getType(), arg); + TruthValue result = checkInBloomFilter(bloomFilter, predObjItem, hasNull); if (result == TruthValue.YES_NO_NULL || result == TruthValue.YES_NO) { return result; } @@ -527,14 +524,6 @@ class RecordReaderImpl implements RecordReader { if (bf.testString(predObj.toString())) { result = TruthValue.YES_NO_NULL; } - } else if (predObj instanceof Date) { - if (bf.testLong(DateWritable.dateToDays((Date) predObj))) { - result = TruthValue.YES_NO_NULL; - } - } else if (predObj instanceof DateWritable) { - if (bf.testLong(((DateWritable) predObj).getDays())) { - result = TruthValue.YES_NO_NULL; - } } else if (predObj instanceof Timestamp) { if (bf.testLong(((Timestamp) predObj).getTime())) { result = TruthValue.YES_NO_NULL; @@ -543,14 +532,18 @@ class RecordReaderImpl implements RecordReader { if (bf.testLong(((TimestampWritable) predObj).getTimestamp().getTime())) { result = TruthValue.YES_NO_NULL; } - } else { - // if the predicate object is null and if hasNull says there are no nulls then return NO - if (predObj == null && !hasNull) { - result = TruthValue.NO; - } else { + } else if (predObj instanceof Date) { + if (bf.testLong(DateWritable.dateToDays((Date) predObj))) { result = TruthValue.YES_NO_NULL; } - } + } else { + // if the predicate object is null and if hasNull says there are no nulls then return NO + if (predObj == null && !hasNull) { + result = TruthValue.NO; + } else { + result = TruthValue.YES_NO_NULL; + } + } if (result == TruthValue.YES_NO_NULL && !hasNull) { result = TruthValue.YES_NO; @@ -563,58 +556,109 @@ class RecordReaderImpl implements RecordReader { return result; } - private static Object getBaseObjectForComparison(Object predObj, Object statsObj) { - if (predObj != null) { - if (predObj instanceof ExprNodeConstantDesc) { - predObj = ((ExprNodeConstantDesc) predObj).getValue(); + private static Object getBaseObjectForComparison(PredicateLeaf.Type type, Object obj) { + if (obj != null) { + if (obj instanceof ExprNodeConstantDesc) { + obj = ((ExprNodeConstantDesc) obj).getValue(); } - // following are implicitly convertible - if (statsObj instanceof Long) { - if (predObj instanceof Double) { - return ((Double) predObj).longValue(); - } else if (predObj instanceof HiveDecimal) { - return ((HiveDecimal) predObj).longValue(); - } else if (predObj instanceof String) { - return Long.valueOf(predObj.toString()); + } else { + return null; + } + switch (type) { + case BOOLEAN: + if (obj instanceof Boolean) { + return obj; + } else { + // will only be true if the string conversion yields "true", all other values are + // considered false + return Boolean.valueOf(obj.toString()); } - } else if (statsObj instanceof Double) { - if (predObj instanceof Long) { - return ((Long) predObj).doubleValue(); - } else if (predObj instanceof HiveDecimal) { - return ((HiveDecimal) predObj).doubleValue(); - } else if (predObj instanceof String) { - return Double.valueOf(predObj.toString()); + case DATE: + if (obj instanceof Date) { + return obj; + } else if (obj instanceof String) { + return Date.valueOf((String) obj); + } else if (obj instanceof Timestamp) { + return DateWritable.timeToDate(((Timestamp) obj).getTime() / 1000L); } - } else if (statsObj instanceof String) { - return predObj.toString(); - } else if (statsObj instanceof HiveDecimal) { - if (predObj instanceof Long) { - return HiveDecimal.create(((Long) predObj)); - } else if (predObj instanceof Double) { - return HiveDecimal.create(predObj.toString()); - } else if (predObj instanceof String) { - return HiveDecimal.create(predObj.toString()); - } else if (predObj instanceof BigDecimal) { - return HiveDecimal.create((BigDecimal)predObj); + // always string, but prevent the comparison to numbers (are they days/seconds/milliseconds?) + break; + case DECIMAL: + if (obj instanceof Boolean) { + return ((Boolean) obj).booleanValue() ? HiveDecimal.ONE : HiveDecimal.ZERO; + } else if (obj instanceof Integer) { + return HiveDecimal.create(((Integer) obj).intValue()); + } else if (obj instanceof Long) { + return HiveDecimal.create(((Long) obj)); + } else if (obj instanceof Float || obj instanceof Double || + obj instanceof String) { + return HiveDecimal.create(obj.toString()); + } else if (obj instanceof BigDecimal) { + return HiveDecimal.create((BigDecimal) obj); + } else if (obj instanceof HiveDecimal) { + return obj; + } else if (obj instanceof Timestamp) { + return HiveDecimal.create( + new Double(new TimestampWritable((Timestamp) obj).getDouble()).toString()); } - } - } - return predObj; - } - - private static Object getConvertedStatsObj(Object statsObj, Object predObj) { - - // converting between date and other types is not implicit, so convert to - // text for comparison - if (((predObj instanceof DateWritable) && !(statsObj instanceof DateWritable)) - || ((statsObj instanceof DateWritable) && !(predObj instanceof DateWritable))) { - return StringUtils.stripEnd(statsObj.toString(), null); + break; + case FLOAT: + if (obj instanceof Number) { + // widening conversion + return ((Number) obj).doubleValue(); + } else if (obj instanceof HiveDecimal) { + return ((HiveDecimal) obj).doubleValue(); + } else if (obj instanceof String) { + return Double.valueOf(obj.toString()); + } else if (obj instanceof Timestamp) { + return new TimestampWritable((Timestamp)obj).getDouble(); + } else if (obj instanceof HiveDecimal) { + return ((HiveDecimal) obj).doubleValue(); + } else if (obj instanceof BigDecimal) { + return ((BigDecimal) obj).doubleValue(); + } + break; + case INTEGER: + // fall through + case LONG: + if (obj instanceof Number) { + // widening conversion + return ((Number) obj).longValue(); + } else if (obj instanceof HiveDecimal) { + return ((HiveDecimal) obj).longValue(); + } else if (obj instanceof String) { + return Long.valueOf(obj.toString()); + } + break; + case STRING: + if (obj != null) { + return (obj.toString()); + } + break; + case TIMESTAMP: + if (obj instanceof Timestamp) { + return obj; + } else if (obj instanceof Float) { + return TimestampWritable.doubleToTimestamp(((Float) obj).doubleValue()); + } else if (obj instanceof Double) { + return TimestampWritable.doubleToTimestamp(((Double) obj).doubleValue()); + } else if (obj instanceof HiveDecimal) { + return TimestampWritable.decimalToTimestamp((HiveDecimal) obj); + } else if (obj instanceof Date) { + return new Timestamp(((Date) obj).getTime()); + } + // float/double conversion to timestamp is interpreted as seconds whereas integer conversion + // to timestamp is interpreted as milliseconds by default. The integer to timestamp casting + // is also config driven. The filter operator changes its promotion based on config: + // "int.timestamp.conversion.in.seconds". Disable PPD for integer cases. + break; + default: + break; } - if (statsObj instanceof String) { - return StringUtils.stripEnd(statsObj.toString(), null); - } - return statsObj; + throw new IllegalArgumentException(String.format( + "ORC SARGS could not convert from %s to %s", obj == null ? "(null)" : obj.getClass() + .getSimpleName(), type)); } public static class SargApplier { http://git-wip-us.apache.org/repos/asf/hive/blob/2f5bf19a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java index a451bfb..efe03ab 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java @@ -18,9 +18,15 @@ package org.apache.hadoop.hive.ql.io.sarg; -import com.esotericsoftware.kryo.Kryo; -import com.esotericsoftware.kryo.io.Input; -import com.esotericsoftware.kryo.io.Output; +import java.math.BigDecimal; +import java.sql.Timestamp; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Deque; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang.StringUtils; @@ -54,15 +60,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import java.math.BigDecimal; -import java.sql.Timestamp; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Deque; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.io.Input; +import com.esotericsoftware.kryo.io.Output; import parquet.filter2.predicate.FilterApi; import parquet.filter2.predicate.FilterPredicate; @@ -116,55 +116,12 @@ final class SearchArgumentImpl implements SearchArgument { } @Override - public Object getLiteral(FileFormat format) { - // To get around a kryo 2.22 bug while deserialize a Timestamp into Date - // (https://github.com/EsotericSoftware/kryo/issues/88) - // When we see a Date, convert back into Timestamp - if (literal instanceof java.util.Date) { - return new Timestamp(((java.util.Date) literal).getTime()); - } - - switch (format) { - case ORC: - // adapt base type to what orc needs - if (literal instanceof Integer) { - return ((Number) literal).longValue(); - } - return literal; - case PARQUET: - return literal; - default: - throw new RuntimeException( - "File format " + format + "is not support to build search arguments"); - } + public Object getLiteral() { + return literal; } @Override - public List<Object> getLiteralList(FileFormat format) { - switch (format) { - case ORC: - return getOrcLiteralList(); - case PARQUET: - return getParquetLiteralList(); - default: - throw new RuntimeException("File format is not support to build search arguments"); - } - } - - private List<Object> getOrcLiteralList() { - // no need to cast - if (literalList == null || literalList.size() == 0 || !(literalList.get(0) instanceof - Integer)) { - return literalList; - } - List<Object> result = new ArrayList<Object>(literalList.size()); - for (Object o : literalList) { - result.add(((Number) o).longValue()); - } - return result; - } - - private List<Object> getParquetLiteralList() { + public List<Object> getLiteralList() { return literalList; } @@ -350,13 +307,17 @@ final class SearchArgumentImpl implements SearchArgument { try { builder = leafFilterFactory .getLeafFilterBuilderByType(leaf.getType()); - if (builder == null) return null; + if (builder == null) { + return null; + } if (isMultiLiteralsOperator(leaf.getOperator())) { - return builder.buildPredicate(leaf.getOperator(), leaf.getLiteralList( - PredicateLeaf.FileFormat.PARQUET), leaf.getColumnName()); + return builder.buildPredicate(leaf.getOperator(), + leaf.getLiteralList(), + leaf.getColumnName()); } else { return builder - .buildPredict(leaf.getOperator(), leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET), + .buildPredict(leaf.getOperator(), + leaf.getLiteral(), leaf.getColumnName()); } } catch (Exception e) { http://git-wip-us.apache.org/repos/asf/hive/blob/2f5bf19a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java index 5c7fe60..4d30377 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestColumnStatistics.java @@ -20,6 +20,12 @@ package org.apache.hadoop.hive.ql.io.orc; import static junit.framework.Assert.assertEquals; +import java.io.File; +import java.io.FileOutputStream; +import java.io.PrintStream; +import java.sql.Timestamp; +import java.util.List; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -35,12 +41,6 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TestName; -import java.io.File; -import java.io.FileOutputStream; -import java.io.PrintStream; -import java.sql.Timestamp; -import java.util.List; - /** * Test ColumnStatisticsImpl for ORC. */ @@ -130,14 +130,14 @@ public class TestColumnStatistics { stats2.updateDate(new DateWritable(2000)); stats1.merge(stats2); DateColumnStatistics typed = (DateColumnStatistics) stats1; - assertEquals(new DateWritable(10), typed.getMinimum()); - assertEquals(new DateWritable(2000), typed.getMaximum()); + assertEquals(new DateWritable(10).get(), typed.getMinimum()); + assertEquals(new DateWritable(2000).get(), typed.getMaximum()); stats1.reset(); stats1.updateDate(new DateWritable(-10)); stats1.updateDate(new DateWritable(10000)); stats1.merge(stats2); - assertEquals(-10, typed.getMinimum().getDays()); - assertEquals(10000, typed.getMaximum().getDays()); + assertEquals(new DateWritable(-10).get(), typed.getMinimum()); + assertEquals(new DateWritable(10000).get(), typed.getMaximum()); } @Test http://git-wip-us.apache.org/repos/asf/hive/blob/2f5bf19a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java index 78d779c..957f54e 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestRecordReaderImpl.java @@ -219,7 +219,8 @@ public class TestRecordReaderImpl { @Test public void testGetMin() throws Exception { - assertEquals(10L, RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize(createIntStats(10L, 100L)))); + assertEquals(10L, RecordReaderImpl.getMin( + ColumnStatisticsImpl.deserialize(createIntStats(10L, 100L)))); assertEquals(10.0d, RecordReaderImpl.getMin(ColumnStatisticsImpl.deserialize( OrcProto.ColumnStatistics.newBuilder() .setDoubleStatistics(OrcProto.DoubleStatistics.newBuilder() @@ -252,6 +253,13 @@ public class TestRecordReaderImpl { .setIntStatistics(intStats.build()).build(); } + private static OrcProto.ColumnStatistics createBooleanStats(int n, int trueCount) { + OrcProto.BucketStatistics.Builder boolStats = OrcProto.BucketStatistics.newBuilder(); + boolStats.addCount(trueCount); + return OrcProto.ColumnStatistics.newBuilder().setNumberOfValues(n).setBucketStatistics( + boolStats.build()).build(); + } + private static OrcProto.ColumnStatistics createIntStats(int min, int max) { OrcProto.IntegerStatistics.Builder intStats = OrcProto.IntegerStatistics.newBuilder(); intStats.setMinimum(min); @@ -289,7 +297,7 @@ public class TestRecordReaderImpl { return OrcProto.ColumnStatistics.newBuilder().setDateStatistics(dateStats.build()).build(); } - private static OrcProto.ColumnStatistics createTimestampStats(int min, int max) { + private static OrcProto.ColumnStatistics createTimestampStats(long min, long max) { OrcProto.TimestampStatistics.Builder tsStats = OrcProto.TimestampStatistics.newBuilder(); tsStats.setMinimum(min); tsStats.setMaximum(max); @@ -334,6 +342,30 @@ public class TestRecordReaderImpl { } @Test + public void testPredEvalWithBooleanStats() throws Exception { + PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", true, null); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null)); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null)); + + pred = TestSearchArgumentImpl.createPredicateLeaf( + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", "true", null); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null)); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null)); + + pred = TestSearchArgumentImpl.createPredicateLeaf( + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.BOOLEAN, "x", "hello", null); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 10), pred, null)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicateProto(createBooleanStats(10, 0), pred, null)); + } + + @Test public void testPredEvalWithIntStats() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); @@ -345,20 +377,27 @@ public class TestRecordReaderImpl { assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null)); + // Stats gets converted to column type. "15" is outside of "10" and "100" pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "15", null); - assertEquals(TruthValue.YES_NO, + assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null)); + // Integer stats will not be converted date because of days/seconds/millis ambiguity pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DATE, "x", new DateWritable(15), null); - assertEquals(TruthValue.NO, + PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null); + assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicateProto(createIntStats(10, 100), pred, null)); } @Test @@ -373,20 +412,32 @@ public class TestRecordReaderImpl { assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null)); + // Stats gets converted to column type. "15.0" is outside of "10.0" and "100.0" pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "15", null); - assertEquals(TruthValue.YES_NO, + assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null)); + // Double is not converted to date type because of days/seconds/millis ambiguity pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DATE, "x", new DateWritable(15), null); - assertEquals(TruthValue.NO, + PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null); + assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15*1000L), null); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(150*1000L), null); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicateProto(createDoubleStats(10.0, 100.0), pred, null)); } @Test @@ -406,27 +457,35 @@ public class TestRecordReaderImpl { assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null)); + // IllegalArgumentException is thrown when converting String to Date, hence YES_NO pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DATE, "x", new DateWritable(100), null); - assertEquals(TruthValue.NO, + PredicateLeaf.Type.DATE, "x", new DateWritable(100).get(), null); + assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(100), null); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(100), null); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicateProto(createStringStats("10", "1000"), pred, null)); } @Test public void testPredEvalWithDateStats() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); - assertEquals(TruthValue.NO, + // Date to Integer conversion is not possible. + assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); + // Date to Float conversion is also not possible. pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.FLOAT, "x", 15.0, null); - assertEquals(TruthValue.NO, + assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, @@ -460,20 +519,30 @@ public class TestRecordReaderImpl { RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DATE, "x", new DateWritable(15), null); + PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DATE, "x", new DateWritable(150), null); + PredicateLeaf.Type.DATE, "x", new DateWritable(150).get(), null); assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); + // Date to Decimal conversion is also not possible. pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null); assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15L * 24L * 60L * 60L * 1000L), null); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicateProto(createDateStats(10, 100), pred, null)); } @Test @@ -488,21 +557,79 @@ public class TestRecordReaderImpl { assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); + // "15" out of range of "10.0" and "100.0" pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.STRING, "x", "15", null); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); + + // Decimal to Date not possible. + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null); assertEquals(TruthValue.YES_NO, RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, - PredicateLeaf.Type.DATE, "x", new DateWritable(15), null); + PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15 * 1000L), null); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(150 * 1000L), null); assertEquals(TruthValue.NO, RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); + } + + @Test + public void testPredEvalWithTimestampStats() throws Exception { + PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.INTEGER, "x", 15L, null); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.FLOAT, "x", 15.0, null); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.STRING, "x", "15", null); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.STRING, "x", new Timestamp(15).toString(), null); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null)); + + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.DATE, "x", new DateWritable(15).get(), null); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null)); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10 * 24L * 60L * 60L * 1000L, + 100 * 24L * 60L * 60L * 1000L), pred, null)); pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DECIMAL, "x", HiveDecimal.create(15), null); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null)); assertEquals(TruthValue.YES_NO, - RecordReaderImpl.evaluatePredicateProto(createDecimalStats("10.0", "100.0"), pred, null)); + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null)); + pred = TestSearchArgumentImpl.createPredicateLeaf(PredicateLeaf.Operator.NULL_SAFE_EQUALS, + PredicateLeaf.Type.TIMESTAMP, "x", new Timestamp(15), null); + assertEquals(TruthValue.YES_NO, + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10, 100), pred, null)); + assertEquals(TruthValue.NO, + RecordReaderImpl.evaluatePredicateProto(createTimestampStats(10000, 100000), pred, null)); } @Test @@ -1303,8 +1430,8 @@ public class TestRecordReaderImpl { @Test public void testDateWritableNullSafeEqualsBloomFilter() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( - PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(15), - null); + PredicateLeaf.Operator.NULL_SAFE_EQUALS, PredicateLeaf.Type.DATE, "x", + new DateWritable(15).get(), null); BloomFilterIO bf = new BloomFilterIO(10000); for (int i = 20; i < 1000; i++) { bf.addLong((new DateWritable(i)).getDays()); @@ -1319,7 +1446,8 @@ public class TestRecordReaderImpl { @Test public void testDateWritableEqualsBloomFilter() throws Exception { PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf( - PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.DATE, "x", new DateWritable(15), null); + PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.DATE, "x", + new DateWritable(15).get(), null); BloomFilterIO bf = new BloomFilterIO(10000); for (int i = 20; i < 1000; i++) { bf.addLong((new DateWritable(i)).getDays()); @@ -1334,8 +1462,8 @@ public class TestRecordReaderImpl { @Test public void testDateWritableInBloomFilter() throws Exception { List<Object> args = new ArrayList<Object>(); - args.add(new DateWritable(15)); - args.add(new DateWritable(19)); + args.add(new DateWritable(15).get()); + args.add(new DateWritable(19).get()); PredicateLeaf pred = TestSearchArgumentImpl.createPredicateLeaf (PredicateLeaf.Operator.IN, PredicateLeaf.Type.DATE, "x", null, args); http://git-wip-us.apache.org/repos/asf/hive/blob/2f5bf19a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java index 22eea0b..5e61aba 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java @@ -802,64 +802,55 @@ public class TestSearchArgumentImpl { assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("john", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals("john", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals("john", leaf.getLiteral()); leaf = leaves.get(1); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("greg", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals("greg", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals("greg", leaf.getLiteral()); leaf = leaves.get(2); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals("alan", leaf.getLiteral()); leaf = leaves.get(3); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(12, leaf.getLiteral()); leaf = leaves.get(4); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(13L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(13, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(13, leaf.getLiteral()); leaf = leaves.get(5); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(15L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(15, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(15, leaf.getLiteral()); leaf = leaves.get(6); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(16L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(16, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(16, leaf.getLiteral()); leaf = leaves.get(7); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(30L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(30, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(30, leaf.getLiteral()); leaf = leaves.get(8); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.NULL_SAFE_EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("owen", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals("owen", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals("owen", leaf.getLiteral()); assertEquals("(and (or leaf-0 (not leaf-1) leaf-2 (not leaf-3)" + " (not leaf-4) leaf-5 leaf-6 leaf-7)" + @@ -1090,31 +1081,26 @@ public class TestSearchArgumentImpl { assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.IS_NULL, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); - assertEquals(null, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC)); - assertEquals(null, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(null, leaf.getLiteral()); + assertEquals(null, leaf.getLiteralList()); leaf = leaves.get(1); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("sue", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals("sue", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals("sue", leaf.getLiteral()); leaf = leaves.get(2); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(12, leaf.getLiteral()); leaf = leaves.get(3); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN_EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(4L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(4, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(4, leaf.getLiteral()); assertEquals("(or leaf-0 (not leaf-1) (not leaf-2) leaf-3)", sarg.getExpression().toString()); @@ -1525,26 +1511,21 @@ public class TestSearchArgumentImpl { assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.BETWEEN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(null, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); - assertEquals(23L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(0)); - assertEquals(23, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(0)); - assertEquals(45L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(1)); - assertEquals(45, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(1)); + assertEquals(null, leaf.getLiteral()); + assertEquals(23, leaf.getLiteralList().get(0)); + assertEquals(45, leaf.getLiteralList().get(1)); leaf = leaves.get(1); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals("alan", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals("alan", leaf.getLiteral()); leaf = leaves.get(2); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("last_name", leaf.getColumnName()); - assertEquals("smith", leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals("smith", leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals("smith", leaf.getLiteral()); assertEquals("(and leaf-0 leaf-1 leaf-2)", sarg.getExpression().toString()); @@ -1751,26 +1732,21 @@ public class TestSearchArgumentImpl { assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.EQUALS, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(12, leaf.getLiteral()); leaf = leaves.get(1); assertEquals(PredicateLeaf.Type.STRING, leaf.getType()); assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator()); assertEquals("first_name", leaf.getColumnName()); - assertEquals("john", leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(0)); - assertEquals("sue", leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(1)); - assertEquals("john", leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(0)); - assertEquals("sue", leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(1)); + assertEquals("john", leaf.getLiteralList().get(0)); + assertEquals("sue", leaf.getLiteralList().get(1)); leaf = leaves.get(2); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.IN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(34L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(0)); - assertEquals(50L, leaf.getLiteralList(PredicateLeaf.FileFormat.ORC).get(1)); - assertEquals(34, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(0)); - assertEquals(50, leaf.getLiteralList(PredicateLeaf.FileFormat.PARQUET).get(1)); + assertEquals(34, leaf.getLiteralList().get(0)); + assertEquals(50, leaf.getLiteralList().get(1)); assertEquals("(and (not leaf-0) leaf-1 leaf-2)", sarg.getExpression().toString()); @@ -2016,8 +1992,6 @@ public class TestSearchArgumentImpl { assertEquals(PredicateLeaf.Operator.BETWEEN, leaves.get(0).getOperator()); assertEquals("first_name", leaves.get(0).getColumnName()); - assertEquals("david", leaves.get(0).getLiteralList(PredicateLeaf.FileFormat.ORC).get(0)); - assertEquals("greg", leaves.get(0).getLiteralList(PredicateLeaf.FileFormat.ORC).get(1)); assertEquals("leaf-0", sarg.getExpression().toString()); @@ -2515,64 +2489,55 @@ public class TestSearchArgumentImpl { assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(18L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(18, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(18, leaf.getLiteral()); leaf = leaves.get(1); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(10L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(10, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(10, leaf.getLiteral()); leaf = leaves.get(2); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(13L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(13, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(13, leaf.getLiteral()); leaf = leaves.get(3); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(16L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(16, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(16, leaf.getLiteral()); leaf = leaves.get(4); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(11L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(11, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(11, leaf.getLiteral()); leaf = leaves.get(5); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(12L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(12, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(12, leaf.getLiteral()); leaf = leaves.get(6); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(14L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(14, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(14, leaf.getLiteral()); leaf = leaves.get(7); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(15L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(15, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(15, leaf.getLiteral()); leaf = leaves.get(8); assertEquals(PredicateLeaf.Type.INTEGER, leaf.getType()); assertEquals(PredicateLeaf.Operator.LESS_THAN, leaf.getOperator()); assertEquals("id", leaf.getColumnName()); - assertEquals(17L, leaf.getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(17, leaf.getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(17, leaf.getLiteral()); assertEquals("(and" + " (or leaf-0 leaf-1 leaf-2 leaf-3)" + @@ -2917,8 +2882,7 @@ public class TestSearchArgumentImpl { assertEquals(PredicateLeaf.Operator.LESS_THAN, leaves.get(0).getOperator()); assertEquals("id", leaves.get(0).getColumnName()); - assertEquals(10L, leaves.get(0).getLiteral(PredicateLeaf.FileFormat.ORC)); - assertEquals(10, leaves.get(0).getLiteral(PredicateLeaf.FileFormat.PARQUET)); + assertEquals(10, leaves.get(0).getLiteral()); assertEquals("(and (not leaf-0) (not leaf-0))", sarg.getExpression().toString()); http://git-wip-us.apache.org/repos/asf/hive/blob/2f5bf19a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java index 41a14c2..0a95363 100644 --- a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java +++ b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java @@ -54,14 +54,6 @@ public interface PredicateLeaf { } /** - * file format which supports search arguments - */ - public static enum FileFormat { - ORC, - PARQUET - } - - /** * Get the operator for the leaf. */ public Operator getOperator(); @@ -79,16 +71,17 @@ public interface PredicateLeaf { /** * Get the literal half of the predicate leaf. Adapt the original type for what orc needs - * @return a Long, Double, or String for Orc and a Int, Long, Double, or String for parquet + * + * @return an Integer, Long, Double, or String */ - public Object getLiteral(FileFormat format); + public Object getLiteral(); /** * For operators with multiple literals (IN and BETWEEN), get the literals. * - * @return the list of literals (Longs, Doubles, or Strings) for orc or the list of literals - * (Integer, Longs, Doubles, or String) for parquet + * @return the list of literals (Integer, Longs, Doubles, or Strings) + * */ - public List<Object> getLiteralList(FileFormat format); + public List<Object> getLiteralList(); }