HIVE-10553: Remove hardcoded Parquet references from SearchArgumentImpl (Owen O'Malley reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1280ccae Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1280ccae Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1280ccae Branch: refs/heads/beeline-cli Commit: 1280ccaeed2912d20a92df4fa609c8aa5b79f35d Parents: 02e762f Author: Prasanth Jayachandran <j.prasant...@gmail.com> Authored: Tue Jul 7 23:07:15 2015 -0700 Committer: Prasanth Jayachandran <j.prasant...@gmail.com> Committed: Tue Jul 7 23:07:15 2015 -0700 ---------------------------------------------------------------------- .../read/ParquetRecordReaderWrapper.java | 102 +++++- .../hive/ql/io/sarg/SearchArgumentImpl.java | 343 ++++--------------- .../hive/ql/io/sarg/TestSearchArgumentImpl.java | 32 +- .../hadoop/hive/ql/io/sarg/ExpressionTree.java | 157 +++++++++ .../hadoop/hive/ql/io/sarg/SearchArgument.java | 14 +- 5 files changed, 349 insertions(+), 299 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/1280ccae/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java index 80a7301..a64ec06 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java @@ -24,7 +24,12 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder; +import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory; import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher; +import org.apache.hadoop.hive.ql.io.sarg.ExpressionTree; +import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; @@ -41,6 +46,7 @@ import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.parquet.filter2.compat.FilterCompat; import org.apache.parquet.filter2.compat.RowGroupFilter; +import org.apache.parquet.filter2.predicate.FilterApi; import org.apache.parquet.filter2.predicate.FilterPredicate; import org.apache.parquet.hadoop.ParquetFileReader; import org.apache.parquet.hadoop.ParquetInputFormat; @@ -142,9 +148,10 @@ public class ParquetRecordReaderWrapper implements RecordReader<NullWritable, A return null; } - FilterPredicate p = - SearchArgumentFactory.create(Utilities.deserializeExpression(serializedPushdown)) - .toFilterPredicate(); + SearchArgument sarg = + SearchArgumentFactory.create(Utilities.deserializeExpression + (serializedPushdown)); + FilterPredicate p = toFilterPredicate(sarg); if (p != null) { LOG.debug("Predicate filter for parquet is " + p.toString()); ParquetInputFormat.setFilterPredicate(conf, p); @@ -303,4 +310,93 @@ public class ParquetRecordReaderWrapper implements RecordReader<NullWritable, A public List<BlockMetaData> getFiltedBlocks() { return filtedBlocks; } + + /** + * Translate the search argument to the filter predicate parquet used + * @return translate the sarg into a filter predicate + */ + public static FilterPredicate toFilterPredicate(SearchArgument sarg) { + return translate(sarg.getExpression(), + sarg.getLeaves()); + } + + private static boolean isMultiLiteralsOperator(PredicateLeaf.Operator op) { + return (op == PredicateLeaf.Operator.IN) || + (op == PredicateLeaf.Operator.BETWEEN); + } + + private static FilterPredicate translate(ExpressionTree root, + List<PredicateLeaf> leafs){ + FilterPredicate p = null; + switch (root.getOperator()) { + case OR: + for(ExpressionTree child: root.getChildren()) { + if (p == null) { + p = translate(child, leafs); + } else { + FilterPredicate right = translate(child, leafs); + // constant means no filter, ignore it when it is null + if(right != null){ + p = FilterApi.or(p, right); + } + } + } + return p; + case AND: + for(ExpressionTree child: root.getChildren()) { + if (p == null) { + p = translate(child, leafs); + } else { + FilterPredicate right = translate(child, leafs); + // constant means no filter, ignore it when it is null + if(right != null){ + p = FilterApi.and(p, right); + } + } + } + return p; + case NOT: + FilterPredicate op = translate(root.getChildren().get(0), leafs); + if (op != null) { + return FilterApi.not(op); + } else { + return null; + } + case LEAF: + return buildFilterPredicateFromPredicateLeaf(leafs.get(root.getLeaf())); + case CONSTANT: + return null;// no filter will be executed for constant + default: + throw new IllegalStateException("Unknown operator: " + + root.getOperator()); + } + } + + private static FilterPredicate buildFilterPredicateFromPredicateLeaf + (PredicateLeaf leaf) { + LeafFilterFactory leafFilterFactory = new LeafFilterFactory(); + FilterPredicateLeafBuilder builder; + try { + builder = leafFilterFactory + .getLeafFilterBuilderByType(leaf.getType()); + if (builder == null) { + return null; + } + if (isMultiLiteralsOperator(leaf.getOperator())) { + return builder.buildPredicate(leaf.getOperator(), + leaf.getLiteralList(), + leaf.getColumnName()); + } else { + return builder + .buildPredict(leaf.getOperator(), + leaf.getLiteral(), + leaf.getColumnName()); + } + } catch (Exception e) { + LOG.error("fail to build predicate filter leaf with errors" + e, e); + return null; + } + } + + } http://git-wip-us.apache.org/repos/asf/hive/blob/1280ccae/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java index 782b5f8..46f1e4e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java @@ -22,7 +22,6 @@ import java.math.BigDecimal; import java.sql.Timestamp; import java.util.ArrayDeque; import java.util.ArrayList; -import java.util.Collections; import java.util.Deque; import java.util.HashMap; import java.util.List; @@ -35,8 +34,6 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.ql.io.parquet.FilterPredicateLeafBuilder; -import org.apache.hadoop.hive.ql.io.parquet.LeafFilterFactory; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -64,9 +61,6 @@ import com.esotericsoftware.kryo.Kryo; import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; -import org.apache.parquet.filter2.predicate.FilterApi; -import org.apache.parquet.filter2.predicate.FilterPredicate; - /** * The implementation of SearchArguments. */ @@ -188,199 +182,6 @@ final class SearchArgumentImpl implements SearchArgument { } } - static class ExpressionTree { - static enum Operator {OR, AND, NOT, LEAF, CONSTANT} - private final Operator operator; - private final List<ExpressionTree> children; - private final int leaf; - private final TruthValue constant; - - ExpressionTree() { - operator = null; - children = null; - leaf = 0; - constant = null; - } - - ExpressionTree(Operator op, ExpressionTree... kids) { - operator = op; - children = new ArrayList<ExpressionTree>(); - leaf = -1; - this.constant = null; - Collections.addAll(children, kids); - } - - ExpressionTree(int leaf) { - operator = Operator.LEAF; - children = null; - this.leaf = leaf; - this.constant = null; - } - - ExpressionTree(TruthValue constant) { - operator = Operator.CONSTANT; - children = null; - this.leaf = -1; - this.constant = constant; - } - - ExpressionTree(ExpressionTree other) { - this.operator = other.operator; - if (other.children == null) { - this.children = null; - } else { - this.children = new ArrayList<ExpressionTree>(); - for(ExpressionTree child: other.children) { - children.add(new ExpressionTree(child)); - } - } - this.leaf = other.leaf; - this.constant = other.constant; - } - - TruthValue evaluate(TruthValue[] leaves) { - TruthValue result = null; - switch (operator) { - case OR: - for(ExpressionTree child: children) { - result = child.evaluate(leaves).or(result); - } - return result; - case AND: - for(ExpressionTree child: children) { - result = child.evaluate(leaves).and(result); - } - return result; - case NOT: - return children.get(0).evaluate(leaves).not(); - case LEAF: - return leaves[leaf]; - case CONSTANT: - return constant; - default: - throw new IllegalStateException("Unknown operator: " + operator); - } - } - - FilterPredicate translate(List<PredicateLeaf> leafs){ - FilterPredicate p = null; - switch (operator) { - case OR: - for(ExpressionTree child: children) { - if (p == null) { - p = child.translate(leafs); - } else { - FilterPredicate right = child.translate(leafs); - // constant means no filter, ignore it when it is null - if(right != null){ - p = FilterApi.or(p, right); - } - } - } - return p; - case AND: - for(ExpressionTree child: children) { - if (p == null) { - p = child.translate(leafs); - } else { - FilterPredicate right = child.translate(leafs); - // constant means no filter, ignore it when it is null - if(right != null){ - p = FilterApi.and(p, right); - } - } - } - return p; - case NOT: - FilterPredicate op = children.get(0).translate(leafs); - if (op != null) { - return FilterApi.not(op); - } else { - return null; - } - case LEAF: - return buildFilterPredicateFromPredicateLeaf(leafs.get(leaf)); - case CONSTANT: - return null;// no filter will be executed for constant - default: - throw new IllegalStateException("Unknown operator: " + operator); - } - } - - private FilterPredicate buildFilterPredicateFromPredicateLeaf(PredicateLeaf leaf) { - LeafFilterFactory leafFilterFactory = new LeafFilterFactory(); - FilterPredicateLeafBuilder builder; - try { - builder = leafFilterFactory - .getLeafFilterBuilderByType(leaf.getType()); - if (builder == null) { - return null; - } - if (isMultiLiteralsOperator(leaf.getOperator())) { - return builder.buildPredicate(leaf.getOperator(), - leaf.getLiteralList(), - leaf.getColumnName()); - } else { - return builder - .buildPredict(leaf.getOperator(), - leaf.getLiteral(), - leaf.getColumnName()); - } - } catch (Exception e) { - LOG.error("fail to build predicate filter leaf with errors" + e, e); - return null; - } - } - - private boolean isMultiLiteralsOperator(PredicateLeaf.Operator op) { - return (op == PredicateLeaf.Operator.IN) || (op == PredicateLeaf.Operator.BETWEEN); - } - - @Override - public String toString() { - StringBuilder buffer = new StringBuilder(); - switch (operator) { - case OR: - buffer.append("(or"); - for(ExpressionTree child: children) { - buffer.append(' '); - buffer.append(child.toString()); - } - buffer.append(')'); - break; - case AND: - buffer.append("(and"); - for(ExpressionTree child: children) { - buffer.append(' '); - buffer.append(child.toString()); - } - buffer.append(')'); - break; - case NOT: - buffer.append("(not "); - buffer.append(children.get(0)); - buffer.append(')'); - break; - case LEAF: - buffer.append("leaf-"); - buffer.append(leaf); - break; - case CONSTANT: - buffer.append(constant); - break; - } - return buffer.toString(); - } - - Operator getOperator() { - return operator; - } - - List<ExpressionTree> getChildren() { - return children; - } - } - static class ExpressionBuilder { // max threshold for CNF conversion. having >8 elements in andList will be converted to maybe private static final int CNF_COMBINATIONS_THRESHOLD = 256; @@ -587,7 +388,7 @@ final class SearchArgumentImpl implements SearchArgument { private ExpressionTree negate(ExpressionTree expr) { ExpressionTree result = new ExpressionTree(ExpressionTree.Operator.NOT); - result.children.add(expr); + result.getChildren().add(expr); return result; } @@ -595,7 +396,7 @@ final class SearchArgumentImpl implements SearchArgument { ExprNodeGenericFuncDesc node, List<PredicateLeaf> leafCache) { for(ExprNodeDesc child: node.getChildren()) { - result.children.add(parse(child, leafCache)); + result.getChildren().add(parse(child, leafCache)); } } @@ -671,24 +472,24 @@ final class SearchArgumentImpl implements SearchArgument { * nodes of the original expression. */ static ExpressionTree pushDownNot(ExpressionTree root) { - if (root.operator == ExpressionTree.Operator.NOT) { - ExpressionTree child = root.children.get(0); - switch (child.operator) { + if (root.getOperator() == ExpressionTree.Operator.NOT) { + ExpressionTree child = root.getChildren().get(0); + switch (child.getOperator()) { case NOT: - return pushDownNot(child.children.get(0)); + return pushDownNot(child.getChildren().get(0)); case CONSTANT: - return new ExpressionTree(child.constant.not()); + return new ExpressionTree(child.getConstant().not()); case AND: root = new ExpressionTree(ExpressionTree.Operator.OR); - for(ExpressionTree kid: child.children) { - root.children.add(pushDownNot(new + for(ExpressionTree kid: child.getChildren()) { + root.getChildren().add(pushDownNot(new ExpressionTree(ExpressionTree.Operator.NOT, kid))); } break; case OR: root = new ExpressionTree(ExpressionTree.Operator.AND); - for(ExpressionTree kid: child.children) { - root.children.add(pushDownNot(new ExpressionTree + for(ExpressionTree kid: child.getChildren()) { + root.getChildren().add(pushDownNot(new ExpressionTree (ExpressionTree.Operator.NOT, kid))); } break; @@ -696,10 +497,10 @@ final class SearchArgumentImpl implements SearchArgument { default: break; } - } else if (root.children != null) { + } else if (root.getChildren() != null) { // iterate through children and push down not for each one - for(int i=0; i < root.children.size(); ++i) { - root.children.set(i, pushDownNot(root.children.get(i))); + for(int i=0; i < root.getChildren().size(); ++i) { + root.getChildren().set(i, pushDownNot(root.getChildren().get(i))); } } return root; @@ -713,13 +514,13 @@ final class SearchArgumentImpl implements SearchArgument { * @return The cleaned up expression */ static ExpressionTree foldMaybe(ExpressionTree expr) { - if (expr.children != null) { - for(int i=0; i < expr.children.size(); ++i) { - ExpressionTree child = foldMaybe(expr.children.get(i)); - if (child.constant == TruthValue.YES_NO_NULL) { - switch (expr.operator) { + if (expr.getChildren() != null) { + for(int i=0; i < expr.getChildren().size(); ++i) { + ExpressionTree child = foldMaybe(expr.getChildren().get(i)); + if (child.getConstant() == TruthValue.YES_NO_NULL) { + switch (expr.getOperator()) { case AND: - expr.children.remove(i); + expr.getChildren().remove(i); i -= 1; break; case OR: @@ -730,10 +531,10 @@ final class SearchArgumentImpl implements SearchArgument { expr); } } else { - expr.children.set(i, child); + expr.getChildren().set(i, child); } } - if (expr.children.isEmpty()) { + if (expr.getChildren().isEmpty()) { return new ExpressionTree(TruthValue.YES_NO_NULL); } } @@ -754,15 +555,15 @@ final class SearchArgumentImpl implements SearchArgument { List<ExpressionTree> andList, List<ExpressionTree> nonAndList ) { - List<ExpressionTree> kids = andList.get(0).children; + List<ExpressionTree> kids = andList.get(0).getChildren(); if (result.isEmpty()) { for(ExpressionTree kid: kids) { ExpressionTree or = new ExpressionTree(ExpressionTree.Operator.OR); result.add(or); for(ExpressionTree node: nonAndList) { - or.children.add(new ExpressionTree(node)); + or.getChildren().add(new ExpressionTree(node)); } - or.children.add(kid); + or.getChildren().add(kid); } } else { List<ExpressionTree> work = new ArrayList<ExpressionTree>(result); @@ -770,7 +571,7 @@ final class SearchArgumentImpl implements SearchArgument { for(ExpressionTree kid: kids) { for(ExpressionTree or: work) { ExpressionTree copy = new ExpressionTree(or); - copy.children.add(kid); + copy.getChildren().add(kid); result.add(copy); } } @@ -789,23 +590,23 @@ final class SearchArgumentImpl implements SearchArgument { * @return the normalized expression */ static ExpressionTree convertToCNF(ExpressionTree root) { - if (root.children != null) { + if (root.getChildren() != null) { // convert all of the children to CNF - int size = root.children.size(); + int size = root.getChildren().size(); for(int i=0; i < size; ++i) { - root.children.set(i, convertToCNF(root.children.get(i))); + root.getChildren().set(i, convertToCNF(root.getChildren().get(i))); } - if (root.operator == ExpressionTree.Operator.OR) { + if (root.getOperator() == ExpressionTree.Operator.OR) { // a list of leaves that weren't under AND expressions List<ExpressionTree> nonAndList = new ArrayList<ExpressionTree>(); // a list of AND expressions that we need to distribute List<ExpressionTree> andList = new ArrayList<ExpressionTree>(); - for(ExpressionTree child: root.children) { - if (child.operator == ExpressionTree.Operator.AND) { + for(ExpressionTree child: root.getChildren()) { + if (child.getOperator() == ExpressionTree.Operator.AND) { andList.add(child); - } else if (child.operator == ExpressionTree.Operator.OR) { + } else if (child.getOperator() == ExpressionTree.Operator.OR) { // pull apart the kids of the OR expression - for(ExpressionTree grandkid: child.children) { + for(ExpressionTree grandkid: child.getChildren()) { nonAndList.add(grandkid); } } else { @@ -815,7 +616,7 @@ final class SearchArgumentImpl implements SearchArgument { if (!andList.isEmpty()) { if (checkCombinationsThreshold(andList)) { root = new ExpressionTree(ExpressionTree.Operator.AND); - generateAllCombinations(root.children, andList, nonAndList); + generateAllCombinations(root.getChildren(), andList, nonAndList); } else { root = new ExpressionTree(TruthValue.YES_NO_NULL); } @@ -828,7 +629,7 @@ final class SearchArgumentImpl implements SearchArgument { private static boolean checkCombinationsThreshold(List<ExpressionTree> andList) { int numComb = 1; for (ExpressionTree tree : andList) { - numComb *= tree.children.size(); + numComb *= tree.getChildren().size(); if (numComb > CNF_COMBINATIONS_THRESHOLD) { return false; } @@ -843,33 +644,33 @@ final class SearchArgumentImpl implements SearchArgument { * potentially modified children. */ static ExpressionTree flatten(ExpressionTree root) { - if (root.children != null) { + if (root.getChildren() != null) { // iterate through the index, so that if we add more children, // they don't get re-visited - for(int i=0; i < root.children.size(); ++i) { - ExpressionTree child = flatten(root.children.get(i)); + for(int i=0; i < root.getChildren().size(); ++i) { + ExpressionTree child = flatten(root.getChildren().get(i)); // do we need to flatten? - if (child.operator == root.operator && - child.operator != ExpressionTree.Operator.NOT) { + if (child.getOperator() == root.getOperator() && + child.getOperator() != ExpressionTree.Operator.NOT) { boolean first = true; - for(ExpressionTree grandkid: child.children) { + for(ExpressionTree grandkid: child.getChildren()) { // for the first grandkid replace the original parent if (first) { first = false; - root.children.set(i, grandkid); + root.getChildren().set(i, grandkid); } else { - root.children.add(++i, grandkid); + root.getChildren().add(++i, grandkid); } } } else { - root.children.set(i, child); + root.getChildren().set(i, child); } } // if we have a singleton AND or OR, just return the child - if ((root.operator == ExpressionTree.Operator.OR || - root.operator == ExpressionTree.Operator.AND) && - root.children.size() == 1) { - return root.children.get(0); + if ((root.getOperator() == ExpressionTree.Operator.OR || + root.getOperator() == ExpressionTree.Operator.AND) && + root.getChildren().size() == 1) { + return root.getChildren().get(0); } } return root; @@ -888,13 +689,13 @@ final class SearchArgumentImpl implements SearchArgument { List<PredicateLeaf> leafCache, Map<PredicateLeaf, ExpressionTree> lookup) { - if (expr.children != null) { - for(int i=0; i < expr.children.size(); ++i) { - expr.children.set(i, buildLeafList(expr.children.get(i), leafCache, - lookup)); + if (expr.getChildren() != null) { + for(int i=0; i < expr.getChildren().size(); ++i) { + expr.getChildren().set(i, buildLeafList(expr.getChildren().get(i), + leafCache, lookup)); } - } else if (expr.operator == ExpressionTree.Operator.LEAF) { - PredicateLeaf leaf = leafCache.get(expr.leaf); + } else if (expr.getOperator() == ExpressionTree.Operator.LEAF) { + PredicateLeaf leaf = leafCache.get(expr.getLeaf()); ExpressionTree val = lookup.get(leaf); if (val == null) { val = new ExpressionTree(leaves.size()); @@ -975,7 +776,8 @@ final class SearchArgumentImpl implements SearchArgument { return expression == null ? TruthValue.YES : expression.evaluate(leaves); } - ExpressionTree getExpression() { + @Override + public ExpressionTree getExpression() { return expression; } @@ -1006,11 +808,6 @@ final class SearchArgumentImpl implements SearchArgument { return new Kryo().readObject(input, SearchArgumentImpl.class); } - @Override - public FilterPredicate toFilterPredicate() { - return expression.translate(leaves); - } - private static class BuilderImpl implements Builder { private final Deque<ExpressionTree> currentTree = new ArrayDeque<ExpressionTree>(); @@ -1022,7 +819,7 @@ final class SearchArgumentImpl implements SearchArgument { ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.OR); if (currentTree.size() != 0) { ExpressionTree parent = currentTree.getFirst(); - parent.children.add(node); + parent.getChildren().add(node); } currentTree.addFirst(node); return this; @@ -1033,7 +830,7 @@ final class SearchArgumentImpl implements SearchArgument { ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.AND); if (currentTree.size() != 0) { ExpressionTree parent = currentTree.getFirst(); - parent.children.add(node); + parent.getChildren().add(node); } currentTree.addFirst(node); return this; @@ -1044,7 +841,7 @@ final class SearchArgumentImpl implements SearchArgument { ExpressionTree node = new ExpressionTree(ExpressionTree.Operator.NOT); if (currentTree.size() != 0) { ExpressionTree parent = currentTree.getFirst(); - parent.children.add(node); + parent.getChildren().add(node); } currentTree.addFirst(node); return this; @@ -1053,12 +850,12 @@ final class SearchArgumentImpl implements SearchArgument { @Override public Builder end() { root = currentTree.removeFirst(); - if (root.children.size() == 0) { + if (root.getChildren().size() == 0) { throw new IllegalArgumentException("Can't create expression " + root + " with no children."); } - if (root.operator == ExpressionTree.Operator.NOT && - root.children.size() != 1) { + if (root.getOperator() == ExpressionTree.Operator.NOT && + root.getChildren().size() != 1) { throw new IllegalArgumentException("Can't create not expression " + root + " with more than 1 child."); } @@ -1127,7 +924,7 @@ final class SearchArgumentImpl implements SearchArgument { new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN, getType(box), column, box, null); leaves.add(leaf); - parent.children.add(new ExpressionTree(leaves.size() - 1)); + parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); return this; } @@ -1139,7 +936,7 @@ final class SearchArgumentImpl implements SearchArgument { new PredicateLeafImpl(PredicateLeaf.Operator.LESS_THAN_EQUALS, getType(box), column, box, null); leaves.add(leaf); - parent.children.add(new ExpressionTree(leaves.size() - 1)); + parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); return this; } @@ -1151,7 +948,7 @@ final class SearchArgumentImpl implements SearchArgument { new PredicateLeafImpl(PredicateLeaf.Operator.EQUALS, getType(box), column, box, null); leaves.add(leaf); - parent.children.add(new ExpressionTree(leaves.size() - 1)); + parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); return this; } @@ -1163,7 +960,7 @@ final class SearchArgumentImpl implements SearchArgument { new PredicateLeafImpl(PredicateLeaf.Operator.NULL_SAFE_EQUALS, getType(box), column, box, null); leaves.add(leaf); - parent.children.add(new ExpressionTree(leaves.size() - 1)); + parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); return this; } @@ -1183,7 +980,7 @@ final class SearchArgumentImpl implements SearchArgument { new PredicateLeafImpl(PredicateLeaf.Operator.IN, getType(argList.get(0)), column, null, argList); leaves.add(leaf); - parent.children.add(new ExpressionTree(leaves.size() - 1)); + parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); return this; } @@ -1194,7 +991,7 @@ final class SearchArgumentImpl implements SearchArgument { new PredicateLeafImpl(PredicateLeaf.Operator.IS_NULL, PredicateLeaf.Type.STRING, column, null, null); leaves.add(leaf); - parent.children.add(new ExpressionTree(leaves.size() - 1)); + parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); return this; } @@ -1208,7 +1005,7 @@ final class SearchArgumentImpl implements SearchArgument { new PredicateLeafImpl(PredicateLeaf.Operator.BETWEEN, getType(argList.get(0)), column, null, argList); leaves.add(leaf); - parent.children.add(new ExpressionTree(leaves.size() - 1)); + parent.getChildren().add(new ExpressionTree(leaves.size() - 1)); return this; } http://git-wip-us.apache.org/repos/asf/hive/blob/1280ccae/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java index 9e6adef..46ce49c 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/sarg/TestSearchArgumentImpl.java @@ -27,9 +27,9 @@ import com.google.common.collect.Sets; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionBuilder; -import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.ExpressionTree; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl.PredicateLeafImpl; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.serde2.io.DateWritable; @@ -784,7 +784,7 @@ public class TestSearchArgumentImpl { List<PredicateLeaf> leaves = sarg.getLeaves(); assertEquals(9, leaves.size()); - FilterPredicate p = sarg.toFilterPredicate(); + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); String[] conditions = new String[]{ "eq(first_name, Binary{\"john\"})", /* first_name = 'john' */ "not(lteq(first_name, Binary{\"greg\"}))", /* 'greg' < first_name */ @@ -1076,7 +1076,7 @@ public class TestSearchArgumentImpl { "lteq(id, 4)" /* id <= 4 */ }; - FilterPredicate p = sarg.toFilterPredicate(); + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); String expected = String.format("or(or(or(%1$s, %2$s), %3$s), %4$s)", conditions); assertEquals(expected, p.toString()); @@ -1506,7 +1506,7 @@ public class TestSearchArgumentImpl { "eq(last_name, Binary{\"smith\"})" /* 'smith' = last_name */ }; - FilterPredicate p = sarg.toFilterPredicate(); + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); String expected = String.format("and(and(and(%1$s, %2$s), %3$s), %4$s)", conditions); assertEquals(expected, p.toString()); @@ -1727,7 +1727,7 @@ public class TestSearchArgumentImpl { "or(eq(id, 34), eq(id, 50))" /* id in (34,50) */ }; - FilterPredicate p = sarg.toFilterPredicate(); + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); String expected = String.format("and(and(%1$s, %2$s), %3$s)", conditions); assertEquals(expected, p.toString()); @@ -1986,7 +1986,7 @@ public class TestSearchArgumentImpl { List<PredicateLeaf> leaves = sarg.getLeaves(); assertEquals(1, leaves.size()); - FilterPredicate p = sarg.toFilterPredicate(); + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); String expected = "and(lt(first_name, Binary{\"greg\"}), not(lteq(first_name, Binary{\"david\"})))"; assertEquals(p.toString(), expected); @@ -2466,7 +2466,7 @@ public class TestSearchArgumentImpl { List<PredicateLeaf> leaves = sarg.getLeaves(); assertEquals(9, leaves.size()); - FilterPredicate p = sarg.toFilterPredicate(); + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); String expected = "and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(" + "or(or(or(lt(id, 18), lt(id, 10)), lt(id, 13)), lt(id, 16)), " + "or(or(or(lt(id, 18), lt(id, 11)), lt(id, 13)), lt(id, 16))), " + @@ -2622,7 +2622,7 @@ public class TestSearchArgumentImpl { List<PredicateLeaf> leaves = sarg.getLeaves(); assertEquals(0, leaves.size()); - FilterPredicate p = sarg.toFilterPredicate(); + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); assertNull(p); assertEquals("YES_NO_NULL", @@ -2877,7 +2877,7 @@ public class TestSearchArgumentImpl { List<PredicateLeaf> leaves = sarg.getLeaves(); assertEquals(1, leaves.size()); - FilterPredicate p = sarg.toFilterPredicate(); + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); String expected = "and(not(lt(id, 10)), not(lt(id, 10)))"; assertEquals(expected, p.toString()); @@ -2934,7 +2934,7 @@ public class TestSearchArgumentImpl { "leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" + "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString()); - FilterPredicate p = sarg.toFilterPredicate(); + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); String expected = "and(and(and(not(eq(x, null)), not(and(lt(y, 20), not(lteq(y, 10))))), not(or(or(eq(z, 1), " + "eq(z, 2)), eq(z, 3)))), not(eq(a, Binary{\"stinger\"})))"; @@ -2955,7 +2955,8 @@ public class TestSearchArgumentImpl { "leaf-1 = (LESS_THAN_EQUALS y hi)\n" + "leaf-2 = (EQUALS z 1)\n" + "expr = (and leaf-0 leaf-1 leaf-2)", sarg.toString()); - assertEquals("lteq(y, Binary{\"hi\"})", sarg.toFilterPredicate().toString()); + assertEquals("lteq(y, Binary{\"hi\"})", + ParquetRecordReaderWrapper.toFilterPredicate(sarg).toString()); sarg = SearchArgumentFactory.newBuilder() .startNot() @@ -2973,7 +2974,7 @@ public class TestSearchArgumentImpl { "leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" + "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString()); - FilterPredicate p = sarg.toFilterPredicate(); + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " + "not(eq(a, Binary{\"stinger\"})))"; assertEquals(expected, p.toString()); @@ -2993,7 +2994,8 @@ public class TestSearchArgumentImpl { "leaf-1 = (LESS_THAN_EQUALS y hi)\n" + "leaf-2 = (EQUALS z 1.0)\n" + "expr = (and leaf-0 leaf-1 leaf-2)", sarg.toString()); - assertEquals("lteq(y, Binary{\"hi\"})", sarg.toFilterPredicate().toString()); + assertEquals("lteq(y, Binary{\"hi\"})", + ParquetRecordReaderWrapper.toFilterPredicate(sarg).toString()); sarg = SearchArgumentFactory.newBuilder() .startNot() @@ -3011,7 +3013,7 @@ public class TestSearchArgumentImpl { "leaf-3 = (NULL_SAFE_EQUALS a stinger)\n" + "expr = (and (not leaf-0) (not leaf-1) (not leaf-2) (not leaf-3))", sarg.toString()); - FilterPredicate p = sarg.toFilterPredicate(); + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); String expected = "and(and(not(eq(x, null)), not(or(or(eq(z, 1), eq(z, 2)), eq(z, 3)))), " + "not(eq(a, Binary{\"stinger\"})))"; assertEquals(expected, p.toString()); @@ -3036,7 +3038,7 @@ public class TestSearchArgumentImpl { "leaf-4 = (EQUALS z1 0.22)\n" + "expr = (and leaf-0 leaf-1 leaf-2 leaf-3 leaf-4)", sarg.toString()); - FilterPredicate p = sarg.toFilterPredicate(); + FilterPredicate p = ParquetRecordReaderWrapper.toFilterPredicate(sarg); String expected = "and(and(and(and(lt(x, 22), lt(x1, 22)), lteq(y, Binary{\"hi\"})), eq(z, " + "0.22)), eq(z1, 0.22))"; assertEquals(expected, p.toString()); http://git-wip-us.apache.org/repos/asf/hive/blob/1280ccae/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java new file mode 100644 index 0000000..2dd3a45 --- /dev/null +++ b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java @@ -0,0 +1,157 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.sarg; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * The inner representation of the SearchArgument. Most users should not + * need this interface, it is only for file formats that need to translate + * the SearchArgument into an internal form. + */ +public class ExpressionTree { + public enum Operator {OR, AND, NOT, LEAF, CONSTANT} + private final Operator operator; + private final List<ExpressionTree> children; + private final int leaf; + private final SearchArgument.TruthValue constant; + + ExpressionTree() { + operator = null; + children = null; + leaf = 0; + constant = null; + } + + ExpressionTree(Operator op, ExpressionTree... kids) { + operator = op; + children = new ArrayList<ExpressionTree>(); + leaf = -1; + this.constant = null; + Collections.addAll(children, kids); + } + + ExpressionTree(int leaf) { + operator = Operator.LEAF; + children = null; + this.leaf = leaf; + this.constant = null; + } + + ExpressionTree(SearchArgument.TruthValue constant) { + operator = Operator.CONSTANT; + children = null; + this.leaf = -1; + this.constant = constant; + } + + ExpressionTree(ExpressionTree other) { + this.operator = other.operator; + if (other.children == null) { + this.children = null; + } else { + this.children = new ArrayList<ExpressionTree>(); + for(ExpressionTree child: other.children) { + children.add(new ExpressionTree(child)); + } + } + this.leaf = other.leaf; + this.constant = other.constant; + } + + public SearchArgument.TruthValue evaluate(SearchArgument.TruthValue[] leaves + ) { + SearchArgument.TruthValue result = null; + switch (operator) { + case OR: + for(ExpressionTree child: children) { + result = child.evaluate(leaves).or(result); + } + return result; + case AND: + for(ExpressionTree child: children) { + result = child.evaluate(leaves).and(result); + } + return result; + case NOT: + return children.get(0).evaluate(leaves).not(); + case LEAF: + return leaves[leaf]; + case CONSTANT: + return constant; + default: + throw new IllegalStateException("Unknown operator: " + operator); + } + } + + @Override + public String toString() { + StringBuilder buffer = new StringBuilder(); + switch (operator) { + case OR: + buffer.append("(or"); + for(ExpressionTree child: children) { + buffer.append(' '); + buffer.append(child.toString()); + } + buffer.append(')'); + break; + case AND: + buffer.append("(and"); + for(ExpressionTree child: children) { + buffer.append(' '); + buffer.append(child.toString()); + } + buffer.append(')'); + break; + case NOT: + buffer.append("(not "); + buffer.append(children.get(0)); + buffer.append(')'); + break; + case LEAF: + buffer.append("leaf-"); + buffer.append(leaf); + break; + case CONSTANT: + buffer.append(constant); + break; + } + return buffer.toString(); + } + + public Operator getOperator() { + return operator; + } + + public List<ExpressionTree> getChildren() { + return children; + } + + public SearchArgument.TruthValue getConstant() { + return constant; + } + + public int getLeaf() { + return leaf; + } +} + http://git-wip-us.apache.org/repos/asf/hive/blob/1280ccae/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java index df208d4..84604cb 100644 --- a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java +++ b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.io.sarg; -import org.apache.parquet.filter2.predicate.FilterPredicate; - import java.util.List; /** @@ -159,6 +157,12 @@ public interface SearchArgument { public List<PredicateLeaf> getLeaves(); /** + * Get the expression tree. This should only needed for file formats that + * need to translate the expression to an internal form. + */ + public ExpressionTree getExpression(); + + /** * Evaluate the entire predicate based on the values for the leaf predicates. * @param leaves the value of each leaf predicate * @return the value of hte entire predicate @@ -177,12 +181,6 @@ public interface SearchArgument { public String toKryo(); /** - * Translate the search argument to the filter predicate parquet used - * @return - */ - public FilterPredicate toFilterPredicate(); - - /** * A builder object for contexts outside of Hive where it isn't easy to * get a ExprNodeDesc. The user must call startOr, startAnd, or startNot * before adding any leaves.