http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java deleted file mode 100644 index 3a92565..0000000 --- a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java +++ /dev/null @@ -1,104 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.io.sarg; - -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; - -import java.sql.Date; -import java.sql.Timestamp; -import java.util.List; - -/** - * The primitive predicates that form a SearchArgument. - */ -public interface PredicateLeaf { - - /** - * The possible operators for predicates. To get the opposites, construct - * an expression with a not operator. - */ - public static enum Operator { - EQUALS, - NULL_SAFE_EQUALS, - LESS_THAN, - LESS_THAN_EQUALS, - IN, - BETWEEN, - IS_NULL - } - - /** - * The possible types for sargs. 
- */ - public static enum Type { - INTEGER(Integer.class), // all of the integer types except long - LONG(Long.class), - FLOAT(Double.class), // float and double - STRING(String.class), // string, char, varchar - DATE(Date.class), - DECIMAL(HiveDecimalWritable.class), - TIMESTAMP(Timestamp.class), - BOOLEAN(Boolean.class); - - private final Class cls; - Type(Class cls) { - this.cls = cls; - } - - /** - * For all SARG leaves, the values must be the matching class. - * @return the value class - */ - public Class getValueClass() { - return cls; - } - } - - /** - * Get the operator for the leaf. - */ - public Operator getOperator(); - - /** - * Get the type of the column and literal by the file format. - */ - public Type getType(); - - /** - * Get the simple column name. - * @return the column name - */ - public String getColumnName(); - - /** - * Get the literal half of the predicate leaf. Adapt the original type for what orc needs - * - * @return an Integer, Long, Double, or String - */ - public Object getLiteral(); - - /** - * For operators with multiple literals (IN and BETWEEN), get the literals. - * - * @return the list of literals (Integer, Longs, Doubles, or Strings) - * - */ - public List<Object> getLiteralList(); - -}
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java b/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java deleted file mode 100644 index bc0d503..0000000 --- a/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java +++ /dev/null @@ -1,298 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.io.sarg; - -import java.util.List; - -/** - * Primary interface for <a href="http://en.wikipedia.org/wiki/Sargable"> - * SearchArgument</a>, which are the subset of predicates - * that can be pushed down to the RecordReader. Each SearchArgument consists - * of a series of SearchClauses that must each be true for the row to be - * accepted by the filter. - * - * This requires that the filter be normalized into conjunctive normal form - * (<a href="http://en.wikipedia.org/wiki/Conjunctive_normal_form">CNF</a>). - */ -public interface SearchArgument { - - /** - * The potential result sets of logical operations. 
- */ - public static enum TruthValue { - YES, NO, NULL, YES_NULL, NO_NULL, YES_NO, YES_NO_NULL; - - /** - * Compute logical or between the two values. - * @param right the other argument or null - * @return the result - */ - public TruthValue or(TruthValue right) { - if (right == null || right == this) { - return this; - } - if (right == YES || this == YES) { - return YES; - } - if (right == YES_NULL || this == YES_NULL) { - return YES_NULL; - } - if (right == NO) { - return this; - } - if (this == NO) { - return right; - } - if (this == NULL) { - if (right == NO_NULL) { - return NULL; - } else { - return YES_NULL; - } - } - if (right == NULL) { - if (this == NO_NULL) { - return NULL; - } else { - return YES_NULL; - } - } - return YES_NO_NULL; - } - - /** - * Compute logical AND between the two values. - * @param right the other argument or null - * @return the result - */ - public TruthValue and(TruthValue right) { - if (right == null || right == this) { - return this; - } - if (right == NO || this == NO) { - return NO; - } - if (right == NO_NULL || this == NO_NULL) { - return NO_NULL; - } - if (right == YES) { - return this; - } - if (this == YES) { - return right; - } - if (this == NULL) { - if (right == YES_NULL) { - return NULL; - } else { - return NO_NULL; - } - } - if (right == NULL) { - if (this == YES_NULL) { - return NULL; - } else { - return NO_NULL; - } - } - return YES_NO_NULL; - } - - public TruthValue not() { - switch (this) { - case NO: - return YES; - case YES: - return NO; - case NULL: - case YES_NO: - case YES_NO_NULL: - return this; - case NO_NULL: - return YES_NULL; - case YES_NULL: - return NO_NULL; - default: - throw new IllegalArgumentException("Unknown value: " + this); - } - } - - /** - * Does the RecordReader need to include this set of records? 
- * @return true unless none of the rows qualify - */ - public boolean isNeeded() { - switch (this) { - case NO: - case NULL: - case NO_NULL: - return false; - default: - return true; - } - } - } - - /** - * Get the leaf predicates that are required to evaluate the predicate. The - * list will have the duplicates removed. - * @return the list of leaf predicates - */ - public List<PredicateLeaf> getLeaves(); - - /** - * Get the expression tree. This should only be needed for file formats that - * need to translate the expression to an internal form. - */ - public ExpressionTree getExpression(); - - /** - * Evaluate the entire predicate based on the values for the leaf predicates. - * @param leaves the value of each leaf predicate - * @return the value of the entire predicate - */ - public TruthValue evaluate(TruthValue[] leaves); - - /** - * Serialize the SARG as a Kryo object and return the base64 string. - * - * Hive should replace the current XML-based AST serialization for predicate pushdown - * with the Kryo serialization of the SARG because the representation is much more - * compact and focused on what is needed for predicate pushdown. - * - * @return the serialized SARG - */ - public String toKryo(); - - /** - * A builder object for contexts outside of Hive where it isn't easy to - * get an ExprNodeDesc. The user must call startOr, startAnd, or startNot - * before adding any leaves. - */ - public interface Builder { - - /** - * Start building an or operation and push it on the stack. - * @return this - */ - public Builder startOr(); - - /** - * Start building an and operation and push it on the stack. - * @return this - */ - public Builder startAnd(); - - /** - * Start building a not operation and push it on the stack. - * @return this - */ - public Builder startNot(); - - /** - * Finish the current operation and pop it off of the stack. Each start - * call must have a matching end. 
- * @return this - */ - public Builder end(); - - /** - * Add a less than leaf to the current item on the stack. - * @param column the name of the column - * @param type the type of the expression - * @param literal the literal - * @return this - */ - public Builder lessThan(String column, PredicateLeaf.Type type, - Object literal); - - /** - * Add a less than equals leaf to the current item on the stack. - * @param column the name of the column - * @param type the type of the expression - * @param literal the literal - * @return this - */ - public Builder lessThanEquals(String column, PredicateLeaf.Type type, - Object literal); - - /** - * Add an equals leaf to the current item on the stack. - * @param column the name of the column - * @param type the type of the expression - * @param literal the literal - * @return this - */ - public Builder equals(String column, PredicateLeaf.Type type, - Object literal); - - /** - * Add a null safe equals leaf to the current item on the stack. - * @param column the name of the column - * @param type the type of the expression - * @param literal the literal - * @return this - */ - public Builder nullSafeEquals(String column, PredicateLeaf.Type type, - Object literal); - - /** - * Add an in leaf to the current item on the stack. - * @param column the name of the column - * @param type the type of the expression - * @param literal the literal - * @return this - */ - public Builder in(String column, PredicateLeaf.Type type, - Object... literal); - - /** - * Add an is null leaf to the current item on the stack. - * @param column the name of the column - * @param type the type of the expression - * @return this - */ - public Builder isNull(String column, PredicateLeaf.Type type); - - /** - * Add a between leaf to the current item on the stack. 
- * @param column the name of the column - * @param type the type of the expression - * @param lower the literal - * @param upper the literal - * @return this - */ - public Builder between(String column, PredicateLeaf.Type type, - Object lower, Object upper); - - /** - * Add a truth value to the expression. - * @param truth - * @return this - */ - public Builder literal(TruthValue truth); - - /** - * Build and return the SearchArgument that has been defined. All of the - * starts must have been ended before this call. - * @return the new SearchArgument - */ - public SearchArgument build(); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java deleted file mode 100644 index 0578d24..0000000 --- a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveDecimalWritable.java +++ /dev/null @@ -1,174 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.serde2.io; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.math.BigInteger; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.common.type.HiveDecimal; - -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.io.WritableUtils; - -public class HiveDecimalWritable implements WritableComparable<HiveDecimalWritable> { - - static final private Log LOG = LogFactory.getLog(HiveDecimalWritable.class); - - private byte[] internalStorage = new byte[0]; - private int scale; - - public HiveDecimalWritable() { - } - - public HiveDecimalWritable(String value) { - set(HiveDecimal.create(value)); - } - - public HiveDecimalWritable(byte[] bytes, int scale) { - set(bytes, scale); - } - - public HiveDecimalWritable(HiveDecimalWritable writable) { - set(writable.getHiveDecimal()); - } - - public HiveDecimalWritable(HiveDecimal value) { - set(value); - } - - public HiveDecimalWritable(long value) { - set((HiveDecimal.create(value))); - } - - public void set(HiveDecimal value) { - set(value.unscaledValue().toByteArray(), value.scale()); - } - - public void set(HiveDecimal value, int maxPrecision, int maxScale) { - set(HiveDecimal.enforcePrecisionScale(value, maxPrecision, maxScale)); - } - - public void set(HiveDecimalWritable writable) { - set(writable.getHiveDecimal()); - } - - public void set(byte[] bytes, int scale) { - this.internalStorage = bytes; - this.scale = scale; - } - - public HiveDecimal getHiveDecimal() { - return HiveDecimal.create(new BigInteger(internalStorage), scale); - } - - /** - * Get a HiveDecimal instance from the writable and constrain it with maximum precision/scale. 
- * - * @param maxPrecision maximum precision - * @param maxScale maximum scale - * @return HiveDecimal instance - */ - public HiveDecimal getHiveDecimal(int maxPrecision, int maxScale) { - return HiveDecimal.enforcePrecisionScale(HiveDecimal. - create(new BigInteger(internalStorage), scale), - maxPrecision, maxScale); - } - - @Override - public void readFields(DataInput in) throws IOException { - scale = WritableUtils.readVInt(in); - int byteArrayLen = WritableUtils.readVInt(in); - if (internalStorage.length != byteArrayLen) { - internalStorage = new byte[byteArrayLen]; - } - in.readFully(internalStorage); - } - - @Override - public void write(DataOutput out) throws IOException { - WritableUtils.writeVInt(out, scale); - WritableUtils.writeVInt(out, internalStorage.length); - out.write(internalStorage); - } - - @Override - public int compareTo(HiveDecimalWritable that) { - return getHiveDecimal().compareTo(that.getHiveDecimal()); - } - - @Override - public String toString() { - return getHiveDecimal().toString(); - } - - @Override - public boolean equals(Object other) { - if (this == other) { - return true; - } - if (other == null || getClass() != other.getClass()) { - return false; - } - HiveDecimalWritable bdw = (HiveDecimalWritable) other; - - // 'equals' and 'compareTo' are not compatible with HiveDecimals. We want - // compareTo which returns true iff the numbers are equal (e.g.: 3.14 is - // the same as 3.140). 'Equals' returns true iff equal and the same scale - // is set in the decimals (e.g.: 3.14 is not the same as 3.140) - return getHiveDecimal().compareTo(bdw.getHiveDecimal()) == 0; - } - - @Override - public int hashCode() { - return getHiveDecimal().hashCode(); - } - - /* (non-Javadoc) - * In order to update a Decimal128 fast (w/o allocation) we need to expose access to the - * internal storage bytes and scale. 
- * @return - */ - public byte[] getInternalStorage() { - return internalStorage; - } - - /* (non-Javadoc) - * In order to update a Decimal128 fast (w/o allocation) we need to expose access to the - * internal storage bytes and scale. - */ - public int getScale() { - return scale; - } - - public static - HiveDecimalWritable enforcePrecisionScale(HiveDecimalWritable writable, - int precision, int scale) { - if (writable == null) { - return null; - } - - HiveDecimal dec = - HiveDecimal.enforcePrecisionScale(writable.getHiveDecimal(), precision, - scale); - return dec == null ? null : new HiveDecimalWritable(dec); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/pom.xml ---------------------------------------------------------------------- diff --git a/storage-api/pom.xml b/storage-api/pom.xml new file mode 100644 index 0000000..71b51b8 --- /dev/null +++ b/storage-api/pom.xml @@ -0,0 +1,85 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.hive</groupId> + <artifactId>hive</artifactId> + <version>2.0.0-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <artifactId>hive-storage-api</artifactId> + <packaging>jar</packaging> + <name>Hive Storage API</name> + + <properties> + <hive.path.to.root>..</hive.path.to.root> + </properties> + + <dependencies> + <!-- dependencies are always listed in sorted order by groupId, artifactId --> + <!-- inter-project --> + <dependency> + <groupId>log4j</groupId> + <artifactId>log4j</artifactId> + <version>${log4j.version}</version> + </dependency> + <!-- test inter-project --> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>${junit.version}</version> + <scope>test</scope> + </dependency> + </dependencies> + + <profiles> + <profile> + <id>hadoop-1</id> + <dependencies> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-core</artifactId> + <version>${hadoop-20S.version}</version> + <optional>true</optional> + </dependency> + </dependencies> + </profile> + <profile> + <id>hadoop-2</id> + <dependencies> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <version>${hadoop-23.version}</version> + <optional>true</optional> + </dependency> + </dependencies> + </profile> + </profiles> + + <build> + <sourceDirectory>${basedir}/src/java</sourceDirectory> + <testSourceDirectory>${basedir}/src/test</testSourceDirectory> + <testResources> + <testResource> + <directory>${basedir}/src/test/resources</directory> + </testResource> + </testResources> + </build> +</project> 
http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java ---------------------------------------------------------------------- diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java b/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java new file mode 100644 index 0000000..7d7fb28 --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java @@ -0,0 +1,312 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.common.type; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.math.RoundingMode; + +/** + * + * HiveDecimal. Simple wrapper for BigDecimal. Adds fixed max precision and non scientific string + * representation + * + */ +public class HiveDecimal implements Comparable<HiveDecimal> { + public static final int MAX_PRECISION = 38; + public static final int MAX_SCALE = 38; + + /** + * Default precision/scale when user doesn't specify in the column metadata, such as + * decimal and decimal(8). 
+ */ + public static final int USER_DEFAULT_PRECISION = 10; + public static final int USER_DEFAULT_SCALE = 0; + + /** + * Default precision/scale when system is not able to determine them, such as in case + * of a non-generic udf. + */ + public static final int SYSTEM_DEFAULT_PRECISION = 38; + public static final int SYSTEM_DEFAULT_SCALE = 18; + + public static final HiveDecimal ZERO = new HiveDecimal(BigDecimal.ZERO); + public static final HiveDecimal ONE = new HiveDecimal(BigDecimal.ONE); + + public static final int ROUND_FLOOR = BigDecimal.ROUND_FLOOR; + public static final int ROUND_CEILING = BigDecimal.ROUND_CEILING; + public static final int ROUND_HALF_UP = BigDecimal.ROUND_HALF_UP; + + private BigDecimal bd = BigDecimal.ZERO; + + private HiveDecimal(BigDecimal bd) { + this.bd = bd; + } + + public static HiveDecimal create(BigDecimal b) { + return create(b, true); + } + + public static HiveDecimal create(BigDecimal b, boolean allowRounding) { + BigDecimal bd = normalize(b, allowRounding); + return bd == null ? null : new HiveDecimal(bd); + } + + public static HiveDecimal create(BigInteger unscaled, int scale) { + BigDecimal bd = normalize(new BigDecimal(unscaled, scale), true); + return bd == null ? null : new HiveDecimal(bd); + } + + public static HiveDecimal create(String dec) { + BigDecimal bd; + try { + bd = new BigDecimal(dec.trim()); + } catch (NumberFormatException ex) { + return null; + } + + bd = normalize(bd, true); + return bd == null ? null : new HiveDecimal(bd); + } + + public static HiveDecimal create(BigInteger bi) { + BigDecimal bd = normalize(new BigDecimal(bi), true); + return bd == null ? 
null : new HiveDecimal(bd); + } + + public static HiveDecimal create(int i) { + return new HiveDecimal(new BigDecimal(i)); + } + + public static HiveDecimal create(long l) { + return new HiveDecimal(new BigDecimal(l)); + } + + @Override + public String toString() { + return bd.toPlainString(); + } + + public HiveDecimal setScale(int i) { + return new HiveDecimal(bd.setScale(i, RoundingMode.HALF_UP)); + } + + @Override + public int compareTo(HiveDecimal dec) { + return bd.compareTo(dec.bd); + } + + @Override + public int hashCode() { + return bd.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (obj == null || obj.getClass() != getClass()) { + return false; + } + return bd.equals(((HiveDecimal) obj).bd); + } + + public int scale() { + return bd.scale(); + } + + /** + * Returns the number of digits (integer and fractional) in the number, which is equivalent + * to SQL decimal precision. Note that this is different from BigDecimal.precision(), + * which returns the precision of the unscaled value (BigDecimal.valueOf(0.01).precision() = 1, + * whereas HiveDecimal.create("0.01").precision() = 2). + * If you want the BigDecimal precision, use HiveDecimal.bigDecimalValue().precision() + * @return + */ + public int precision() { + int bdPrecision = bd.precision(); + int bdScale = bd.scale(); + + if (bdPrecision < bdScale) { + // This can happen for numbers less than 0.1 + // For 0.001234: bdPrecision=4, bdScale=6 + // In this case, we'll set the type to have the same precision as the scale. 
+ return bdScale; + } + return bdPrecision; + } + + public int intValue() { + return bd.intValue(); + } + + public double doubleValue() { + return bd.doubleValue(); + } + + public long longValue() { + return bd.longValue(); + } + + public short shortValue() { + return bd.shortValue(); + } + + public float floatValue() { + return bd.floatValue(); + } + + public BigDecimal bigDecimalValue() { + return bd; + } + + public byte byteValue() { + return bd.byteValue(); + } + + public HiveDecimal setScale(int adjustedScale, int rm) { + return create(bd.setScale(adjustedScale, rm)); + } + + public HiveDecimal subtract(HiveDecimal dec) { + return create(bd.subtract(dec.bd)); + } + + public HiveDecimal multiply(HiveDecimal dec) { + return create(bd.multiply(dec.bd), false); + } + + public BigInteger unscaledValue() { + return bd.unscaledValue(); + } + + public HiveDecimal scaleByPowerOfTen(int n) { + return create(bd.scaleByPowerOfTen(n)); + } + + public HiveDecimal abs() { + return create(bd.abs()); + } + + public HiveDecimal negate() { + return create(bd.negate()); + } + + public HiveDecimal add(HiveDecimal dec) { + return create(bd.add(dec.bd)); + } + + public HiveDecimal pow(int n) { + BigDecimal result = normalize(bd.pow(n), false); + return result == null ? null : new HiveDecimal(result); + } + + public HiveDecimal remainder(HiveDecimal dec) { + return create(bd.remainder(dec.bd)); + } + + public HiveDecimal divide(HiveDecimal dec) { + return create(bd.divide(dec.bd, MAX_SCALE, RoundingMode.HALF_UP), true); + } + + /** + * Get the sign of the underlying decimal. + * @return 0 if the decimal is equal to 0, -1 if less than zero, and 1 if greater than 0 + */ + public int signum() { + return bd.signum(); + } + + private static BigDecimal trim(BigDecimal d) { + if (d.compareTo(BigDecimal.ZERO) == 0) { + // Special case for 0, because java doesn't strip zeros correctly on that number. 
+ d = BigDecimal.ZERO; + } else { + d = d.stripTrailingZeros(); + if (d.scale() < 0) { + // no negative scale decimals + d = d.setScale(0); + } + } + return d; + } + + private static BigDecimal normalize(BigDecimal bd, boolean allowRounding) { + if (bd == null) { + return null; + } + + bd = trim(bd); + + int intDigits = bd.precision() - bd.scale(); + + if (intDigits > MAX_PRECISION) { + return null; + } + + int maxScale = Math.min(MAX_SCALE, Math.min(MAX_PRECISION - intDigits, bd.scale())); + if (bd.scale() > maxScale ) { + if (allowRounding) { + bd = bd.setScale(maxScale, RoundingMode.HALF_UP); + // Trimming is again necessary, because rounding may introduce new trailing 0's. + bd = trim(bd); + } else { + bd = null; + } + } + + return bd; + } + + public static BigDecimal enforcePrecisionScale(BigDecimal bd, int maxPrecision, int maxScale) { + if (bd == null) { + return null; + } + + bd = trim(bd); + + if (bd.scale() > maxScale) { + bd = bd.setScale(maxScale, RoundingMode.HALF_UP); + } + + int maxIntDigits = maxPrecision - maxScale; + int intDigits = bd.precision() - bd.scale(); + if (intDigits > maxIntDigits) { + return null; + } + + return bd; + } + + public static HiveDecimal enforcePrecisionScale(HiveDecimal dec, int maxPrecision, int maxScale) { + if (dec == null) { + return null; + } + + // Minor optimization, avoiding creating new objects. 
+ if (dec.precision() - dec.scale() <= maxPrecision - maxScale && + dec.scale() <= maxScale) { + return dec; + } + + BigDecimal bd = enforcePrecisionScale(dec.bd, maxPrecision, maxScale); + if (bd == null) { + return null; + } + + return HiveDecimal.create(bd); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java ---------------------------------------------------------------------- diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java new file mode 100644 index 0000000..02c52fa --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java @@ -0,0 +1,322 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +/** + * This class supports string and binary data by value reference -- i.e. each field is + * explicitly present, as opposed to provided by a dictionary reference. + * In some cases, all the values will be in the same byte array to begin with, + * but this need not be the case. 
If each value is in a separate byte + * array to start with, or not all of the values are in the same original + * byte array, you can still assign data by reference into this column vector. + * This gives flexibility to use this in multiple situations. + * <p> + * When setting data by reference, the caller + * is responsible for allocating the byte arrays used to hold the data. + * You can also set data by value, as long as you call the initBuffer() method first. + * You can mix "by value" and "by reference" in the same column vector, + * though that use is probably not typical. + */ +public class BytesColumnVector extends ColumnVector { + public byte[][] vector; + public int[] start; // start offset of each field + + /* + * The length of each field. If the value repeats for every entry, then it is stored + * in vector[0] and isRepeating from the superclass is set to true. + */ + public int[] length; + private byte[] buffer; // optional buffer to use when actually copying in data + private int nextFree; // next free position in buffer + + // Estimate that there will be 16 bytes per entry + static final int DEFAULT_BUFFER_SIZE = 16 * VectorizedRowBatch.DEFAULT_SIZE; + + // Proportion of extra space to provide when allocating more buffer space. + static final float EXTRA_SPACE_FACTOR = (float) 1.2; + + /** + * Use this constructor for normal operation. + * All column vectors should be the default size normally. + */ + public BytesColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE); + } + + /** + * Don't call this constructor except for testing purposes. + * + * @param size number of elements in the column vector + */ + public BytesColumnVector(int size) { + super(size); + vector = new byte[size][]; + start = new int[size]; + length = new int[size]; + } + + /** + * Additional reset work for BytesColumnVector (releasing scratch bytes for by value strings). 
+ */ + @Override + public void reset() { + super.reset(); + initBuffer(0); + } + + /** Set a field by reference. + * + * @param elementNum index within column vector to set + * @param sourceBuf container of source data + * @param start start byte position within source + * @param length length of source byte sequence + */ + public void setRef(int elementNum, byte[] sourceBuf, int start, int length) { + vector[elementNum] = sourceBuf; + this.start[elementNum] = start; + this.length[elementNum] = length; + } + + /** + * You must call initBuffer first before using setVal(). + * Provide the estimated number of bytes needed to hold + * a full column vector worth of byte string data. + * + * @param estimatedValueSize Estimated size of buffer space needed + */ + public void initBuffer(int estimatedValueSize) { + nextFree = 0; + + // if buffer is already allocated, keep using it, don't re-allocate + if (buffer != null) { + return; + } + + // allocate a little extra space to limit need to re-allocate + int bufferSize = this.vector.length * (int)(estimatedValueSize * EXTRA_SPACE_FACTOR); + if (bufferSize < DEFAULT_BUFFER_SIZE) { + bufferSize = DEFAULT_BUFFER_SIZE; + } + buffer = new byte[bufferSize]; + } + + /** + * Initialize buffer to default size. + */ + public void initBuffer() { + initBuffer(0); + } + + /** + * @return amount of buffer space currently allocated + */ + public int bufferSize() { + if (buffer == null) { + return 0; + } + return buffer.length; + } + + /** + * Set a field by actually copying in to a local buffer. + * If you must actually copy data in to the array, use this method. + * DO NOT USE this method unless it's not practical to set data by reference with setRef(). + * Setting data by reference tends to run a lot faster than copying data in. 
+ * + * @param elementNum index within column vector to set + * @param sourceBuf container of source data + * @param start start byte position within source + * @param length length of source byte sequence + */ + public void setVal(int elementNum, byte[] sourceBuf, int start, int length) { + if ((nextFree + length) > buffer.length) { + increaseBufferSpace(length); + } + System.arraycopy(sourceBuf, start, buffer, nextFree, length); + vector[elementNum] = buffer; + this.start[elementNum] = nextFree; + this.length[elementNum] = length; + nextFree += length; + } + + /** + * Set a field to the concatenation of two string values. Result data is copied + * into the internal buffer. Like setVal(), this requires that initBuffer() has + * already been called; the buffer is grown with increaseBufferSpace() when the + * combined length does not fit. + * + * @param elementNum index within column vector to set + * @param leftSourceBuf container of left argument + * @param leftStart start of left argument + * @param leftLen length of left argument + * @param rightSourceBuf container of right argument + * @param rightStart start of right argument + * @param rightLen length of right argument + */ + public void setConcat(int elementNum, byte[] leftSourceBuf, int leftStart, int leftLen, + byte[] rightSourceBuf, int rightStart, int rightLen) { + int newLen = leftLen + rightLen; + if ((nextFree + newLen) > buffer.length) { + increaseBufferSpace(newLen); + } + vector[elementNum] = buffer; + this.start[elementNum] = nextFree; + this.length[elementNum] = newLen; + + System.arraycopy(leftSourceBuf, leftStart, buffer, nextFree, leftLen); + nextFree += leftLen; + System.arraycopy(rightSourceBuf, rightStart, buffer, nextFree, rightLen); + nextFree += rightLen; + } + + /** + * Increase buffer space enough to accommodate next element. + * This uses an exponential increase mechanism to rapidly + * increase buffer size to enough to hold all data. + * As batches get re-loaded, buffer space allocated will quickly + * stabilize.
+ * + * @param nextElemLength size of next element to be added + */ + public void increaseBufferSpace(int nextElemLength) { + + // Keep doubling buffer size until there will be enough space for next element. + int newLength = 2 * buffer.length; + while((nextFree + nextElemLength) > newLength) { + newLength *= 2; + } + + // Allocate new buffer, copy data to it, and set buffer to new buffer. + byte[] newBuffer = new byte[newLength]; + System.arraycopy(buffer, 0, newBuffer, 0, nextFree); + buffer = newBuffer; + } + + /** Copy the current object contents into the output. Only copy selected entries, + * as indicated by selectedInUse and the sel array. + */ + public void copySelected( + boolean selectedInUse, int[] sel, int size, BytesColumnVector output) { + + // Output has nulls if and only if input has nulls. + output.noNulls = noNulls; + output.isRepeating = false; + + // Handle repeating case + if (isRepeating) { + output.setVal(0, vector[0], start[0], length[0]); + output.isNull[0] = isNull[0]; + output.isRepeating = true; + return; + } + + // Handle normal case + + // Copy data values over + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.setVal(i, vector[i], start[i], length[i]); + } + } + else { + for (int i = 0; i < size; i++) { + output.setVal(i, vector[i], start[i], length[i]); + } + } + + // Copy nulls over if needed + if (!noNulls) { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = isNull[i]; + } + } + else { + System.arraycopy(isNull, 0, output.isNull, 0, size); + } + } + } + + /** Simplify vector by brute-force flattening noNulls and isRepeating + * This can be used to reduce combinatorial explosion of code paths in VectorExpressions + * with many arguments, at the expense of loss of some performance. 
+ */ + public void flatten(boolean selectedInUse, int[] sel, int size) { + flattenPush(); + if (isRepeating) { + isRepeating = false; + + // setRef is used below and this is safe, because the reference + // is to data owned by this column vector. If this column vector + // gets re-used, the whole thing is re-used together so there + // is no danger of a dangling reference. + + // Only copy data values if entry is not null. The string value + // at position 0 is undefined if the position 0 value is null. + if (noNulls || !isNull[0]) { + + // loops start at position 1 because position 0 is already set + if (selectedInUse) { + for (int j = 1; j < size; j++) { + int i = sel[j]; + this.setRef(i, vector[0], start[0], length[0]); + } + } else { + for (int i = 1; i < size; i++) { + this.setRef(i, vector[0], start[0], length[0]); + } + } + } + flattenRepeatingNulls(selectedInUse, sel, size); + } + flattenNoNulls(selectedInUse, sel, size); + } + + // Fill the all the vector entries with provided value + public void fill(byte[] value) { + noNulls = true; + isRepeating = true; + setRef(0, value, 0, value.length); + } + + @Override + public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { + BytesColumnVector in = (BytesColumnVector) inputVector; + setVal(outElementNum, in.vector[inputElementNum], in.start[inputElementNum], in.length[inputElementNum]); + } + + @Override + public void init() { + initBuffer(0); + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append('"'); + buffer.append(new String(this.buffer, start[row], length[row])); + buffer.append('"'); + } else { + buffer.append("null"); + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java ---------------------------------------------------------------------- diff --git 
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java new file mode 100644 index 0000000..cb75c2c --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java @@ -0,0 +1,173 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.Arrays; + +/** + * ColumnVector contains the shared structure for the sub-types, + * including NULL information, and whether this vector + * repeats, i.e. has all values the same, so only the first + * one is set. This is used to accelerate query performance + * by handling a whole vector in O(1) time when applicable. + * + * The fields are public by design since this is a performance-critical + * structure that is used in the inner loop of query execution. + */ +public abstract class ColumnVector { + + /* + * The current kinds of column vectors. + */ + public static enum Type { + LONG, + DOUBLE, + BYTES, + DECIMAL + } + + /* + * If noNulls is false, then this array contains true if the value + * is null, otherwise false.
The array is always allocated, so a batch can be re-used + * later and nulls added. + */ + public boolean[] isNull; + + // If the whole column vector has no nulls, this is true, otherwise false. + public boolean noNulls; + + /* + * True if same value repeats for whole column vector. + * If so, vector[0] holds the repeating value. + */ + public boolean isRepeating; + + // Variables to hold state from before flattening so it can be easily restored. + private boolean preFlattenIsRepeating; + private boolean preFlattenNoNulls; + + /** + * Constructor for super-class ColumnVector. This is not called directly, + * but used to initialize inherited fields. + * + * @param len Vector length + */ + public ColumnVector(int len) { + isNull = new boolean[len]; + noNulls = true; + isRepeating = false; + } + + /** + * Resets the column to default state + * - fills the isNull array with false + * - sets noNulls to true + * - sets isRepeating to false + */ + public void reset() { + if (false == noNulls) { + Arrays.fill(isNull, false); + } + noNulls = true; + isRepeating = false; + } + + abstract public void flatten(boolean selectedInUse, int[] sel, int size); + + // Simplify vector by brute-force flattening noNulls if isRepeating + // This can be used to reduce combinatorial explosion of code paths in VectorExpressions + // with many arguments. 
+ public void flattenRepeatingNulls(boolean selectedInUse, int[] sel, int size) { + + boolean nullFillValue; + + if (noNulls) { + nullFillValue = false; + } else { + nullFillValue = isNull[0]; + } + + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + isNull[i] = nullFillValue; + } + } else { + Arrays.fill(isNull, 0, size, nullFillValue); + } + + // all nulls are now explicit + noNulls = false; + } + + /** + * Make the null information explicit for a vector that is flagged as having no nulls. + * If noNulls is true it is cleared, and isNull is set to false for the first size entries, + * or for the entries listed in sel when selectedInUse is true. Does nothing if noNulls + * is already false. + * + * @param selectedInUse true if sel lists the row positions in use + * @param sel row positions in use when selectedInUse is true + * @param size number of rows in use + */ + public void flattenNoNulls(boolean selectedInUse, int[] sel, int size) { + if (noNulls) { + noNulls = false; + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + isNull[i] = false; + } + } else { + Arrays.fill(isNull, 0, size, false); + } + } + } + + /** + * Restore the state of isRepeating and noNulls to what it was + * before flattening. This must only be called just after flattening + * and then evaluating a VectorExpression on the column vector. + * It is an optimization that allows other operations on the same + * column to continue to benefit from the isRepeating and noNulls + * indicators. + */ + public void unFlatten() { + isRepeating = preFlattenIsRepeating; + noNulls = preFlattenNoNulls; + } + + /** + * Record the isRepeating and noNulls state so that unFlatten() can restore it later. + */ + protected void flattenPush() { + preFlattenIsRepeating = isRepeating; + preFlattenNoNulls = noNulls; + } + + /** + * Set the element in this column vector from the given input vector. + * + * @param outElementNum index within this column vector to set + * @param inputElementNum index within the input vector to read + * @param inputVector the column vector to read the value from + */ + public abstract void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector); + + /** + * Initialize the column vector. This method can be overridden by specific column vector types. + * Use this method only if the individual type of the column vector is not known, otherwise it is + * preferable to call specific initialization methods. + */ + public void init() { + // Do nothing by default + } + + /** + * Print the value for this column into the given string builder.
+ * @param buffer the buffer to print into + * @param row the id of the row to print + */ + public abstract void stringifyValue(StringBuilder buffer, + int row); + } http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java ---------------------------------------------------------------------- diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java new file mode 100644 index 0000000..74a9d5f --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java @@ -0,0 +1,106 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.math.BigInteger; + +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.common.type.HiveDecimal; + +public class DecimalColumnVector extends ColumnVector { + + /** + * A vector of HiveDecimalWritable objects. 
+ * + * For high performance and easy access to this low-level structure, + * the fields are public by design (as they are in other ColumnVector + * types). + */ + public HiveDecimalWritable[] vector; + public short scale; + public short precision; + + public DecimalColumnVector(int precision, int scale) { + this(VectorizedRowBatch.DEFAULT_SIZE, precision, scale); + } + + public DecimalColumnVector(int size, int precision, int scale) { + super(size); + this.precision = (short) precision; + this.scale = (short) scale; + vector = new HiveDecimalWritable[size]; + for (int i = 0; i < size; i++) { + vector[i] = new HiveDecimalWritable(HiveDecimal.ZERO); + } + } + + /** + * Simplify vector by brute-force flattening noNulls and isRepeating, in the same way as + * the other column vector types. The pre-flatten state is recorded with flattenPush() + * so that a later unFlatten() restores meaningful values. + * + * @param selectedInUse true if sel lists the row positions in use + * @param sel row positions in use when selectedInUse is true + * @param size number of rows in use + */ + @Override + public void flatten(boolean selectedInUse, int[] sel, int size) { + flattenPush(); + if (isRepeating) { + isRepeating = false; + HiveDecimalWritable repeatVal = vector[0]; + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + + // Copy by value: every slot owns its own writable, so references must not be shared. + vector[i].set(repeatVal); + } + } else { + + // position 0 already holds the repeated value + for (int i = 1; i < size; i++) { + vector[i].set(repeatVal); + } + } + flattenRepeatingNulls(selectedInUse, sel, size); + } + flattenNoNulls(selectedInUse, sel, size); + } + + @Override + public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { + HiveDecimal hiveDec = ((DecimalColumnVector) inputVector).vector[inputElementNum].getHiveDecimal(precision, scale); + if (hiveDec == null) { + noNulls = false; + isNull[outElementNum] = true; + } else { + vector[outElementNum].set(hiveDec); + } + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append(vector[row].toString()); + } else { + buffer.append("null"); + } + } + + public void set(int elementNum, HiveDecimalWritable writeable) { + HiveDecimal hiveDec = writeable.getHiveDecimal(precision, scale); + if (hiveDec == null) { + noNulls = false; + isNull[elementNum] = true; + } else { + vector[elementNum].set(hiveDec); + } + } + + public void set(int elementNum, HiveDecimal hiveDec) { + HiveDecimal checkedDec = HiveDecimal.enforcePrecisionScale(hiveDec, precision, scale); + if (checkedDec == null) { + noNulls = false; + isNull[elementNum] = true; + } else { + vector[elementNum].set(checkedDec); + } + } + + public void setNullDataValue(int elementNum) { + // E.g.
For scale 2 the minimum is "0.01" + HiveDecimal minimumNonZeroValue = HiveDecimal.create(BigInteger.ONE, scale); + vector[elementNum].set(minimumNonZeroValue); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java ---------------------------------------------------------------------- diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java new file mode 100644 index 0000000..4a7811d --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java @@ -0,0 +1,143 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.Arrays; + +/** + * This class represents a nullable double precision floating point column vector. + * This class will be used for operations on all floating point types (float, double) + * and as such will use a 64-bit double value to hold the biggest possible value. + * During copy-in/copy-out, smaller types (i.e. float) will be converted as needed. 
This will + * reduce the amount of code that needs to be generated and also will run fast since the + * machine operates with 64-bit words. + * + * The vector[] field is public by design for high-performance access in the inner + * loop of query execution. + */ +public class DoubleColumnVector extends ColumnVector { + public double[] vector; + public static final double NULL_VALUE = Double.NaN; + + /** + * Use this constructor by default. All column vectors + * should normally be the default size. + */ + public DoubleColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE); + } + + /** + * Don't use this except for testing purposes. + * + * @param len + */ + public DoubleColumnVector(int len) { + super(len); + vector = new double[len]; + } + + // Copy the current object contents into the output. Only copy selected entries, + // as indicated by selectedInUse and the sel array. + public void copySelected( + boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) { + + // Output has nulls if and only if input has nulls. 
+ output.noNulls = noNulls; + output.isRepeating = false; + + // Handle repeating case + if (isRepeating) { + output.vector[0] = vector[0]; + output.isNull[0] = isNull[0]; + output.isRepeating = true; + return; + } + + // Handle normal case + + // Copy data values over + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.vector[i] = vector[i]; + } + } + else { + System.arraycopy(vector, 0, output.vector, 0, size); + } + + // Copy nulls over if needed + if (!noNulls) { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = isNull[i]; + } + } + else { + System.arraycopy(isNull, 0, output.isNull, 0, size); + } + } + } + + // Fill the column vector with the provided value + public void fill(double value) { + noNulls = true; + isRepeating = true; + vector[0] = value; + } + + // Simplify vector by brute-force flattening noNulls and isRepeating + // This can be used to reduce combinatorial explosion of code paths in VectorExpressions + // with many arguments. 
+ public void flatten(boolean selectedInUse, int[] sel, int size) { + flattenPush(); + if (isRepeating) { + isRepeating = false; + double repeatVal = vector[0]; + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + vector[i] = repeatVal; + } + } else { + Arrays.fill(vector, 0, size, repeatVal); + } + flattenRepeatingNulls(selectedInUse, sel, size); + } + flattenNoNulls(selectedInUse, sel, size); + } + + @Override + public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { + vector[outElementNum] = ((DoubleColumnVector) inputVector).vector[inputElementNum]; + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append(vector[row]); + } else { + buffer.append("null"); + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java ---------------------------------------------------------------------- diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java new file mode 100644 index 0000000..5702584 --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java @@ -0,0 +1,189 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.Arrays; + +/** + * This class represents a nullable int column vector. + * This class will be used for operations on all integer types (tinyint, smallint, int, bigint) + * and as such will use a 64-bit long value to hold the biggest possible value. + * During copy-in/copy-out, smaller int types will be converted as needed. This will + * reduce the amount of code that needs to be generated and also will run fast since the + * machine operates with 64-bit words. + * + * The vector[] field is public by design for high-performance access in the inner + * loop of query execution. + */ +public class LongColumnVector extends ColumnVector { + public long[] vector; + public static final long NULL_VALUE = 1; + + /** + * Use this constructor by default. All column vectors + * should normally be the default size. + */ + public LongColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE); + } + + /** + * Don't use this except for testing purposes. + * + * @param len the number of rows + */ + public LongColumnVector(int len) { + super(len); + vector = new long[len]; + } + + // Copy the current object contents into the output. Only copy selected entries, + // as indicated by selectedInUse and the sel array. + public void copySelected( + boolean selectedInUse, int[] sel, int size, LongColumnVector output) { + + // Output has nulls if and only if input has nulls. 
+ output.noNulls = noNulls; + output.isRepeating = false; + + // Handle repeating case + if (isRepeating) { + output.vector[0] = vector[0]; + output.isNull[0] = isNull[0]; + output.isRepeating = true; + return; + } + + // Handle normal case + + // Copy data values over + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.vector[i] = vector[i]; + } + } + else { + System.arraycopy(vector, 0, output.vector, 0, size); + } + + // Copy nulls over if needed + if (!noNulls) { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = isNull[i]; + } + } + else { + System.arraycopy(isNull, 0, output.isNull, 0, size); + } + } + } + + // Copy the current object contents into the output. Only copy selected entries, + // as indicated by selectedInUse and the sel array. + public void copySelected( + boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) { + + // Output has nulls if and only if input has nulls. + output.noNulls = noNulls; + output.isRepeating = false; + + // Handle repeating case + if (isRepeating) { + output.vector[0] = vector[0]; // automatic conversion to double is done here + output.isNull[0] = isNull[0]; + output.isRepeating = true; + return; + } + + // Handle normal case + + // Copy data values over + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.vector[i] = vector[i]; + } + } + else { + for(int i = 0; i < size; ++i) { + output.vector[i] = vector[i]; + } + } + + // Copy nulls over if needed + if (!noNulls) { + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + output.isNull[i] = isNull[i]; + } + } + else { + System.arraycopy(isNull, 0, output.isNull, 0, size); + } + } + } + + // Fill the column vector with the provided value + public void fill(long value) { + noNulls = true; + isRepeating = true; + vector[0] = value; + } + + // Simplify vector by brute-force flattening noNulls and isRepeating + // This can be 
used to reduce combinatorial explosion of code paths in VectorExpressions + // with many arguments. + public void flatten(boolean selectedInUse, int[] sel, int size) { + flattenPush(); + if (isRepeating) { + isRepeating = false; + long repeatVal = vector[0]; + if (selectedInUse) { + for (int j = 0; j < size; j++) { + int i = sel[j]; + vector[i] = repeatVal; + } + } else { + Arrays.fill(vector, 0, size, repeatVal); + } + flattenRepeatingNulls(selectedInUse, sel, size); + } + flattenNoNulls(selectedInUse, sel, size); + } + + @Override + public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { + vector[outElementNum] = ((LongColumnVector) inputVector).vector[inputElementNum]; + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append(vector[row]); + } else { + buffer.append("null"); + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java ---------------------------------------------------------------------- diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java new file mode 100644 index 0000000..7c18da6 --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java @@ -0,0 +1,186 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Writable; + +/** + * A VectorizedRowBatch is a set of rows, organized with each column + * as a vector. It is the unit of query execution, organized to minimize + * the cost per row and achieve high cycles-per-instruction. + * The major fields are public by design to allow fast and convenient + * access by the vectorized query execution code. + */ +public class VectorizedRowBatch implements Writable { + public int numCols; // number of columns + public ColumnVector[] cols; // a vector for each column + public int size; // number of rows that qualify (i.e. haven't been filtered out) + public int[] selected; // array of positions of selected values + public int[] projectedColumns; + public int projectionSize; + + /* + * If no filtering has been applied yet, selectedInUse is false, + * meaning that all rows qualify. If it is true, then the selected[] array + * records the offsets of qualifying rows. + */ + public boolean selectedInUse; + + // If this is true, then there is no data in the batch -- we have hit the end of input. + public boolean endOfFile; + + /* + * This number is carefully chosen to minimize overhead and typically allows + * one VectorizedRowBatch to fit in cache. + */ + public static final int DEFAULT_SIZE = 1024; + + /** + * Return a batch with the specified number of columns. 
+ * This is the standard constructor -- all batches should be the same size + * + * @param numCols the number of columns to include in the batch + */ + public VectorizedRowBatch(int numCols) { + this(numCols, DEFAULT_SIZE); + } + + /** + * Return a batch with the specified number of columns and rows. + * Only call this constructor directly for testing purposes. + * Batch size should normally always be defaultSize. + * + * @param numCols the number of columns to include in the batch + * @param size the number of rows to include in the batch + */ + public VectorizedRowBatch(int numCols, int size) { + this.numCols = numCols; + this.size = size; + selected = new int[size]; + selectedInUse = false; + this.cols = new ColumnVector[numCols]; + projectedColumns = new int[numCols]; + + // Initially all columns are projected and in the same order + projectionSize = numCols; + for (int i = 0; i < numCols; i++) { + projectedColumns[i] = i; + } + } + + /** + * Returns the maximum size of the batch (number of rows it can hold) + */ + public int getMaxSize() { + return selected.length; + } + + /** + * Return count of qualifying rows. 
+ * + * @return number of rows that have not been filtered out + */ + public long count() { + return size; + } + + private static String toUTF8(Object o) { + if(o == null || o instanceof NullWritable) { + return "\\N"; /* as found in LazySimpleSerDe's nullSequence */ + } + return o.toString(); + } + + /** + * Render the qualifying rows, one per line, as bracketed lists of the projected + * column values. Returns the empty string when the batch holds no rows. + */ + @Override + public String toString() { + if (size == 0) { + return ""; + } + StringBuilder b = new StringBuilder(); + for (int j = 0; j < size; j++) { + + // Go through selected[] only when a filter has been applied. + int i = selectedInUse ? selected[j] : j; + b.append('['); + for (int k = 0; k < projectionSize; k++) { + int projIndex = projectedColumns[k]; + ColumnVector cv = cols[projIndex]; + if (k > 0) { + b.append(", "); + } + + // Column slots may be unset (reset() allows for that too); print nothing for them. + if (cv != null) { + cv.stringifyValue(b, i); + } + } + b.append(']'); + if (j < size - 1) { + b.append('\n'); + } + } + return b.toString(); + } + + @Override + public void readFields(DataInput arg0) throws IOException { + throw new UnsupportedOperationException("Do you really need me?"); + } + + @Override + public void write(DataOutput arg0) throws IOException { + throw new UnsupportedOperationException("Don't call me"); + } + + /** + * Resets the row batch to default state + * - sets selectedInUse to false + * - sets size to 0 + * - sets endOfFile to false + * - resets each column + * - inits each column + */ + public void reset() { + selectedInUse = false; + size = 0; + endOfFile = false; + for (ColumnVector vc : cols) { + if (vc != null) { + vc.reset(); + vc.init(); + } + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java ---------------------------------------------------------------------- diff --git
a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java new file mode 100644 index 0000000..577d95d --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/ExpressionTree.java @@ -0,0 +1,156 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.sarg; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * The inner representation of the SearchArgument. Most users should not + * need this interface, it is only for file formats that need to translate + * the SearchArgument into an internal form. + */ +public class ExpressionTree { + public enum Operator {OR, AND, NOT, LEAF, CONSTANT} + private final Operator operator; + private final List<ExpressionTree> children; + private final int leaf; + private final SearchArgument.TruthValue constant; + + ExpressionTree() { + operator = null; + children = null; + leaf = 0; + constant = null; + } + + ExpressionTree(Operator op, ExpressionTree... 
kids) { + operator = op; + children = new ArrayList<ExpressionTree>(); + leaf = -1; + this.constant = null; + Collections.addAll(children, kids); + } + + /** + * Creates a LEAF node that refers to the leaf with the given index. + */ + ExpressionTree(int leaf) { + operator = Operator.LEAF; + children = null; + this.leaf = leaf; + this.constant = null; + } + + /** + * Creates a CONSTANT node holding the given truth value. + */ + ExpressionTree(SearchArgument.TruthValue constant) { + operator = Operator.CONSTANT; + children = null; + this.leaf = -1; + this.constant = constant; + } + + /** + * Creates a deep copy of the given tree; children are copied recursively. + */ + ExpressionTree(ExpressionTree other) { + this.operator = other.operator; + if (other.children == null) { + this.children = null; + } else { + this.children = new ArrayList<ExpressionTree>(); + for(ExpressionTree child: other.children) { + children.add(new ExpressionTree(child)); + } + } + this.leaf = other.leaf; + this.constant = other.constant; + } + + /** + * Evaluates this tree against the given leaf values. + * @param leaves the value of each leaf, indexed by leaf number + * @return the combined truth value; null for an OR or AND node that has + * no children + */ + public SearchArgument.TruthValue evaluate(SearchArgument.TruthValue[] leaves + ) { + SearchArgument.TruthValue result = null; + switch (operator) { + case OR: + for(ExpressionTree child: children) { + result = child.evaluate(leaves).or(result); + } + return result; + case AND: + for(ExpressionTree child: children) { + result = child.evaluate(leaves).and(result); + } + return result; + case NOT: + return children.get(0).evaluate(leaves).not(); + case LEAF: + return leaves[leaf]; + case CONSTANT: + return constant; + default: + throw new IllegalStateException("Unknown operator: " + operator); + } + } + + /** + * Formats the tree in prefix form, e.g. (and leaf-0 (not leaf-1)). + */ + @Override + public String toString() { + StringBuilder buffer = new StringBuilder(); + switch (operator) { + case OR: + buffer.append("(or"); + for(ExpressionTree child: children) { + buffer.append(' '); + buffer.append(child.toString()); + } + buffer.append(')'); + break; + case AND: + buffer.append("(and"); + for(ExpressionTree child: children) { + buffer.append(' '); + buffer.append(child.toString()); + } + buffer.append(')'); + break; + case NOT: + buffer.append("(not "); + buffer.append(children.get(0)); + buffer.append(')'); + break; + case LEAF: + buffer.append("leaf-"); + 
buffer.append(leaf); + break; + case CONSTANT: + buffer.append(constant); + break; + } + return buffer.toString(); + } + + /** + * Get the kind of this node. + * @return the operator + */ + public Operator getOperator() { + return operator; + } + + /** + * Get the child nodes. The internal list is returned without copying. + * @return the children, or null for nodes built as a LEAF or CONSTANT + */ + public List<ExpressionTree> getChildren() { + return children; + } + + /** + * Get the constant held by this node. + * @return the constant truth value; null unless one was supplied at + * construction + */ + public SearchArgument.TruthValue getConstant() { + return constant; + } + + /** + * Get the leaf index held by this node. + * @return the leaf index; operator and CONSTANT nodes hold -1 + */ + public int getLeaf() { + return leaf; + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java ---------------------------------------------------------------------- diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java new file mode 100644 index 0000000..3a92565 --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java @@ -0,0 +1,104 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.sarg; + +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; + +import java.sql.Date; +import java.sql.Timestamp; +import java.util.List; + +/** + * The primitive predicates that form a SearchArgument. 
+ */ +public interface PredicateLeaf { + + /** + * The possible operators for predicates. To get the opposites, construct + * an expression with a not operator. + */ + public static enum Operator { + EQUALS, + NULL_SAFE_EQUALS, + LESS_THAN, + LESS_THAN_EQUALS, + IN, + BETWEEN, + IS_NULL + } + + /** + * The possible types for sargs, each paired with the Java class that + * literal values of that type must have. + */ + public static enum Type { + INTEGER(Integer.class), // all of the integer types except long + LONG(Long.class), + FLOAT(Double.class), // float and double + STRING(String.class), // string, char, varchar + DATE(Date.class), + DECIMAL(HiveDecimalWritable.class), + TIMESTAMP(Timestamp.class), + BOOLEAN(Boolean.class); + + private final Class cls; + Type(Class cls) { + this.cls = cls; + } + + /** + * For all SARG leaves, the values must be the matching class. + * @return the value class + */ + public Class getValueClass() { + return cls; + } + } + + /** + * Get the operator for the leaf. + * @return the operator + */ + public Operator getOperator(); + + /** + * Get the type of the column and literal by the file format. + * @return the type + */ + public Type getType(); + + /** + * Get the simple column name. + * @return the column name + */ + public String getColumnName(); + + /** + * Get the literal half of the predicate leaf. Adapt the original type for what orc needs. + * + * @return the literal; per Type, an Integer, Long, Double, String, Date, + * HiveDecimalWritable, Timestamp, or Boolean + */ + public Object getLiteral(); + + /** + * For operators with multiple literals (IN and BETWEEN), get the literals. 
+ * + * @return the list of literals (Integer, Longs, Doubles, or Strings) + * + */ + public List<Object> getLiteralList(); + +} http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java ---------------------------------------------------------------------- diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java new file mode 100644 index 0000000..d70b3b0 --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java @@ -0,0 +1,287 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.sarg; + +import java.util.List; + +/** + * Primary interface for <a href="http://en.wikipedia.org/wiki/Sargable"> + * SearchArguments</a>, which are the subset of predicates + * that can be pushed down to the RecordReader. Each SearchArgument consists + * of a series of SearchClauses that must each be true for the row to be + * accepted by the filter. 
+ * + * This requires that the filter be normalized into conjunctive normal form + * (<a href="http://en.wikipedia.org/wiki/Conjunctive_normal_form">CNF</a>). + */ +public interface SearchArgument { + + /** + * The potential result sets of logical operations. Each constant names the + * outcomes that remain possible, e.g. YES_NO means the result is YES or NO. + */ + public static enum TruthValue { + YES, NO, NULL, YES_NULL, NO_NULL, YES_NO, YES_NO_NULL; + + /** + * Compute logical or between the two values. + * @param right the other argument or null + * @return the result; this value itself when right is null or equal to it + */ + public TruthValue or(TruthValue right) { + if (right == null || right == this) { + return this; + } + if (right == YES || this == YES) { + return YES; + } + if (right == YES_NULL || this == YES_NULL) { + return YES_NULL; + } + if (right == NO) { + return this; + } + if (this == NO) { + return right; + } + if (this == NULL) { + if (right == NO_NULL) { + return NULL; + } else { + return YES_NULL; + } + } + if (right == NULL) { + if (this == NO_NULL) { + return NULL; + } else { + return YES_NULL; + } + } + return YES_NO_NULL; + } + + /** + * Compute logical AND between the two values. 
+ * @param right the other argument or null + * @return the result + */ + public TruthValue and(TruthValue right) { + if (right == null || right == this) { + return this; + } + if (right == NO || this == NO) { + return NO; + } + if (right == NO_NULL || this == NO_NULL) { + return NO_NULL; + } + if (right == YES) { + return this; + } + if (this == YES) { + return right; + } + if (this == NULL) { + if (right == YES_NULL) { + return NULL; + } else { + return NO_NULL; + } + } + if (right == NULL) { + if (this == YES_NULL) { + return NULL; + } else { + return NO_NULL; + } + } + return YES_NO_NULL; + } + + /** + * Compute logical NOT of this value. + * @return the negated value; NULL, YES_NO and YES_NO_NULL are returned + * unchanged + */ + public TruthValue not() { + switch (this) { + case NO: + return YES; + case YES: + return NO; + case NULL: + case YES_NO: + case YES_NO_NULL: + return this; + case NO_NULL: + return YES_NULL; + case YES_NULL: + return NO_NULL; + default: + throw new IllegalArgumentException("Unknown value: " + this); + } + } + + /** + * Does the RecordReader need to include this set of records? + * @return true unless none of the rows qualify, i.e. false for NO, NULL + * and NO_NULL + */ + public boolean isNeeded() { + switch (this) { + case NO: + case NULL: + case NO_NULL: + return false; + default: + return true; + } + } + } + + /** + * Get the leaf predicates that are required to evaluate the predicate. The + * list will have the duplicates removed. + * @return the list of leaf predicates + */ + public List<PredicateLeaf> getLeaves(); + + /** + * Get the expression tree. This should only be needed for file formats that + * need to translate the expression to an internal form. + * @return the expression tree + */ + public ExpressionTree getExpression(); + + /** + * Evaluate the entire predicate based on the values for the leaf predicates. + * @param leaves the value of each leaf predicate + * @return the value of the entire predicate + */ + public TruthValue evaluate(TruthValue[] leaves); + + /** + * A builder object for contexts outside of Hive where it isn't easy to + * get a ExprNodeDesc. 
The user must call startOr, startAnd, or startNot + * before adding any leaves. + */ + public interface Builder { + + /** + * Start building an or operation and push it on the stack. + * @return this + */ + public Builder startOr(); + + /** + * Start building an and operation and push it on the stack. + * @return this + */ + public Builder startAnd(); + + /** + * Start building a not operation and push it on the stack. + * @return this + */ + public Builder startNot(); + + /** + * Finish the current operation and pop it off of the stack. Each start + * call must have a matching end. + * @return this + */ + public Builder end(); + + /** + * Add a less than leaf to the current item on the stack. + * @param column the name of the column + * @param type the type of the expression + * @param literal the literal, an instance of type.getValueClass() + * @return this + */ + public Builder lessThan(String column, PredicateLeaf.Type type, + Object literal); + + /** + * Add a less than equals leaf to the current item on the stack. + * @param column the name of the column + * @param type the type of the expression + * @param literal the literal, an instance of type.getValueClass() + * @return this + */ + public Builder lessThanEquals(String column, PredicateLeaf.Type type, + Object literal); + + /** + * Add an equals leaf to the current item on the stack. Note that this + * three-argument method overloads, rather than overrides, + * Object.equals(Object). + * @param column the name of the column + * @param type the type of the expression + * @param literal the literal, an instance of type.getValueClass() + * @return this + */ + public Builder equals(String column, PredicateLeaf.Type type, + Object literal); + + /** + * Add a null safe equals leaf to the current item on the stack. + * @param column the name of the column + * @param type the type of the expression + * @param literal the literal, an instance of type.getValueClass() + * @return this + */ + public Builder nullSafeEquals(String column, PredicateLeaf.Type type, + Object literal); + + /** + * Add an in leaf to the current item on the stack. 
+ * @param column the name of the column + * @param type the type of the expression + * @param literal the literal + * @return this + */ + public Builder in(String column, PredicateLeaf.Type type, + Object... literal); + + /** + * Add an is null leaf to the current item on the stack. + * @param column the name of the column + * @param type the type of the expression + * @return this + */ + public Builder isNull(String column, PredicateLeaf.Type type); + + /** + * Add a between leaf to the current item on the stack. + * @param column the name of the column + * @param type the type of the expression + * @param lower the lower literal + * @param upper the upper literal + * @return this + */ + public Builder between(String column, PredicateLeaf.Type type, + Object lower, Object upper); + + /** + * Add a truth value to the expression. + * @param truth the truth value to add + * @return this + */ + public Builder literal(TruthValue truth); + + /** + * Build and return the SearchArgument that has been defined. All of the + * starts must have been ended before this call. + * @return the new SearchArgument + */ + public SearchArgument build(); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/9ae70cb4/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java ---------------------------------------------------------------------- diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java new file mode 100644 index 0000000..0778935 --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentFactory.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.sarg; + +/** + * A factory for creating SearchArguments. + */ +public class SearchArgumentFactory { + /** + * Create a new builder for a SearchArgument. + * @return a new SearchArgumentImpl.BuilderImpl + */ + public static SearchArgument.Builder newBuilder() { + return new SearchArgumentImpl.BuilderImpl(); + } +}