Repository: hive
Updated Branches:
  refs/heads/master 1912d19f2 -> 3f5659f02
HIVE-10235 Loop optimization for SIMD in ColumnDivideColumn.txt (chengxiang, reviewed by Gopal V) Signed-off-by: chengxiang <chengxi...@apache.com> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3f5659f0 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3f5659f0 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3f5659f0 Branch: refs/heads/master Commit: 3f5659f0234806f464c2f03f5c1a99ea1af13227 Parents: 1912d19 Author: chengxiang li <chengxiang...@intel.com> Authored: Fri Apr 24 15:01:06 2015 +0800 Committer: chengxiang <chengxi...@apache.com> Committed: Thu Apr 30 09:51:31 2015 +0800 ---------------------------------------------------------------------- .../vectorization/VectorizationBench.java | 146 +++++++++++-------- .../ExpressionTemplates/ColumnDivideColumn.txt | 26 ++-- 2 files changed, 101 insertions(+), 71 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/3f5659f0/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java ---------------------------------------------------------------------- diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java index a131a71..391d052 100644 --- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java @@ -20,23 +20,17 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColAddDoubleColumn; -import 
org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColAddLongColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColDivideDoubleColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColDivideLongColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddDoubleColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColDivideDoubleColumn; +import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Warmup; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; @@ -70,21 +64,18 @@ public class VectorizationBench { * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.vectorization VectorizationBench * -wi 10 -i 5 -f 2 -bm avgt -tu us */ - private static LongColumnVector longColumnVector = new LongColumnVector(); - private static LongColumnVector dupLongColumnVector = new LongColumnVector(); - private static DoubleColumnVector doubleColumnVector = new DoubleColumnVector(); - private static DoubleColumnVector dupDoubleColumnVector = new DoubleColumnVector(); @BenchmarkMode(Mode.AverageTime) @Fork(1) @State(Scope.Thread) @OutputTimeUnit(TimeUnit.NANOSECONDS) public static abstract class AbstractExpression { + private static final int DEFAULT_ITER_TIME = 1000000; protected VectorExpression expression; protected 
VectorizedRowBatch rowBatch; protected VectorizedRowBatch buildRowBatch(ColumnVector output, int colNum, ColumnVector... - cols) { + cols) { VectorizedRowBatch rowBatch = new VectorizedRowBatch(colNum + 1); for (int i = 0; i < cols.length; i++) { rowBatch.cols[i] = cols[i]; @@ -100,94 +91,127 @@ public class VectorizationBench { @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS) @Measurement(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS) public void bench() { - expression.evaluate(rowBatch); + for (int i = 0; i < DEFAULT_ITER_TIME; i++) { + expression.evaluate(rowBatch); + } } - } - public static class DoubleAddDoubleExpr extends AbstractExpression { - @Override - public void setup() { - rowBatch = buildRowBatch(new DoubleColumnVector(), 2, doubleColumnVector, - dupDoubleColumnVector); - expression = new DoubleColAddDoubleColumn(0, 1, 2); + protected LongColumnVector getLongColumnVector() { + LongColumnVector columnVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + Random random = new Random(); + for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) { + columnVector.vector[i] = random.nextLong(); + } + return columnVector; } - } - public static class LongAddLongExpr extends AbstractExpression { - @Override - public void setup() { - rowBatch = buildRowBatch(new LongColumnVector(), 2, longColumnVector, dupLongColumnVector); - expression = new LongColAddLongColumn(0, 1, 2); + protected LongColumnVector getRepeatingLongColumnVector() { + LongColumnVector columnVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + columnVector.fill(2); + return columnVector; } + + protected LongColumnVector getLongColumnVectorWithNull() { + LongColumnVector columnVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + columnVector.noNulls = false; + Random random = new Random(); + for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) { + if (i % 100 == 0) { + columnVector.isNull[i] = true; + } + 
columnVector.vector[i] = random.nextLong(); + } + return columnVector; + } + + protected DoubleColumnVector getDoubleColumnVector() { + DoubleColumnVector columnVector = new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + Random random = new Random(); + for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) { + columnVector.vector[i] = random.nextDouble(); + } + return columnVector; + } + + protected DoubleColumnVector getRepeatingDoubleColumnVector() { + DoubleColumnVector columnVector = new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + columnVector.fill(2.0d); + return columnVector; + } + + protected DoubleColumnVector getDoubleColumnVectorWithNull() { + DoubleColumnVector columnVector = new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + columnVector.noNulls = false; + Random random = new Random(); + for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) { + if (i % 100 == 0) { + columnVector.isNull[i] = true; + } + columnVector.vector[i] = random.nextDouble(); + } + return columnVector; + } + } - public static class LongAddDoubleExpr extends AbstractExpression { + public static class DoubleColAddRepeatingDoubleColumnBench extends AbstractExpression { @Override public void setup() { - rowBatch = buildRowBatch(new DoubleColumnVector(), 2, longColumnVector, doubleColumnVector); - expression = new LongColAddDoubleColumn(0, 1, 2); + rowBatch = buildRowBatch(new DoubleColumnVector(), 2, getDoubleColumnVector(), + getRepeatingDoubleColumnVector()); + expression = new DoubleColAddDoubleColumn(0, 1, 2); } } - public static class DoubleAddLongExpr extends AbstractExpression { + public static class LongColAddRepeatingLongColumnBench extends AbstractExpression { @Override public void setup() { - rowBatch = buildRowBatch(new DoubleColumnVector(), 2, doubleColumnVector, longColumnVector); - expression = new DoubleColAddLongColumn(0, 1, 2); + rowBatch = buildRowBatch(new LongColumnVector(), 2, getLongColumnVector(), + 
getRepeatingLongColumnVector()); + expression = new LongColAddLongColumn(0, 1, 2); } } - public static class DoubleDivideDoubleExpr extends AbstractExpression { + + public static class DoubleColDivideDoubleColumnBench extends AbstractExpression { @Override public void setup() { - rowBatch = buildRowBatch(new DoubleColumnVector(), 2, doubleColumnVector, - dupDoubleColumnVector); + rowBatch = buildRowBatch(new DoubleColumnVector(), 2, getDoubleColumnVector(), + getDoubleColumnVector()); expression = new DoubleColDivideDoubleColumn(0, 1, 2); } } - public static class LongDivideLongExpr extends AbstractExpression { + public static class DoubleColDivideRepeatingDoubleColumnBench extends AbstractExpression { @Override public void setup() { - rowBatch = buildRowBatch(new DoubleColumnVector(), 2, longColumnVector, - dupLongColumnVector); - expression = new LongColDivideLongColumn(0, 1, 2); + rowBatch = buildRowBatch(new DoubleColumnVector(), 2, getDoubleColumnVector(), + getRepeatingDoubleColumnVector()); + expression = new DoubleColDivideDoubleColumn(0, 1, 2); } } - public static class DoubleDivideLongExpr extends AbstractExpression { + public static class LongColDivideLongColumnBench extends AbstractExpression { @Override public void setup() { - rowBatch = buildRowBatch(new DoubleColumnVector(), 2, doubleColumnVector, - longColumnVector); - expression = new DoubleColDivideLongColumn(0, 1, 2); + rowBatch = buildRowBatch(new DoubleColumnVector(), 2, getLongColumnVector(), + getLongColumnVector()); + expression = new LongColDivideLongColumn(0, 1, 2); } } - public static class LongDivideDoubleExpr extends AbstractExpression { + public static class LongColDivideRepeatingLongColumnBench extends AbstractExpression { @Override public void setup() { - rowBatch = buildRowBatch(new DoubleColumnVector(), 2, longColumnVector, - doubleColumnVector); - expression = new LongColDivideDoubleColumn(0, 1, 2); - } - } - - @Setup(Level.Trial) - public void initialColumnVectors() { - Random 
random = new Random(); - - dupLongColumnVector.fill(random.nextLong()); - dupDoubleColumnVector.fill(random.nextDouble()); - for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { - doubleColumnVector.vector[i] = random.nextDouble(); - longColumnVector.vector[i] = random.nextLong(); + rowBatch = buildRowBatch(new DoubleColumnVector(), 2, getLongColumnVector(), + getRepeatingLongColumnVector()); + expression = new LongColDivideLongColumn(0, 1, 2); } } public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder().include(".*" + VectorizationBench.class.getSimpleName() + - ".*").build(); + ".*").build(); new Runner(opt).run(); } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/3f5659f0/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt ---------------------------------------------------------------------- diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt index d4953d1..b0f6eb1 100644 --- a/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt +++ b/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt @@ -86,22 +86,26 @@ public class <ClassName> extends VectorExpression { outputVector[0] = vector1[0] <OperatorSymbol> denom; hasDivBy0 = hasDivBy0 || (denom == 0); } else if (inputColVector1.isRepeating) { + final <OperandType1> vector1Value = vector1[0]; if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; <OperandType2> denom = vector2[i]; - outputVector[i] = vector1[0] <OperatorSymbol> denom; + outputVector[i] = vector1Value <OperatorSymbol> denom; hasDivBy0 = hasDivBy0 || (denom == 0); } } else { for(int i = 0; i != n; i++) { - <OperandType2> denom = vector2[i]; - outputVector[i] = vector1[0] <OperatorSymbol> denom; - hasDivBy0 = hasDivBy0 || (denom == 0); + outputVector[i] = vector1Value <OperatorSymbol> vector2[i]; + } + + for(int 
i = 0; i != n; i++) { + hasDivBy0 = hasDivBy0 || (vector2[i] == 0); } } } else if (inputColVector2.isRepeating) { - if (vector2[0] == 0) { + final <OperandType2> vector2Value = vector2[0]; + if (vector2Value == 0) { // Denominator is zero, convert the batch to nulls outputColVector.noNulls = false; outputColVector.isRepeating = true; @@ -109,11 +113,11 @@ public class <ClassName> extends VectorExpression { } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] <OperatorSymbol> vector2[0]; + outputVector[i] = vector1[i] <OperatorSymbol> vector2Value; } } else { for(int i = 0; i != n; i++) { - outputVector[i] = vector1[i] <OperatorSymbol> vector2[0]; + outputVector[i] = vector1[i] <OperatorSymbol> vector2Value; } } } else { @@ -126,9 +130,11 @@ public class <ClassName> extends VectorExpression { } } else { for(int i = 0; i != n; i++) { - <OperandType2> denom = vector2[i]; - outputVector[i] = vector1[i] <OperatorSymbol> denom; - hasDivBy0 = hasDivBy0 || (denom == 0); + outputVector[i] = vector1[i] <OperatorSymbol> vector2[i]; + } + + for(int i = 0; i != n; i++) { + hasDivBy0 = hasDivBy0 || (vector2[i] == 0); } } }