HIVE-11521 : Loop optimization for SIMD in logical operators (Teddy Choi via Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7796dd67 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7796dd67 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7796dd67 Branch: refs/heads/llap Commit: 7796dd671a0c28cbcf2f1fc51b01fcd99ed49561 Parents: e2a1764 Author: Teddy Choi <tc...@hortonworks.com> Authored: Tue Aug 11 07:05:00 2015 -0800 Committer: Ashutosh Chauhan <hashut...@apache.org> Committed: Mon Aug 24 10:24:39 2015 -0700 ---------------------------------------------------------------------- .../vectorization/VectorizationBench.java | 93 ++++++++++++++++++++ .../ql/exec/vector/expressions/ColAndCol.java | 34 +++---- .../ql/exec/vector/expressions/ColOrCol.java | 42 ++++----- .../hive/ql/exec/vector/expressions/NotCol.java | 14 +-- 4 files changed, 140 insertions(+), 43 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/7796dd67/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java ---------------------------------------------------------------------- diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java index 391d052..0e880c6 100644 --- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java +++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java @@ -17,7 +17,10 @@ import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol; +import org.apache.hadoop.hive.ql.exec.vector.expressions.ColOrCol; import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.NotCol; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColAddDoubleColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColDivideDoubleColumn; @@ -124,6 +127,34 @@ public class VectorizationBench { return columnVector; } + protected LongColumnVector getBooleanLongColumnVector() { + LongColumnVector columnVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + Random random = new Random(); + for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) { + columnVector.vector[i] = random.nextInt(2); + } + return columnVector; + } + + protected LongColumnVector getBooleanRepeatingLongColumnVector() { + LongColumnVector columnVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + columnVector.fill(1); + return columnVector; + } + + protected LongColumnVector getBooleanLongColumnVectorWithNull() { + LongColumnVector columnVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + columnVector.noNulls = false; + Random random = new Random(); + for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) { + if (i % 100 == 0) { + columnVector.isNull[i] = true; + } + columnVector.vector[i] = random.nextInt(2); + } + return columnVector; + } + protected DoubleColumnVector getDoubleColumnVector() { DoubleColumnVector columnVector = new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE); Random random = new Random(); @@ -209,6 +240,68 @@ public class VectorizationBench { } } + public static class ColAndColBench extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new LongColumnVector(), 2, getBooleanLongColumnVector(), + getBooleanLongColumnVector()); + expression = new ColAndCol(0, 1, 2); + } + } + + public static class ColAndRepeatingColBench extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new LongColumnVector(), 2, getBooleanLongColumnVector(), + getBooleanRepeatingLongColumnVector()); + expression = new ColAndCol(0, 1, 2); + } + } + + public static class RepeatingColAndColBench extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new LongColumnVector(), 2, getBooleanRepeatingLongColumnVector(), + getBooleanLongColumnVector()); + expression = new ColAndCol(0, 1, 2); + } + } + + public static class ColOrColBench extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new LongColumnVector(), 2, getBooleanLongColumnVector(), + getBooleanLongColumnVector()); + expression = new ColOrCol(0, 1, 2); + } + } + + public static class ColOrRepeatingColBench extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new LongColumnVector(), 2, getBooleanLongColumnVector(), + getBooleanRepeatingLongColumnVector()); + expression = new ColOrCol(0, 1, 2); + } + } + + public static class RepeatingColOrColBench extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new LongColumnVector(), 2, getBooleanRepeatingLongColumnVector(), + getBooleanLongColumnVector()); + expression = new ColOrCol(0, 1, 2); + } + } + + public static class NotColBench extends AbstractExpression { + @Override + public void setup() { + rowBatch = buildRowBatch(new LongColumnVector(), 1, getBooleanLongColumnVector()); + expression = new NotCol(0, 1); + } + } + public static void main(String[] args) throws RunnerException { Options opt = new OptionsBuilder().include(".*" + VectorizationBench.class.getSimpleName() + ".*").build(); http://git-wip-us.apache.org/repos/asf/hive/blob/7796dd67/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java index 72df220..ff7371d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java @@ -64,6 +64,8 @@ public class ColAndCol extends VectorExpression { return; } + long vector1Value = vector1[0]; + long vector2Value = vector2[0]; if (inputColVector1.noNulls && inputColVector2.noNulls) { if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero @@ -74,11 +76,11 @@ public class ColAndCol extends VectorExpression { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] & vector2[i]; + outputVector[i] = vector1Value & vector2[i]; } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[0] & vector2[i]; + outputVector[i] = vector1Value & vector2[i]; } } outV.isRepeating = false; @@ -86,11 +88,11 @@ public class ColAndCol extends VectorExpression { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] & vector2[0]; + outputVector[i] = vector1[i] & vector2Value; } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] & vector2[0]; + outputVector[i] = vector1[i] & vector2Value; } } outV.isRepeating = false; @@ -120,12 +122,12 @@ public class ColAndCol extends VectorExpression { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] & vector2[i]; + outputVector[i] = vector1Value & vector2[i]; outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[0] & vector2[i]; + outputVector[i] = vector1Value & vector2[i]; outV.isNull[i] = (vector1[0] == 1) && inputColVector2.isNull[i]; } } @@ -134,12 +136,12 @@ public class ColAndCol extends VectorExpression { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] & vector2[0]; + outputVector[i] = vector1[i] & vector2Value; outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] & vector2[0]; + outputVector[i] = vector1[i] & vector2Value; outV.isNull[i] = (vector1[i] == 1) && inputColVector2.isNull[0]; } } @@ -172,12 +174,12 @@ public class ColAndCol extends VectorExpression { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] & vector2[i]; + outputVector[i] = vector1Value & vector2[i]; outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[0] & vector2[i]; + outputVector[i] = vector1Value & vector2[i]; outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 1); } } @@ -186,12 +188,12 @@ public class ColAndCol extends VectorExpression { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] & vector2[0]; + outputVector[i] = vector1[i] & vector2Value; outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] & vector2[0]; + outputVector[i] = vector1[i] & vector2Value; outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 1); } } @@ -226,14 +228,14 @@ public class ColAndCol extends VectorExpression { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] & vector2[i]; + outputVector[i] = vector1Value & vector2[i]; outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[0] & vector2[i]; + outputVector[i] = vector1Value & vector2[i]; outV.isNull[i] = ((vector1[0] == 1) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 1)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); @@ -244,14 +246,14 @@ public class ColAndCol extends VectorExpression { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] & vector2[0]; + outputVector[i] = vector1[i] & vector2Value; outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] & vector2[0]; + outputVector[i] = vector1[i] & vector2Value; outV.isNull[i] = ((vector1[i] == 1) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 1)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); http://git-wip-us.apache.org/repos/asf/hive/blob/7796dd67/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java index 5e23446..60ed2d4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java @@ -67,6 +67,8 @@ public class ColOrCol extends VectorExpression { return; } + long vector1Value = vector1[0]; + long vector2Value = vector2[0]; if (inputColVector1.noNulls && inputColVector2.noNulls) { if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero @@ -77,11 +79,11 @@ public class ColOrCol extends VectorExpression { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] | vector2[i]; + outputVector[i] = vector1Value | vector2[i]; } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[0] | vector2[i]; + outputVector[i] = vector1Value | vector2[i]; } } outV.isRepeating = false; @@ -89,11 +91,11 @@ public class ColOrCol extends VectorExpression { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] | vector2[0]; + outputVector[i] = vector1[i] | vector2Value; } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] | vector2[0]; + outputVector[i] = vector1[i] | vector2Value; } } outV.isRepeating = false; @@ -123,13 +125,13 @@ public class ColOrCol extends VectorExpression { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] | vector2[i]; - outV.isNull[i] = (vector1[0] == 0) && inputColVector2.isNull[i]; + outputVector[i] = vector1Value | vector2[i]; + outV.isNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[0] | vector2[i]; - outV.isNull[i] = (vector1[0] == 0) && inputColVector2.isNull[i]; + outputVector[i] = vector1Value | vector2[i]; + outV.isNull[i] = (vector1Value == 0) && inputColVector2.isNull[i]; } } outV.isRepeating = false; @@ -137,12 +139,12 @@ public class ColOrCol extends VectorExpression { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] | vector2[0]; + outputVector[i] = vector1[i] | vector2Value; outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] | vector2[0]; + outputVector[i] = vector1[i] | vector2Value; outV.isNull[i] = (vector1[i] == 0) && inputColVector2.isNull[0]; } } @@ -175,12 +177,12 @@ public class ColOrCol extends VectorExpression { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] | vector2[i]; + outputVector[i] = vector1Value | vector2[i]; outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[0] | vector2[i]; + outputVector[i] = vector1Value | vector2[i]; outV.isNull[i] = inputColVector1.isNull[0] && (vector2[i] == 0); } } @@ -189,13 +191,13 @@ public class ColOrCol extends VectorExpression { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] | vector2[0]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 0); + outputVector[i] = vector1[i] | vector2Value; + outV.isNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] | vector2[0]; - outV.isNull[i] = inputColVector1.isNull[i] && (vector2[0] == 0); + outputVector[i] = vector1[i] | vector2Value; + outV.isNull[i] = inputColVector1.isNull[i] && (vector2Value == 0); } } outV.isRepeating = false; @@ -229,14 +231,14 @@ public class ColOrCol extends VectorExpression { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[0] | vector2[i]; + outputVector[i] = vector1Value | vector2[i]; outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 0)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[0] | vector2[i]; + outputVector[i] = vector1Value | vector2[i]; outV.isNull[i] = ((vector1[0] == 0) && inputColVector2.isNull[i]) || (inputColVector1.isNull[0] && (vector2[i] == 0)) || (inputColVector1.isNull[0] && inputColVector2.isNull[i]); @@ -247,14 +249,14 @@ public class ColOrCol extends VectorExpression { if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = vector1[i] | vector2[0]; + outputVector[i] = vector1[i] | vector2Value; outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); } } else { for (int i = 0; i != n; i++) { - outputVector[i] = vector1[i] | vector2[0]; + outputVector[i] = vector1[i] | vector2Value; outV.isNull[i] = ((vector1[i] == 0) && inputColVector2.isNull[0]) || (inputColVector1.isNull[i] && (vector2[0] == 0)) || (inputColVector1.isNull[i] && inputColVector2.isNull[0]); http://git-wip-us.apache.org/repos/asf/hive/blob/7796dd67/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java index 604d154..ea2a434 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java @@ -62,17 +62,17 @@ public class NotCol extends VectorExpression { outV.noNulls = true; if (inputColVector.isRepeating) { outV.isRepeating = true; - // mask out all but low order bit with "& 1" so NOT 1 yields 0, NOT 0 yields 1 - outputVector[0] = ~vector[0] & 1; + // 0 XOR 1 yields 1, 1 XOR 1 yields 0 + outputVector[0] = vector[0] ^ 1; } else if (batch.selectedInUse) { for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = ~vector[i] & 1; + outputVector[i] = vector[i] ^ 1; } outV.isRepeating = false; } else { for (int i = 0; i != n; i++) { - outputVector[i] = ~vector[i] & 1; + outputVector[i] = vector[i] ^ 1; } outV.isRepeating = false; } @@ -80,19 +80,19 @@ public class NotCol extends VectorExpression { outV.noNulls = false; if (inputColVector.isRepeating) { outV.isRepeating = true; - outputVector[0] = ~vector[0] & 1; + outputVector[0] = vector[0] ^ 1; outV.isNull[0] = inputColVector.isNull[0]; } else if (batch.selectedInUse) { outV.isRepeating = false; for (int j = 0; j != n; j++) { int i = sel[j]; - outputVector[i] = ~vector[i] & 1; + outputVector[i] = vector[i] ^ 1; outV.isNull[i] = inputColVector.isNull[i]; } } else { outV.isRepeating = false; for (int i = 0; i != n; i++) { - outputVector[i] = ~vector[i] & 1; + outputVector[i] = vector[i] ^ 1; outV.isNull[i] = inputColVector.isNull[i]; } }