Repository: hive
Updated Branches:
  refs/heads/master 1912d19f2 -> 3f5659f02


HIVE-10235 Loop optimization for SIMD in ColumnDivideColumn.txt (chengxiang, reviewed by Gopal V)

Signed-off-by: chengxiang <chengxi...@apache.com>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3f5659f0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3f5659f0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3f5659f0

Branch: refs/heads/master
Commit: 3f5659f0234806f464c2f03f5c1a99ea1af13227
Parents: 1912d19
Author: chengxiang li <chengxiang...@intel.com>
Authored: Fri Apr 24 15:01:06 2015 +0800
Committer: chengxiang <chengxi...@apache.com>
Committed: Thu Apr 30 09:51:31 2015 +0800

----------------------------------------------------------------------
 .../vectorization/VectorizationBench.java       | 146 +++++++++++--------
 .../ExpressionTemplates/ColumnDivideColumn.txt  |  26 ++--
 2 files changed, 101 insertions(+), 71 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3f5659f0/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java
----------------------------------------------------------------------
diff --git a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java
index a131a71..391d052 100644
--- a/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java
+++ b/itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/VectorizationBench.java
@@ -20,23 +20,17 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongColumn;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColAddDoubleColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColAddLongColumn;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColDivideDoubleColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.DoubleColDivideLongColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddDoubleColumn;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumn;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColDivideDoubleColumn;
+import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
 import org.openjdk.jmh.annotations.Fork;
 import org.openjdk.jmh.annotations.Measurement;
 import org.openjdk.jmh.annotations.Mode;
 import org.openjdk.jmh.annotations.OutputTimeUnit;
-import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
 import org.openjdk.jmh.annotations.Setup;
 import org.openjdk.jmh.annotations.State;
-import org.openjdk.jmh.annotations.Benchmark;
-import org.openjdk.jmh.annotations.Level;
-import org.openjdk.jmh.annotations.Scope;
 import org.openjdk.jmh.annotations.Warmup;
 import org.openjdk.jmh.runner.Runner;
 import org.openjdk.jmh.runner.RunnerException;
@@ -70,21 +64,18 @@ public class VectorizationBench {
    * $ java -jar target/benchmarks.jar org.apache.hive.benchmark.vectorization VectorizationBench
    * -wi 10 -i 5 -f 2 -bm avgt -tu us
    */
-  private static LongColumnVector longColumnVector = new LongColumnVector();
-  private static LongColumnVector dupLongColumnVector = new LongColumnVector();
-  private static DoubleColumnVector doubleColumnVector = new DoubleColumnVector();
-  private static DoubleColumnVector dupDoubleColumnVector = new DoubleColumnVector();
 
   @BenchmarkMode(Mode.AverageTime)
   @Fork(1)
   @State(Scope.Thread)
   @OutputTimeUnit(TimeUnit.NANOSECONDS)
   public static abstract class AbstractExpression {
+    private static final int DEFAULT_ITER_TIME = 1000000;
     protected VectorExpression expression;
     protected VectorizedRowBatch rowBatch;
 
     protected VectorizedRowBatch buildRowBatch(ColumnVector output, int colNum, ColumnVector...
-        cols) {
+      cols) {
       VectorizedRowBatch rowBatch = new VectorizedRowBatch(colNum + 1);
       for (int i = 0; i < cols.length; i++) {
         rowBatch.cols[i] = cols[i];
@@ -100,94 +91,127 @@ public class VectorizationBench {
     @Warmup(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS)
     @Measurement(iterations = 2, time = 2, timeUnit = TimeUnit.MILLISECONDS)
     public void bench() {
-      expression.evaluate(rowBatch);
+      for (int i = 0; i < DEFAULT_ITER_TIME; i++) {
+        expression.evaluate(rowBatch);
+      }
     }
-  }
 
-  public static class DoubleAddDoubleExpr extends AbstractExpression {
-    @Override
-    public void setup() {
-      rowBatch = buildRowBatch(new DoubleColumnVector(), 2, doubleColumnVector,
-          dupDoubleColumnVector);
-      expression = new DoubleColAddDoubleColumn(0, 1, 2);
+    protected LongColumnVector getLongColumnVector() {
+      LongColumnVector columnVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
+      Random random = new Random();
+      for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) {
+        columnVector.vector[i] = random.nextLong();
+      }
+      return columnVector;
     }
-  }
 
-  public static class LongAddLongExpr extends AbstractExpression {
-    @Override
-    public void setup() {
-      rowBatch = buildRowBatch(new LongColumnVector(), 2, longColumnVector, dupLongColumnVector);
-      expression = new LongColAddLongColumn(0, 1, 2);
+    protected LongColumnVector getRepeatingLongColumnVector() {
+      LongColumnVector columnVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
+      columnVector.fill(2);
+      return columnVector;
     }
+
+    protected LongColumnVector getLongColumnVectorWithNull() {
+      LongColumnVector columnVector = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
+      columnVector.noNulls = false;
+      Random random = new Random();
+      for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) {
+        if (i % 100 == 0) {
+          columnVector.isNull[i] = true;
+        }
+        columnVector.vector[i] = random.nextLong();
+      }
+      return columnVector;
+    }
+
+    protected DoubleColumnVector getDoubleColumnVector() {
+      DoubleColumnVector columnVector = new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
+      Random random = new Random();
+      for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) {
+        columnVector.vector[i] = random.nextDouble();
+      }
+      return columnVector;
+    }
+
+    protected DoubleColumnVector getRepeatingDoubleColumnVector() {
+      DoubleColumnVector columnVector = new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
+      columnVector.fill(2.0d);
+      return columnVector;
+    }
+
+    protected DoubleColumnVector getDoubleColumnVectorWithNull() {
+      DoubleColumnVector columnVector = new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
+      columnVector.noNulls = false;
+      Random random = new Random();
+      for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) {
+        if (i % 100 == 0) {
+          columnVector.isNull[i] = true;
+        }
+        columnVector.vector[i] = random.nextDouble();
+      }
+      return columnVector;
+    }
+
   }
 
-  public static class LongAddDoubleExpr extends AbstractExpression {
+  public static class DoubleColAddRepeatingDoubleColumnBench extends AbstractExpression {
     @Override
     public void setup() {
-      rowBatch = buildRowBatch(new DoubleColumnVector(), 2, longColumnVector, doubleColumnVector);
-      expression = new LongColAddDoubleColumn(0, 1, 2);
+      rowBatch = buildRowBatch(new DoubleColumnVector(), 2, getDoubleColumnVector(),
+        getRepeatingDoubleColumnVector());
+      expression = new DoubleColAddDoubleColumn(0, 1, 2);
     }
   }
 
-  public static class DoubleAddLongExpr extends AbstractExpression {
+  public static class LongColAddRepeatingLongColumnBench extends AbstractExpression {
     @Override
     public void setup() {
-      rowBatch = buildRowBatch(new DoubleColumnVector(), 2, doubleColumnVector, longColumnVector);
-      expression = new DoubleColAddLongColumn(0, 1, 2);
+      rowBatch = buildRowBatch(new LongColumnVector(), 2, getLongColumnVector(),
+        getRepeatingLongColumnVector());
+      expression = new LongColAddLongColumn(0, 1, 2);
     }
   }
 
-  public static class DoubleDivideDoubleExpr extends AbstractExpression {
+
+  public static class DoubleColDivideDoubleColumnBench extends AbstractExpression {
     @Override
     public void setup() {
-      rowBatch = buildRowBatch(new DoubleColumnVector(), 2, doubleColumnVector,
-          dupDoubleColumnVector);
+      rowBatch = buildRowBatch(new DoubleColumnVector(), 2, getDoubleColumnVector(),
+        getDoubleColumnVector());
       expression = new DoubleColDivideDoubleColumn(0, 1, 2);
     }
   }
 
-  public static class LongDivideLongExpr extends AbstractExpression {
+  public static class DoubleColDivideRepeatingDoubleColumnBench extends AbstractExpression {
     @Override
     public void setup() {
-      rowBatch = buildRowBatch(new DoubleColumnVector(), 2, longColumnVector,
-          dupLongColumnVector);
-      expression = new LongColDivideLongColumn(0, 1, 2);
+      rowBatch = buildRowBatch(new DoubleColumnVector(), 2, getDoubleColumnVector(),
+        getRepeatingDoubleColumnVector());
+      expression = new DoubleColDivideDoubleColumn(0, 1, 2);
     }
   }
 
-  public static class DoubleDivideLongExpr extends AbstractExpression {
+  public static class LongColDivideLongColumnBench extends AbstractExpression {
     @Override
     public void setup() {
-      rowBatch = buildRowBatch(new DoubleColumnVector(), 2, doubleColumnVector,
-          longColumnVector);
-      expression = new DoubleColDivideLongColumn(0, 1, 2);
+      rowBatch = buildRowBatch(new DoubleColumnVector(), 2, getLongColumnVector(),
+        getLongColumnVector());
+      expression = new LongColDivideLongColumn(0, 1, 2);
     }
   }
 
-  public static class LongDivideDoubleExpr extends AbstractExpression {
+  public static class LongColDivideRepeatingLongColumnBench extends AbstractExpression {
     @Override
     public void setup() {
-      rowBatch = buildRowBatch(new DoubleColumnVector(), 2, longColumnVector,
-          doubleColumnVector);
-      expression = new LongColDivideDoubleColumn(0, 1, 2);
-    }
-  }
-
-  @Setup(Level.Trial)
-  public void initialColumnVectors() {
-    Random random = new Random();
-
-    dupLongColumnVector.fill(random.nextLong());
-    dupDoubleColumnVector.fill(random.nextDouble());
-    for (int i = 0; i < VectorizedRowBatch.DEFAULT_SIZE; i++) {
-      doubleColumnVector.vector[i] = random.nextDouble();
-      longColumnVector.vector[i] = random.nextLong();
+      rowBatch = buildRowBatch(new DoubleColumnVector(), 2, getLongColumnVector(),
+        getRepeatingLongColumnVector());
+      expression = new LongColDivideLongColumn(0, 1, 2);
     }
   }
 
   public static void main(String[] args) throws RunnerException {
     Options opt = new OptionsBuilder().include(".*" + VectorizationBench.class.getSimpleName() +
-        ".*").build();
+      ".*").build();
     new Runner(opt).run();
   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/3f5659f0/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt b/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt
index d4953d1..b0f6eb1 100644
--- a/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt
+++ b/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt
@@ -86,22 +86,26 @@ public class <ClassName> extends VectorExpression {
       outputVector[0] = vector1[0] <OperatorSymbol> denom;
       hasDivBy0 = hasDivBy0 || (denom == 0);
     } else if (inputColVector1.isRepeating) {
+      final <OperandType1> vector1Value = vector1[0];
       if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
           <OperandType2> denom = vector2[i];
-          outputVector[i] = vector1[0] <OperatorSymbol> denom;
+          outputVector[i] = vector1Value <OperatorSymbol> denom;
           hasDivBy0 = hasDivBy0 || (denom == 0);
         }
       } else {
         for(int i = 0; i != n; i++) {
-          <OperandType2> denom = vector2[i];
-          outputVector[i] = vector1[0] <OperatorSymbol> denom;
-          hasDivBy0 = hasDivBy0 || (denom == 0);
+          outputVector[i] = vector1Value <OperatorSymbol> vector2[i];
+        }
+
+        for(int i = 0; i != n; i++) {
+          hasDivBy0 = hasDivBy0 || (vector2[i] == 0);
         }
       }
     } else if (inputColVector2.isRepeating) {
-      if (vector2[0] == 0) {
+      final <OperandType2> vector2Value = vector2[0];
+      if (vector2Value == 0) {
         // Denominator is zero, convert the batch to nulls
         outputColVector.noNulls = false;
         outputColVector.isRepeating = true;
@@ -109,11 +113,11 @@ public class <ClassName> extends VectorExpression {
       } else if (batch.selectedInUse) {
         for(int j = 0; j != n; j++) {
           int i = sel[j];
-          outputVector[i] = vector1[i] <OperatorSymbol> vector2[0];
+          outputVector[i] = vector1[i] <OperatorSymbol> vector2Value;
         }
       } else {
         for(int i = 0; i != n; i++) {
-          outputVector[i] = vector1[i] <OperatorSymbol> vector2[0];
+          outputVector[i] = vector1[i] <OperatorSymbol> vector2Value;
         }
       }
     } else {
@@ -126,9 +130,11 @@ public class <ClassName> extends VectorExpression {
         }
       } else {
         for(int i = 0; i != n; i++) {
-          <OperandType2> denom = vector2[i];
-          outputVector[i] = vector1[i] <OperatorSymbol> denom;
-          hasDivBy0 = hasDivBy0 || (denom == 0);
+          outputVector[i] = vector1[i] <OperatorSymbol> vector2[i];
+        }
+
+        for(int i = 0; i != n; i++) {
+          hasDivBy0 = hasDivBy0 || (vector2[i] == 0);
         }
       }
     }

Reply via email to