HIVE-19565: Vectorization: Fix NULL / Wrong Results issues in STRING Functions (Matt McCline, reviewed by Teddy Choi)

mmccline Fri, 15 Jun 2018 07:44:11 -0700

HIVE-19565: Vectorization: Fix NULL / Wrong Results issues in STRING Functions 
(Matt McCline, reviewed by Teddy Choi)



Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3f6a160e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3f6a160e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3f6a160e

Branch: refs/heads/branch-3
Commit: 3f6a160ed84d63377cd7922f095f6846829fedd8
Parents: abc504c
Author: Matt McCline <mmccl...@hortonworks.com>
Authored: Fri Jun 15 09:43:19 2018 -0500
Committer: Matt McCline <mmccl...@hortonworks.com>
Committed: Fri Jun 15 09:43:19 2018 -0500

----------------------------------------------------------------------
 .../CharScalarConcatStringGroupCol.java         |  51 ---
 .../StringGroupColConcatCharScalar.java         |  51 ---
 .../StringGroupColConcatStringScalar.java       |   3 +-
 .../StringGroupColConcatVarCharScalar.java      |  52 ---
 .../expressions/StringGroupConcatColCol.java    |   2 +-
 .../ql/exec/vector/expressions/StringLTrim.java |  25 +-
 .../ql/exec/vector/expressions/StringRTrim.java |  24 +-
 .../StringScalarConcatStringGroupCol.java       |   2 +-
 .../expressions/StringSubstrColStart.java       |  11 +-
 .../expressions/StringSubstrColStartLen.java    |  11 +-
 .../ql/exec/vector/expressions/StringTrim.java  |  35 +-
 .../VarCharScalarConcatStringGroupCol.java      |  52 ---
 .../hive/ql/udf/generic/GenericUDFConcat.java   |  11 +-
 .../ql/exec/vector/VectorRandomRowSource.java   | 332 +++++++++++++-
 .../expressions/TestVectorCastStatement.java    |  86 ++--
 .../expressions/TestVectorDateAddSub.java       |  84 +---
 .../vector/expressions/TestVectorDateDiff.java  | 112 ++---
 .../expressions/TestVectorStringConcat.java     | 427 +++++++++++++++++++
 .../TestVectorStringExpressions.java            |  16 +-
 .../expressions/TestVectorStringUnary.java      | 368 ++++++++++++++++
 .../vector/expressions/TestVectorSubStr.java    | 347 +++++++++++++++
 .../expressions/TestVectorTimestampExtract.java |  81 +---
 22 files changed, 1634 insertions(+), 549 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java
deleted file mode 100644
index 712b8de..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.expressions;
-
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-
-/**
- * Vectorized instruction to concatenate a scalar to a string column and put
- * the result in an output column.
- */
-public class CharScalarConcatStringGroupCol extends 
StringScalarConcatStringGroupCol {
-  private static final long serialVersionUID = 1L;
-
-  public CharScalarConcatStringGroupCol(byte[] value, int colNum, int 
outputColumnNum) {
-    super(value, colNum, outputColumnNum);
-  }
-
-  public CharScalarConcatStringGroupCol() {
-    super();
-  }
-
-  @Override
-  public VectorExpressionDescriptor.Descriptor getDescriptor() {
-    return (new VectorExpressionDescriptor.Builder())
-        .setMode(
-            VectorExpressionDescriptor.Mode.PROJECTION)
-        .setNumArguments(2)
-        .setArgumentTypes(
-            VectorExpressionDescriptor.ArgumentType.CHAR,
-            VectorExpressionDescriptor.ArgumentType.STRING_FAMILY)
-        .setInputExpressionTypes(
-            VectorExpressionDescriptor.InputExpressionType.SCALAR,
-            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatCharScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatCharScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatCharScalar.java
deleted file mode 100644
index bbebe6c..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatCharScalar.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.expressions;
-
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-
-/**
- * Vectorized instruction to concatenate a string column to a scalar and put
- * the result in an output column.
- */
-public class StringGroupColConcatCharScalar extends 
StringGroupColConcatStringScalar {
-  private static final long serialVersionUID = 1L;
-
-  public StringGroupColConcatCharScalar(int colNum, byte[] value, int 
outputColumnNum) {
-    super(colNum, value, outputColumnNum);
-  }
-
-  public StringGroupColConcatCharScalar() {
-    super();
-  }
-
-  @Override
-  public VectorExpressionDescriptor.Descriptor getDescriptor() {
-    return (new VectorExpressionDescriptor.Builder())
-        .setMode(
-            VectorExpressionDescriptor.Mode.PROJECTION)
-        .setNumArguments(2)
-        .setArgumentTypes(
-            VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
-            VectorExpressionDescriptor.ArgumentType.CHAR)
-        .setInputExpressionTypes(
-            VectorExpressionDescriptor.InputExpressionType.COLUMN,
-            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
index 9194e8b..896de85 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java
@@ -18,7 +18,6 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
-import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -164,7 +163,7 @@ public class StringGroupColConcatStringScalar extends 
VectorExpression {
         .setNumArguments(2)
         .setArgumentTypes(
             VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
-            VectorExpressionDescriptor.ArgumentType.STRING)
+            VectorExpressionDescriptor.ArgumentType.STRING_FAMILY)
         .setInputExpressionTypes(
             VectorExpressionDescriptor.InputExpressionType.COLUMN,
             VectorExpressionDescriptor.InputExpressionType.SCALAR).build();

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatVarCharScalar.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatVarCharScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatVarCharScalar.java
deleted file mode 100644
index 7349410..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatVarCharScalar.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.expressions;
-
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-
-/**
- * Vectorized instruction to concatenate a string column to a scalar and put
- * the result in an output column.
- */
-public class StringGroupColConcatVarCharScalar extends 
StringGroupColConcatStringScalar {
-  private static final long serialVersionUID = 1L;
-
-  public StringGroupColConcatVarCharScalar(int colNum, HiveVarchar value, int 
outputColumnNum) {
-    super(colNum, value.getValue().getBytes(), outputColumnNum);
-  }
-
-  public StringGroupColConcatVarCharScalar() {
-    super();
-  }
-
-  @Override
-  public VectorExpressionDescriptor.Descriptor getDescriptor() {
-    return (new VectorExpressionDescriptor.Builder())
-        .setMode(
-            VectorExpressionDescriptor.Mode.PROJECTION)
-        .setNumArguments(2)
-        .setArgumentTypes(
-            VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
-            VectorExpressionDescriptor.ArgumentType.VARCHAR)
-        .setInputExpressionTypes(
-            VectorExpressionDescriptor.InputExpressionType.COLUMN,
-            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
index 1c9433b..ceb7b26 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java
@@ -128,7 +128,7 @@ public class StringGroupConcatColCol extends 
VectorExpression {
           }
         } else {
           for(int i = 0; i != n; i++) {
-            if (!inV2.isNull[0]) {
+            if (!inV2.isNull[i]) {
               outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], 
start2[i], len2[i]);
             }
           }

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java
index 84f03cc..8a41e76 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java
@@ -23,6 +23,8 @@ import 
org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 public class StringLTrim extends StringUnaryUDFDirect {
   private static final long serialVersionUID = 1L;
 
+  private static final byte[] EMPTY_BYTES = new byte[0];
+
   public StringLTrim(int inputColumn, int outputColumnNum) {
     super(inputColumn, outputColumnNum);
   }
@@ -36,14 +38,23 @@ public class StringLTrim extends StringUnaryUDFDirect {
    * Operate on the data in place, and set the output by reference
    * to improve performance. Ignore null handling. That will be handled 
separately.
    */
-  protected void func(BytesColumnVector outV, byte[][] vector, int[] start, 
int[] length, int i) {
-    int j = start[i];
-
-    // skip past blank characters
-    while(j < start[i] + vector[i].length && vector[i][j] == 0x20) {
-      j++;
+  protected void func(BytesColumnVector outV, byte[][] vector, int[] start, 
int[] length,
+      int batchIndex) {
+    byte[] bytes = vector[batchIndex];
+    final int startIndex = start[batchIndex];
+
+    // Skip past blank characters.
+    final int end = startIndex + length[batchIndex];
+    int index = startIndex;
+    while(index < end && bytes[index] == 0x20) {
+      index++;
     }
 
-    outV.setVal(i, vector[i], j, length[i] - (j - start[i]));
+    final int resultLength = end - index;
+    if (resultLength == 0) {
+      outV.setVal(batchIndex, EMPTY_BYTES, 0, 0);
+      return;
+    }
+    outV.setVal(batchIndex, bytes, index, resultLength);
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java
index 5c087aa..6a3a220 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java
@@ -23,6 +23,8 @@ import 
org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 public class StringRTrim extends StringUnaryUDFDirect {
   private static final long serialVersionUID = 1L;
 
+  private static final byte[] EMPTY_BYTES = new byte[0];
+
   public StringRTrim(int inputColumn, int outputColumnNum) {
     super(inputColumn, outputColumnNum);
   }
@@ -36,15 +38,23 @@ public class StringRTrim extends StringUnaryUDFDirect {
    * Operate on the data in place, and set the output by reference
    * to improve performance. Ignore null handling. That will be handled 
separately.
    */
-  protected void func(BytesColumnVector outV, byte[][] vector, int[] start, 
int[] length, int i) {
-    int j = start[i] + length[i] - 1;
+  protected void func(BytesColumnVector outV, byte[][] vector, int[] start, 
int[] length,
+      int batchIndex) {
+
+    byte[] bytes = vector[batchIndex];
+    final int startIndex = start[batchIndex];
 
-    // skip trailing blank characters
-    while(j >= start[i] && vector[i][j] == 0x20) {
-      j--;
+    // Skip trailing blank characters.
+    int index = startIndex + length[batchIndex] - 1;
+    while(index >= startIndex && bytes[index] == 0x20) {
+      index--;
     }
 
-    // set output vector
-    outV.setVal(i, vector[i], start[i], (j - start[i]) + 1);
+    final int resultLength = index - startIndex + 1;
+    if (resultLength == 0) {
+      outV.setVal(batchIndex, EMPTY_BYTES, 0, 0);
+      return;
+    }
+    outV.setVal(batchIndex, bytes, startIndex, resultLength);
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
index db679b0..b099910 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java
@@ -163,7 +163,7 @@ public class StringScalarConcatStringGroupCol extends 
VectorExpression {
             VectorExpressionDescriptor.Mode.PROJECTION)
         .setNumArguments(2)
         .setArgumentTypes(
-            VectorExpressionDescriptor.ArgumentType.STRING,
+            VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
             VectorExpressionDescriptor.ArgumentType.STRING_FAMILY)
         .setInputExpressionTypes(
             VectorExpressionDescriptor.InputExpressionType.SCALAR,

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
index 411fc4b..2b97504 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java
@@ -146,12 +146,7 @@ public class StringSubstrColStart extends VectorExpression 
{
     outputColVector.isRepeating = false;
 
     if (inV.isRepeating) {
-      if (!inV.noNulls && inV.isNull[0]) {
-        outputIsNull[0] = true;
-        outputColVector.noNulls = false;
-        outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
-        return;
-      } else {
+      if (inV.noNulls || !inV.isNull[0]) {
         outputIsNull[0] = false;
         int offset = getSubstrStartOffset(vector[0], start[0], len[0], 
startIdx);
         if (offset != -1) {
@@ -159,6 +154,10 @@ public class StringSubstrColStart extends VectorExpression 
{
         } else {
           outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
         }
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+        outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
       }
       outputColVector.isRepeating = true;
       return;

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
index e28c0a7..fff3032 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
@@ -167,12 +167,7 @@ public class StringSubstrColStartLen extends 
VectorExpression {
     outputColVector.isRepeating = false;
 
     if (inV.isRepeating) {
-
-      if (!inV.noNulls && inV.isNull[0]) {
-        outputIsNull[0] = true;
-        outputColVector.noNulls = false;
-        outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
-      } else {
+      if (inV.noNulls || !inV.isNull[0]) {
         outputIsNull[0] = false;
         populateSubstrOffsets(vector[0], start[0], len[0], startIdx, length, 
offsetArray);
         if (offsetArray[0] != -1) {
@@ -180,6 +175,10 @@ public class StringSubstrColStartLen extends 
VectorExpression {
         } else {
           outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
         }
+      } else {
+        outputIsNull[0] = true;
+        outputColVector.noNulls = false;
+        outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length);
       }
       outputColVector.isRepeating = true;
       return;

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java
index 458ac7d..76afe7c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java
@@ -23,6 +23,8 @@ import 
org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 public class StringTrim extends StringUnaryUDFDirect {
   private static final long serialVersionUID = 1L;
 
+  private static final byte[] EMPTY_BYTES = new byte[0];
+
   public StringTrim(int inputColumn, int outputColumnNum) {
     super(inputColumn, outputColumnNum);
   }
@@ -37,20 +39,31 @@ public class StringTrim extends StringUnaryUDFDirect {
    * Operate on the data in place, and set the output by reference
    * to improve performance. Ignore null handling. That will be handled 
separately.
    */
-  protected void func(BytesColumnVector outV, byte[][] vector, int[] start, 
int[] length, int i) {
-    int l = start[i];
-    int r = start[i] + length[i] - 1;
+  protected void func(BytesColumnVector outV, byte[][] vector, int[] start, 
int[] length,
+      int batchIndex) {
 
-    // skip blank character on left
-    while(l <= r && vector[i][l] == 0x20) {
-      l++;
+    byte[] bytes = vector[batchIndex];
+    final int startIndex = start[batchIndex];
+    final int end = startIndex + length[batchIndex];
+    int leftIndex = startIndex;
+    while(leftIndex < end && bytes[leftIndex] == 0x20) {
+      leftIndex++;
     }
-
-    // skip blank characters on right
-    while(l <= r && vector[i][r] == 0x20) {
-      r--;
+    if (leftIndex == end) {
+      outV.setVal(batchIndex, EMPTY_BYTES, 0, 0);
+      return;
     }
 
-    outV.setVal(i, vector[i], l, (r - l) + 1);
+    // Have at least 1 non-blank; Skip trailing blank characters.
+    int rightIndex = end - 1;
+    final int rightLimit = leftIndex + 1;
+    while(rightIndex >= rightLimit && bytes[rightIndex] == 0x20) {
+      rightIndex--;
+    }
+    final int resultLength = rightIndex - leftIndex + 1;
+    if (resultLength <= 0) {
+      throw new RuntimeException("Not expected");
+    }
+    outV.setVal(batchIndex, bytes, leftIndex, resultLength);
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VarCharScalarConcatStringGroupCol.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VarCharScalarConcatStringGroupCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VarCharScalarConcatStringGroupCol.java
deleted file mode 100644
index 76e83e0..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VarCharScalarConcatStringGroupCol.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.expressions;
-
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-
-/**
- * Vectorized instruction to concatenate a scalar to a string column and put
- * the result in an output column.
- */
-public class VarCharScalarConcatStringGroupCol extends 
StringScalarConcatStringGroupCol {
-  private static final long serialVersionUID = 1L;
-
-  public VarCharScalarConcatStringGroupCol(HiveVarchar value, int colNum, int 
outputColumnNum) {
-    super(value.getValue().getBytes(), colNum, outputColumnNum);
-  }
-
-  public VarCharScalarConcatStringGroupCol() {
-    super();
-  }
-
-  @Override
-  public VectorExpressionDescriptor.Descriptor getDescriptor() {
-    return (new VectorExpressionDescriptor.Builder())
-        .setMode(
-            VectorExpressionDescriptor.Mode.PROJECTION)
-        .setNumArguments(2)
-        .setArgumentTypes(
-            VectorExpressionDescriptor.ArgumentType.VARCHAR,
-            VectorExpressionDescriptor.ArgumentType.STRING_FAMILY)
-        .setInputExpressionTypes(
-            VectorExpressionDescriptor.InputExpressionType.SCALAR,
-            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java
index 62a7560..92588dd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java
@@ -25,11 +25,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
 import 
org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupConcatColCol;
 import 
org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatStringScalar;
-import 
org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatCharScalar;
-import 
org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatVarCharScalar;
 import 
org.apache.hadoop.hive.ql.exec.vector.expressions.StringScalarConcatStringGroupCol;
-import 
org.apache.hadoop.hive.ql.exec.vector.expressions.CharScalarConcatStringGroupCol;
-import 
org.apache.hadoop.hive.ql.exec.vector.expressions.VarCharScalarConcatStringGroupCol;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
@@ -53,11 +49,10 @@ extended = "Returns NULL if any argument is NULL.\n"
 + "Example:\n"
 + "  > SELECT _FUNC_('abc', 'def') FROM src LIMIT 1;\n"
 + "  'abcdef'")
-@VectorizedExpressions({StringGroupConcatColCol.class,
+@VectorizedExpressions({
+    StringGroupConcatColCol.class,
     StringGroupColConcatStringScalar.class,
-    StringGroupColConcatCharScalar.class, 
StringGroupColConcatVarCharScalar.class,
-    StringScalarConcatStringGroupCol.class,
-    CharScalarConcatStringGroupCol.class, 
VarCharScalarConcatStringGroupCol.class})
+    StringScalarConcatStringGroupCol.class})
 public class GenericUDFConcat extends GenericUDF {
   private transient ObjectInspector[] argumentOIs;
   private transient StringConverter[] stringConverters;

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
index 641ff10..0e4dcfd 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
@@ -20,12 +20,17 @@ package org.apache.hadoop.hive.ql.exec.vector;
 
 import java.sql.Date;
 import java.sql.Timestamp;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Random;
 import java.util.Set;
 
+import org.apache.commons.lang.StringUtils;
+
 import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
 import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -81,6 +86,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 import org.apache.hive.common.util.DateUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
 
 import com.google.common.base.Preconditions;
 import com.google.common.base.Charsets;
@@ -114,6 +120,8 @@ public class VectorRandomRowSource {
 
   private StructObjectInspector rowStructObjectInspector;
 
+  private List<GenerationSpec> generationSpecList;
+
   private String[] alphabets;
 
   private boolean allowNull;
@@ -121,6 +129,92 @@ public class VectorRandomRowSource {
   private boolean addEscapables;
   private String needsEscapeStr;
 
+  public static class StringGenerationOption {
+
+    private boolean generateSentences;
+    private boolean addPadding;
+
+    public StringGenerationOption(boolean generateSentences, boolean addPadding) {
+      this.generateSentences = generateSentences;
+      this.addPadding = addPadding;
+    }
+
+    public boolean getGenerateSentences() {
+      return generateSentences;
+    }
+
+    public boolean getAddPadding() {
+      return addPadding;
+    }
+  }
+
+  public static class GenerationSpec {
+
+    public static enum GenerationKind {
+      SAME_TYPE,
+      OMIT_GENERATION,
+      STRING_FAMILY,
+      STRING_FAMILY_OTHER_TYPE_VALUE,
+      TIMESTAMP_MILLISECONDS
+    }
+
+    private final GenerationKind generationKind;
+    private final TypeInfo typeInfo;
+    private final TypeInfo sourceTypeInfo;
+    private final StringGenerationOption stringGenerationOption;
+
+    private GenerationSpec(GenerationKind generationKind, TypeInfo typeInfo,
+        TypeInfo sourceTypeInfo, StringGenerationOption stringGenerationOption) {
+      this.generationKind = generationKind;
+      this.typeInfo = typeInfo;
+      this.sourceTypeInfo = sourceTypeInfo;
+      this.stringGenerationOption = stringGenerationOption;
+    }
+
+    public GenerationKind getGenerationKind() {
+      return generationKind;
+    }
+
+    public TypeInfo getTypeInfo() {
+      return typeInfo;
+    }
+
+    public TypeInfo getSourceTypeInfo() {
+      return sourceTypeInfo;
+    }
+
+    public StringGenerationOption getStringGenerationOption() {
+      return stringGenerationOption;
+    }
+
+    public static GenerationSpec createSameType(TypeInfo typeInfo) {
+      return new GenerationSpec(
+          GenerationKind.SAME_TYPE, typeInfo, null, null);
+    }
+
+    public static GenerationSpec createOmitGeneration(TypeInfo typeInfo) {
+      return new GenerationSpec(
+          GenerationKind.OMIT_GENERATION, typeInfo, null, null);
+    }
+
+    public static GenerationSpec createStringFamily(TypeInfo typeInfo,
+        StringGenerationOption stringGenerationOption) {
+      return new GenerationSpec(
+          GenerationKind.STRING_FAMILY, typeInfo, null, stringGenerationOption);
+    }
+
+    public static GenerationSpec createStringFamilyOtherTypeValue(TypeInfo typeInfo,
+        TypeInfo otherTypeTypeInfo) {
+      return new GenerationSpec(
+          GenerationKind.STRING_FAMILY_OTHER_TYPE_VALUE, typeInfo, otherTypeTypeInfo, null);
+    }
+
+    public static GenerationSpec createTimestampMilliseconds(TypeInfo typeInfo) {
+      return new GenerationSpec(
+          GenerationKind.TIMESTAMP_MILLISECONDS, typeInfo, null, null);
+    }
+  }
+
   public List<String> typeNames() {
     return typeNames;
   }
@@ -186,8 +280,26 @@ public class VectorRandomRowSource {
       boolean allowNull, List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList) {
     this.r = r;
     this.allowNull = allowNull;
+
+    List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
+    for (String explicitTypeName : explicitTypeNameList) {
+      TypeInfo typeInfo =
+          TypeInfoUtils.getTypeInfoFromTypeString(explicitTypeName);
+      generationSpecList.add(
+          GenerationSpec.createSameType(typeInfo));
+    }
+
+    chooseSchema(
+        SupportedTypes.ALL, null, generationSpecList, explicitDataTypePhysicalVariationList,
+        maxComplexDepth);
+  }
+
+  public void initGenerationSpecSchema(Random r, List<GenerationSpec> generationSpecList, int maxComplexDepth,
+      boolean allowNull, List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList) {
+    this.r = r;
+    this.allowNull = allowNull;
     chooseSchema(
-        SupportedTypes.ALL, null, explicitTypeNameList, explicitDataTypePhysicalVariationList,
+        SupportedTypes.ALL, null, generationSpecList, explicitDataTypePhysicalVariationList,
         maxComplexDepth);
   }
 
@@ -418,14 +530,14 @@ public class VectorRandomRowSource {
   }
 
   private void chooseSchema(SupportedTypes supportedTypes, Set<String> allowedTypeNameSet,
-      List<String> explicitTypeNameList,
+      List<GenerationSpec> generationSpecList,
       List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList,
       int maxComplexDepth) {
     HashSet<Integer> hashSet = null;
     final boolean allTypes;
     final boolean onlyOne;
-    if (explicitTypeNameList != null) {
-      columnCount = explicitTypeNameList.size();
+    if (generationSpecList != null) {
+      columnCount = generationSpecList.size();
       allTypes = false;
       onlyOne = false;
     } else if (allowedTypeNameSet != null) {
@@ -472,8 +584,8 @@ public class VectorRandomRowSource {
       final String typeName;
       DataTypePhysicalVariation dataTypePhysicalVariation = DataTypePhysicalVariation.NONE;
 
-      if (explicitTypeNameList != null) {
-        typeName = explicitTypeNameList.get(c);
+      if (generationSpecList != null) {
+        typeName = generationSpecList.get(c).getTypeInfo().getTypeName();
         dataTypePhysicalVariation = explicitDataTypePhysicalVariationList.get(c);
       } else if (onlyOne || allowedTypeNameSet != null) {
         typeName = getRandomTypeName(r, supportedTypes, allowedTypeNameSet);
@@ -563,6 +675,154 @@ public class VectorRandomRowSource {
     rowStructObjectInspector = ObjectInspectorFactory.
         getStandardStructObjectInspector(columnNames, objectInspectorList);
     alphabets = new String[columnCount];
+
+    this.generationSpecList = generationSpecList;
+  }
+
+  private static ThreadLocal<DateFormat> DATE_FORMAT =
+      new ThreadLocal<DateFormat>() {
+        @Override
+        protected DateFormat initialValue() {
+          return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+        }
+      };
+
+  private static long MIN_FOUR_DIGIT_YEAR_MILLIS = parseToMillis("0001-01-01 00:00:00");
+  private static long MAX_FOUR_DIGIT_YEAR_MILLIS = parseToMillis("9999-01-01 00:00:00");
+
+  private static long parseToMillis(String s) {
+    try {
+      return DATE_FORMAT.get().parse(s).getTime();
+    } catch (ParseException ex) {
+      throw new RuntimeException(ex);
+    }
+  }
+
+  private static String[] randomWords =
+      new String[] {
+    "groovy",
+    "attack",
+    "wacky",
+    "kiss",
+    "to",
+    "the",
+    "a",
+    "thoughtless",
+    "blushing",
+    "pay",
+    "rule",
+    "profuse",
+    "need",
+    "smell",
+    "bucket",
+    "board",
+    "eggs",
+    "laughable",
+    "idiotic",
+    "direful",
+    "thoughtful",
+    "curious",
+    "show",
+    "surge",
+    "opines",
+    "cowl",
+    "signal",
+    ""};
+  private static int randomWordCount = randomWords.length;
+
+  private static Object toStringFamilyObject(TypeInfo typeInfo, String string, boolean isWritable) {
+
+    PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
+    PrimitiveCategory primitiveCategory =
+        primitiveTypeInfo.getPrimitiveCategory();
+    Object object;
+    switch (primitiveCategory) {
+    case STRING:
+      if (isWritable) {
+        object = new Text(string);
+      } else {
+        object = string;
+      }
+      break;
+    case CHAR:
+      {
+        HiveChar hiveChar =
+            new HiveChar(
+                string, ((CharTypeInfo) typeInfo).getLength());
+        if (isWritable) {
+          object = new HiveCharWritable(hiveChar);
+        } else {
+          object = hiveChar;
+        }
+      }
+      break;
+    case VARCHAR:
+      {
+        HiveVarchar hiveVarchar =
+            new HiveVarchar(
+                string, ((VarcharTypeInfo) typeInfo).getLength());
+        if (isWritable) {
+          object = new HiveVarcharWritable(hiveVarchar);
+        } else {
+          object = hiveVarchar;
+        }
+      }
+      break;
+    default:
+      throw new RuntimeException("Unexpected string family category " + primitiveCategory);
+    }
+    return object;
+  }
+
+  public static Object randomStringFamilyOtherTypeValue(Random random, TypeInfo typeInfo,
+      TypeInfo specialValueTypeInfo, boolean isWritable) {
+    String string;
+    string =
+        VectorRandomRowSource.randomPrimitiveObject(
+            random, (PrimitiveTypeInfo) specialValueTypeInfo).toString();
+    return toStringFamilyObject(typeInfo, string, isWritable);
+  }
+
+  public static Object randomStringFamily(Random random, TypeInfo typeInfo,
+      StringGenerationOption stringGenerationOption, boolean isWritable) {
+
+    String string;
+    if (stringGenerationOption == null) {
+      string =
+          VectorRandomRowSource.randomPrimitiveObject(
+              random, (PrimitiveTypeInfo) typeInfo).toString();
+    } else {
+      boolean generateSentences = stringGenerationOption.getGenerateSentences();
+      boolean addPadding = stringGenerationOption.getAddPadding();
+      StringBuilder sb = new StringBuilder();
+      if (addPadding && random.nextBoolean()) {
+        sb.append(StringUtils.leftPad("", random.nextInt(5)));
+      }
+      if (generateSentences) {
+        boolean capitalizeFirstWord = random.nextBoolean();
+        final int n = random.nextInt(10);
+        for (int i = 0; i < n; i++) {
+          String randomWord = randomWords[random.nextInt(randomWordCount)];
+          if (randomWord.length() > 0 &&
+              ((i == 0 && capitalizeFirstWord) || random.nextInt(20) == 0)) {
+            randomWord = Character.toUpperCase(randomWord.charAt(0)) + randomWord.substring(1);
+          }
+          if (i > 0) {
+            sb.append(" ");
+          }
+          sb.append(randomWord);
+        }
+      } else {
+        sb.append(
+            VectorRandomRowSource.randomPrimitiveObject(
+                random, (PrimitiveTypeInfo) typeInfo).toString());
+      }
+      if (addPadding && random.nextBoolean()) {
+        sb.append(StringUtils.leftPad("", random.nextInt(5)));
+      }
+      string = sb.toString();
+    }
+    return toStringFamilyObject(typeInfo, string, isWritable);
   }
 
   public Object[][] randomRows(int n) {
@@ -577,8 +837,64 @@ public class VectorRandomRowSource {
   public Object[] randomRow() {
 
     final Object row[] = new Object[columnCount];
-    for (int c = 0; c < columnCount; c++) {
-      row[c] = randomWritable(c);
+
+    if (generationSpecList == null) {
+      for (int c = 0; c < columnCount; c++) {
+        row[c] = randomWritable(c);
+      }
+    } else {
+      for (int c = 0; c < columnCount; c++) {
+        GenerationSpec generationSpec = generationSpecList.get(c);
+        GenerationSpec.GenerationKind generationKind = generationSpec.getGenerationKind();
+        Object object;
+        switch (generationKind) {
+        case SAME_TYPE:
+          object = randomWritable(c);
+          break;
+        case OMIT_GENERATION:
+          object = null;
+          break;
+        case STRING_FAMILY:
+        {
+          TypeInfo typeInfo = generationSpec.getTypeInfo();
+          StringGenerationOption stringGenerationOption =
+              generationSpec.getStringGenerationOption();
+          object = randomStringFamily(
+              r, typeInfo, stringGenerationOption, true);
+        }
+        break;
+        case STRING_FAMILY_OTHER_TYPE_VALUE:
+          {
+            TypeInfo typeInfo = generationSpec.getTypeInfo();
+            TypeInfo otherTypeTypeInfo = generationSpec.getSourceTypeInfo();
+            object = randomStringFamilyOtherTypeValue(
+                r, typeInfo, otherTypeTypeInfo, true);
+          }
+          break;
+        case TIMESTAMP_MILLISECONDS:
+          {
+            LongWritable longWritable = (LongWritable) randomWritable(c);
+            if (longWritable != null) {
+
+              while (true) {
+                long longValue = longWritable.get();
+                if (longValue >= MIN_FOUR_DIGIT_YEAR_MILLIS &&
+                    longValue <= MAX_FOUR_DIGIT_YEAR_MILLIS) {
+                  break;
+                }
+                longWritable.set(
+                    (Long) VectorRandomRowSource.randomPrimitiveObject(
+                        r, (PrimitiveTypeInfo) TypeInfoFactory.longTypeInfo));
+              }
+            }
+            object = longWritable;
+          }
+          break;
+        default:
+          throw new RuntimeException("Unexpected generationKind " + generationKind);
+        }
+        row[c] = object;
+      }
     }
     return row;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java
index 30e8906..9e61fcd 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -252,24 +253,6 @@ public class TestVectorCastStatement {
     final int decimal64Scale =
         (isDecimal64 ? ((DecimalTypeInfo) typeInfo).getScale() : 0);
 
-    List<String> explicitTypeNameList = new ArrayList<String>();
-    List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
-    explicitTypeNameList.add(typeName);
-    explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation);
-
-    VectorRandomRowSource rowSource = new VectorRandomRowSource();
-
-    rowSource.initExplicitSchema(
-        random, explicitTypeNameList, /* maxComplexDepth */ 0, /* allowNull */ true,
-        explicitDataTypePhysicalVariationList);
-
-    List<String> columns = new ArrayList<String>();
-    columns.add("col0");
-    ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(typeInfo, "col0", "table", false);
-
-    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
-    children.add(col1Expr);
-
     //----------------------------------------------------------------------------------------------
 
     String targetTypeName;
@@ -287,53 +270,38 @@ public class TestVectorCastStatement {
 
     //----------------------------------------------------------------------------------------------
 
-    String[] columnNames = columns.toArray(new String[0]);
-
-    Object[][] randomRows = rowSource.randomRows(100000);
-
+    GenerationSpec generationSpec;
     if (needsValidDataTypeData(targetTypeInfo) &&
         (primitiveCategory == PrimitiveCategory.STRING ||
          primitiveCategory == PrimitiveCategory.CHAR ||
          primitiveCategory == PrimitiveCategory.VARCHAR)) {
-
-      // Regenerate string family with valid data for target data type.
-      final int rowCount = randomRows.length;
-      for (int i = 0; i < rowCount; i++) {
-        Object object = randomRows[i][0];
-        if (object == null) {
-          continue;
-        }
-        String string =
-            VectorRandomRowSource.randomPrimitiveObject(
-                random, (PrimitiveTypeInfo) targetTypeInfo).toString();
-        Object newObject;
-        switch (primitiveCategory) {
-        case STRING:
-          newObject = new Text(string);
-          break;
-        case CHAR:
-          {
-            HiveChar hiveChar =
-                new HiveChar(
-                    string, ((CharTypeInfo) typeInfo).getLength());
-            newObject = new HiveCharWritable(hiveChar);
-          }
-          break;
-        case VARCHAR:
-          {
-            HiveVarchar hiveVarchar =
-                new HiveVarchar(
-                    string, ((VarcharTypeInfo) typeInfo).getLength());
-            newObject = new HiveVarcharWritable(hiveVarchar);
-          }
-          break;
-        default:
-          throw new RuntimeException("Unexpected string family category " + primitiveCategory);
-        }
-        randomRows[i][0] = newObject;
-      }
+      generationSpec = GenerationSpec.createStringFamilyOtherTypeValue(typeInfo, targetTypeInfo);
+    } else {
+      generationSpec = GenerationSpec.createSameType(typeInfo);
     }
 
+    List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
+    List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>();
+    generationSpecList.add(generationSpec);
+    explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation);
+
+    VectorRandomRowSource rowSource = new VectorRandomRowSource();
+
+    rowSource.initGenerationSpecSchema(
+        random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true,
+        explicitDataTypePhysicalVariationList);
+
+    List<String> columns = new ArrayList<String>();
+    columns.add("col0");
+    ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(typeInfo, "col0", "table", false);
+
+    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+    children.add(col1Expr);
+
+    String[] columnNames = columns.toArray(new String[0]);
+
+    Object[][] randomRows = rowSource.randomRows(100000);
+
     VectorRandomBatchSource batchSource =
         VectorRandomBatchSource.createInterestingBatches(
             random,

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java
index 0f658c6..f5deca5 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
@@ -156,52 +157,6 @@ public class TestVectorDateAddSub {
     }
   }
 
-  private static final String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-
-  private Object randomDateStringFamily(
-      Random random, TypeInfo dateTimeStringTypeInfo, boolean wantWritable) {
-
-    String randomDateString = VectorRandomRowSource.randomPrimitiveDateStringObject(random);
-    if (random.nextInt(40) == 39) {
-
-      // Randomly corrupt.
-      int index = random.nextInt(randomDateString.length());
-      char[] chars = randomDateString.toCharArray();
-      chars[index] = alphabet.charAt(random.nextInt(alphabet.length()));
-      randomDateString = String.valueOf(chars);
-    }
-
-    PrimitiveCategory dateTimeStringPrimitiveCategory =
-        ((PrimitiveTypeInfo) dateTimeStringTypeInfo).getPrimitiveCategory();
-    switch (dateTimeStringPrimitiveCategory) {
-    case STRING:
-      return randomDateString;
-    case CHAR:
-      {
-        HiveChar hiveChar =
-            new HiveChar(randomDateString, ((CharTypeInfo) dateTimeStringTypeInfo).getLength());
-        if (wantWritable) {
-          return new HiveCharWritable(hiveChar);
-        } else {
-          return hiveChar;
-        }
-      }
-    case VARCHAR:
-      {
-        HiveVarchar hiveVarchar =
-            new HiveVarchar(
-                randomDateString, ((VarcharTypeInfo) dateTimeStringTypeInfo).getLength());
-        if (wantWritable) {
-          return new HiveVarcharWritable(hiveVarchar);
-        } else {
-          return hiveVarchar;
-        }
-      }
-    default:
-      throw new RuntimeException("Unexpected string family category " + dateTimeStringPrimitiveCategory);
-    }
-  }
-
   private void doDateAddSubTestsWithDiffColumnScalar(Random random, String dateTimeStringTypeName,
       String integerTypeName, ColumnScalarMode columnScalarMode, boolean isAdd)
           throws Exception {
@@ -220,7 +175,7 @@ public class TestVectorDateAddSub {
     PrimitiveCategory integerPrimitiveCategory =
         ((PrimitiveTypeInfo) integerTypeInfo).getPrimitiveCategory();
 
-    List<String> explicitTypeNameList = new ArrayList<String>();
+    List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
     List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList =
         new ArrayList<DataTypePhysicalVariation>();
 
@@ -229,7 +184,14 @@ public class TestVectorDateAddSub {
     ExprNodeDesc col1Expr;
     if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
         columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) {
-      explicitTypeNameList.add(dateTimeStringTypeName);
+      if (!isStringFamily) {
+        generationSpecList.add(
+            GenerationSpec.createSameType(dateTimeStringTypeInfo));
+      } else {
+        generationSpecList.add(
+            GenerationSpec.createStringFamilyOtherTypeValue(
+                dateTimeStringTypeInfo, TypeInfoFactory.dateTypeInfo));
+      }
       explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
 
       String columnName = "col" + (columnNum++);
@@ -243,15 +205,16 @@ public class TestVectorDateAddSub {
               random, (PrimitiveTypeInfo) dateTimeStringTypeInfo);
       } else {
         scalar1Object =
-            randomDateStringFamily(
-                random, dateTimeStringTypeInfo, /* wantWritable */ false);
+            VectorRandomRowSource.randomStringFamilyOtherTypeValue(
+                random, dateTimeStringTypeInfo, TypeInfoFactory.dateTypeInfo, false);
       }
       col1Expr = new ExprNodeConstantDesc(dateTimeStringTypeInfo, scalar1Object);
     }
     ExprNodeDesc col2Expr;
     if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
         columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) {
-      explicitTypeNameList.add(integerTypeName);
+      generationSpecList.add(
+          GenerationSpec.createSameType(integerTypeInfo));
       explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
 
       String columnName = "col" + (columnNum++);
@@ -277,27 +240,12 @@ public class TestVectorDateAddSub {
 
     VectorRandomRowSource rowSource = new VectorRandomRowSource();
 
-    rowSource.initExplicitSchema(
-        random, explicitTypeNameList, /* maxComplexDepth */ 0, /* allowNull */ true,
+    rowSource.initGenerationSpecSchema(
+        random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true,
         explicitDataTypePhysicalVariationList);
 
     Object[][] randomRows = rowSource.randomRows(100000);
 
-    if (isStringFamily) {
-      if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
-          columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) {
-        for (int i = 0; i < randomRows.length; i++) {
-          Object[] row = randomRows[i];
-          Object object = row[columnNum - 1];
-          if (row[0] != null) {
-            row[0] =
-                randomDateStringFamily(
-                    random, dateTimeStringTypeInfo, /* wantWritable */ true);
-          }
-        }
-      }
-    }
-
     if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
         columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) {
 

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java
index 80a1118..dce7ccf 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
@@ -70,8 +71,6 @@ import org.junit.Test;
 
 public class TestVectorDateDiff {
 
-  private static final boolean corruptDateStrings = false;
-
   @Test
   public void testDateDate() throws Exception {
     Random random = new Random(7743);
@@ -152,52 +151,6 @@ public class TestVectorDateDiff {
     }
   }
 
-  private static final String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-
-  private Object randomDateStringFamily(
-      Random random, TypeInfo dateTimeStringTypeInfo, boolean wantWritable) {
-
-    String randomDateString = VectorRandomRowSource.randomPrimitiveDateStringObject(random);
-    if (corruptDateStrings && random.nextInt(40) == 39) {
-
-      // Randomly corrupt.
-      int index = random.nextInt(randomDateString.length());
-      char[] chars = randomDateString.toCharArray();
-      chars[index] = alphabet.charAt(random.nextInt(alphabet.length()));
-      randomDateString = String.valueOf(chars);
-    }
-
-    PrimitiveCategory dateTimeStringPrimitiveCategory =
-        ((PrimitiveTypeInfo) dateTimeStringTypeInfo).getPrimitiveCategory();
-    switch (dateTimeStringPrimitiveCategory) {
-    case STRING:
-      return randomDateString;
-    case CHAR:
-      {
-        HiveChar hiveChar =
-            new HiveChar(randomDateString, ((CharTypeInfo) dateTimeStringTypeInfo).getLength());
-        if (wantWritable) {
-          return new HiveCharWritable(hiveChar);
-        } else {
-          return hiveChar;
-        }
-      }
-    case VARCHAR:
-      {
-        HiveVarchar hiveVarchar =
-            new HiveVarchar(
-                randomDateString, ((VarcharTypeInfo) dateTimeStringTypeInfo).getLength());
-        if (wantWritable) {
-          return new HiveVarcharWritable(hiveVarchar);
-        } else {
-          return hiveVarchar;
-        }
-      }
-    default:
-      throw new RuntimeException("Unexpected string family category " + dateTimeStringPrimitiveCategory);
-    }
-  }
-
   private void doDateDiffTestsWithDiffColumnScalar(Random random, String dateTimeStringTypeName1,
       String dateTimeStringTypeName2, ColumnScalarMode columnScalarMode)
           throws Exception {
@@ -220,7 +173,7 @@ public class TestVectorDateDiff {
          dateTimeStringPrimitiveCategory2 == PrimitiveCategory.CHAR ||
          dateTimeStringPrimitiveCategory2 == PrimitiveCategory.VARCHAR);
 
-    List<String> explicitTypeNameList = new ArrayList<String>();
+    List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
     List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList =
         new ArrayList<DataTypePhysicalVariation>();
 
@@ -229,7 +182,14 @@ public class TestVectorDateDiff {
     ExprNodeDesc col1Expr;
     if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
         columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) {
-      explicitTypeNameList.add(dateTimeStringTypeName1);
+      if (!isStringFamily1) {
+        generationSpecList.add(
+            GenerationSpec.createSameType(dateTimeStringTypeInfo1));
+      } else {
+        generationSpecList.add(
+            GenerationSpec.createStringFamilyOtherTypeValue(
+                dateTimeStringTypeInfo1, TypeInfoFactory.dateTypeInfo));
+      }
       explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
 
       String columnName = "col" + (columnNum++);
@@ -243,15 +203,23 @@ public class TestVectorDateDiff {
               random, (PrimitiveTypeInfo) dateTimeStringTypeInfo1);
       } else {
         scalar1Object =
-            randomDateStringFamily(
-                random, dateTimeStringTypeInfo1, /* wantWritable */ false);
+            VectorRandomRowSource.randomStringFamilyOtherTypeValue(
+                random, dateTimeStringTypeInfo1, TypeInfoFactory.dateTypeInfo, false);
       }
       col1Expr = new ExprNodeConstantDesc(dateTimeStringTypeInfo1, scalar1Object);
     }
     ExprNodeDesc col2Expr;
     if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
         columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) {
-      explicitTypeNameList.add(dateTimeStringTypeName2);
+      if (!isStringFamily2) {
+        generationSpecList.add(
+            GenerationSpec.createSameType(dateTimeStringTypeInfo2));
+      } else {
+        generationSpecList.add(
+            GenerationSpec.createStringFamilyOtherTypeValue(
+                dateTimeStringTypeInfo2, TypeInfoFactory.dateTypeInfo));
+      }
+
       explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
 
       String columnName = "col" + (columnNum++);
@@ -265,8 +233,8 @@ public class TestVectorDateDiff {
               random, (PrimitiveTypeInfo) dateTimeStringTypeInfo2);
       } else {
         scalar2Object =
-            randomDateStringFamily(
-                random, dateTimeStringTypeInfo2, /* wantWritable */ false);
+            VectorRandomRowSource.randomStringFamilyOtherTypeValue(
+                random, dateTimeStringTypeInfo2, TypeInfoFactory.dateTypeInfo, false);
       }
       col2Expr = new ExprNodeConstantDesc(dateTimeStringTypeInfo2, scalar2Object);
     }
@@ -281,42 +249,12 @@ public class TestVectorDateDiff {
 
     VectorRandomRowSource rowSource = new VectorRandomRowSource();
 
-    rowSource.initExplicitSchema(
-        random, explicitTypeNameList, /* maxComplexDepth */ 0, /* allowNull */ true,
+    rowSource.initGenerationSpecSchema(
+        random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true,
         explicitDataTypePhysicalVariationList);
 
     Object[][] randomRows = rowSource.randomRows(100000);
 
-    if (isStringFamily1) {
-      if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
-          columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) {
-        for (int i = 0; i < randomRows.length; i++) {
-          Object[] row = randomRows[i];
-          Object object = row[columnNum - 1];
-          if (row[0] != null) {
-            row[0] =
-                randomDateStringFamily(
-                    random, dateTimeStringTypeInfo1, /* wantWritable */ true);
-          }
-        }
-      }
-    }
-
-    if (isStringFamily2) {
-      if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
-          columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) {
-        for (int i = 0; i < randomRows.length; i++) {
-          Object[] row = randomRows[i];
-          Object object = row[columnNum - 1];
-          if (row[columnNum - 1] != null) {
-            row[columnNum - 1] =
-                randomDateStringFamily(
-                    random, dateTimeStringTypeInfo2, /* wantWritable */ true);
-          }
-        }
-      }
-    }
-
     VectorRandomBatchSource batchSource =
         VectorRandomBatchSource.createInterestingBatches(
             random,

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java
new file mode 100644
index 0000000..a87a8b4
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java
@@ -0,0 +1,427 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import 
org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateAdd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateSub;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
+import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+public class TestVectorStringConcat {
+
+  /**
+   * Runs the concat comparison tests with both operands typed as
+   * {@code string}, using a fixed random seed.
+   */
+  @Test
+  public void testString() throws Exception {
+    final Random seededRandom = new Random(12882);
+    doStringConcatTests(seededRandom, "string", "string");
+  }
+
+  /**
+   * Runs the concat comparison tests for CHAR operands: char(20) on the left
+   * against char, string and varchar on the right, then char(10) on the right
+   * of a string. All combinations draw from one seeded {@link Random}, so the
+   * call order below is significant.
+   */
+  @Test
+  public void testChar() throws Exception {
+    final Random seededRandom = new Random(12882);
+
+    final String[] rightTypeNames = { "char(10)", "string", "varchar(10)" };
+    for (String rightTypeName : rightTypeNames) {
+      doStringConcatTests(seededRandom, "char(20)", rightTypeName);
+    }
+    doStringConcatTests(seededRandom, "string", "char(10)");
+  }
+
+  /**
+   * Runs the concat comparison tests for VARCHAR operands: varchar(20) on the
+   * left against varchar, string and char on the right, then varchar(10) on
+   * the right of a string. All combinations draw from one seeded
+   * {@link Random}, so the call order below is significant.
+   */
+  @Test
+  public void testVarchar() throws Exception {
+    final Random seededRandom = new Random(12882);
+
+    final String[] rightTypeNames = { "varchar(10)", "string", "char(10)" };
+    for (String rightTypeName : rightTypeNames) {
+      doStringConcatTests(seededRandom, "varchar(20)", rightTypeName);
+    }
+    doStringConcatTests(seededRandom, "string", "varchar(10)");
+  }
+
+  /**
+   * Evaluation paths whose results are compared against each other. The
+   * declaration order matters: the comparison code treats index 0
+   * ({@code ROW_MODE}) as the expected value for the remaining modes.
+   */
+  public enum StringConcatTestMode {
+    ROW_MODE, ADAPTOR, VECTOR_EXPRESSION;
+
+    /** Number of declared modes. */
+    static final int count = StringConcatTestMode.values().length;
+  }
+
+  /**
+   * Which of the two concat operands is a batch column and which is a
+   * constant: both columns, column then scalar, or scalar then column.
+   */
+  public enum ColumnScalarMode {
+    COLUMN_COLUMN, COLUMN_SCALAR, SCALAR_COLUMN;
+
+    /** Number of declared modes. */
+    static final int count = ColumnScalarMode.values().length;
+  }
+
+  private void doStringConcatTests(Random random, String stringTypeName1, 
String stringTypeName2)
+          throws Exception {
+    for (ColumnScalarMode columnScalarMode : ColumnScalarMode.values()) {
+      doStringConcatTestsWithDiffColumnScalar(
+          random, stringTypeName1, stringTypeName2, columnScalarMode);
+    }
+  }
+
+  /**
+   * Evaluates concat for one combination of operand types and column/scalar
+   * layout in every {@link StringConcatTestMode}, then fails the test on the
+   * first row whose ADAPTOR or VECTOR_EXPRESSION result differs from the
+   * ROW_MODE result.
+   *
+   * @param random random source used for scalar operands, row generation and
+   *     batch generation
+   * @param stringTypeName1 type name of the first concat operand
+   * @param stringTypeName2 type name of the second concat operand
+   * @param columnScalarMode which operands are columns and which are constants
+   */
+  private void doStringConcatTestsWithDiffColumnScalar(Random random,
+      String stringTypeName1, String stringTypeName2,
+      ColumnScalarMode columnScalarMode)
+          throws Exception {
+
+    TypeInfo stringTypeInfo1 =
+        TypeInfoUtils.getTypeInfoFromTypeString(stringTypeName1);
+    TypeInfo stringTypeInfo2 =
+        TypeInfoUtils.getTypeInfoFromTypeString(stringTypeName2);
+
+    String functionName = "concat";
+
+    List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>();
+    List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList =
+        new ArrayList<DataTypePhysicalVariation>();
+    List<String> columns = new ArrayList<String>();
+
+    // Operand 1 must be built before operand 2: both may draw from random.
+    boolean isColumn1 =
+        columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
+        columnScalarMode == ColumnScalarMode.COLUMN_SCALAR;
+    ExprNodeDesc col1Expr =
+        createOperand(
+            random, stringTypeInfo1, isColumn1,
+            generationSpecList, explicitDataTypePhysicalVariationList,
+            columns);
+
+    boolean isColumn2 =
+        columnScalarMode == ColumnScalarMode.COLUMN_COLUMN ||
+        columnScalarMode == ColumnScalarMode.SCALAR_COLUMN;
+    ExprNodeDesc col2Expr =
+        createOperand(
+            random, stringTypeInfo2, isColumn2,
+            generationSpecList, explicitDataTypePhysicalVariationList,
+            columns);
+
+    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+    children.add(col1Expr);
+    children.add(col2Expr);
+
+    //--------------------------------------------------------------------------
+
+    String[] columnNames = columns.toArray(new String[0]);
+
+    VectorRandomRowSource rowSource = new VectorRandomRowSource();
+
+    rowSource.initGenerationSpecSchema(
+        random, generationSpecList, /* maxComplexDepth */ 0,
+        /* allowNull */ true,
+        explicitDataTypePhysicalVariationList);
+
+    Object[][] randomRows = rowSource.randomRows(100000);
+
+    VectorRandomBatchSource batchSource =
+        VectorRandomBatchSource.createInterestingBatches(
+            random,
+            rowSource,
+            randomRows,
+            null);
+
+    String[] outputScratchTypeNames = new String[] { "string" };
+
+    VectorizedRowBatchCtx batchContext =
+        new VectorizedRowBatchCtx(
+            columnNames,
+            rowSource.typeInfos(),
+            rowSource.dataTypePhysicalVariations(),
+            /* dataColumnNums */ null,
+            /* partitionColumnCount */ 0,
+            /* virtualColumnCount */ 0,
+            /* neededVirtualColumns */ null,
+            outputScratchTypeNames,
+            null);
+
+    FunctionInfo funcInfo = null;
+    try {
+      funcInfo = FunctionRegistry.getFunctionInfo(functionName);
+    } catch (SemanticException e) {
+      Assert.fail("Failed to load " + functionName + " " + e);
+    }
+    // Fail with a readable message instead of a NullPointerException if the
+    // lookup produced nothing.
+    if (funcInfo == null) {
+      Assert.fail("Function " + functionName + " is not registered");
+    }
+    GenericUDF genericUdf = funcInfo.getGenericUDF();
+
+    final int rowCount = randomRows.length;
+    StringConcatTestMode[] testModes = StringConcatTestMode.values();
+    Object[][] resultObjectsArray = new Object[StringConcatTestMode.count][];
+    for (int i = 0; i < StringConcatTestMode.count; i++) {
+
+      Object[] resultObjects = new Object[rowCount];
+      resultObjectsArray[i] = resultObjects;
+
+      StringConcatTestMode stringConcatTestMode = testModes[i];
+      switch (stringConcatTestMode) {
+      case ROW_MODE:
+        doRowStringConcatTest(
+            stringTypeInfo1,
+            stringTypeInfo2,
+            columns,
+            children,
+            randomRows,
+            columnScalarMode,
+            rowSource.rowStructObjectInspector(),
+            genericUdf,
+            resultObjects);
+        break;
+      case ADAPTOR:
+      case VECTOR_EXPRESSION:
+        doVectorStringConcatTest(
+            stringTypeInfo1,
+            stringTypeInfo2,
+            columns,
+            rowSource.typeInfos(),
+            children,
+            stringConcatTestMode,
+            columnScalarMode,
+            batchSource,
+            batchContext,
+            rowSource.rowStructObjectInspector(),
+            genericUdf,
+            resultObjects);
+        break;
+      default:
+        throw new RuntimeException(
+            "Unexpected string concat test mode " + stringConcatTestMode);
+      }
+    }
+
+    assertResultsMatch(columnScalarMode, randomRows, resultObjectsArray);
+  }
+
+  /**
+   * Builds one concat operand. A column operand registers a generation spec,
+   * a NONE physical variation and a new column name ("col" followed by the
+   * number of columns registered so far); a scalar operand is a random
+   * constant of the given type.
+   */
+  private ExprNodeDesc createOperand(Random random, TypeInfo typeInfo,
+      boolean isColumn,
+      List<GenerationSpec> generationSpecList,
+      List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList,
+      List<String> columns) {
+    if (!isColumn) {
+      Object scalarObject =
+          VectorRandomRowSource.randomPrimitiveObject(
+              random, (PrimitiveTypeInfo) typeInfo);
+      return new ExprNodeConstantDesc(typeInfo, scalarObject);
+    }
+
+    generationSpecList.add(GenerationSpec.createSameType(typeInfo));
+    explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE);
+
+    String columnName = "col" + columns.size();
+    columns.add(columnName);
+    return new ExprNodeColumnDesc(typeInfo, columnName, "table", false);
+  }
+
+  /**
+   * Compares, row by row, the results of every non-row-mode test mode with
+   * the row-mode results stored at index 0, failing on the first mismatch in
+   * either NULL-ness or value.
+   */
+  private void assertResultsMatch(ColumnScalarMode columnScalarMode,
+      Object[][] randomRows, Object[][] resultObjectsArray) {
+    final int rowCount = randomRows.length;
+    for (int i = 0; i < rowCount; i++) {
+      // Row-mode is the expected value.
+      Object expectedResult = resultObjectsArray[0][i];
+
+      for (int v = 1; v < StringConcatTestMode.count; v++) {
+        Object vectorResult = resultObjectsArray[v][i];
+        if (expectedResult == null || vectorResult == null) {
+          if (expectedResult != null || vectorResult != null) {
+            Assert.fail(
+                "Row " + i + " " + StringConcatTestMode.values()[v] +
+                " " + columnScalarMode +
+                " result is NULL " + (vectorResult == null) +
+                " does not match row-mode expected result is NULL " +
+                (expectedResult == null) +
+                " row values " + Arrays.toString(randomRows[i]));
+          }
+        } else {
+
+          if (!expectedResult.equals(vectorResult)) {
+            Assert.fail(
+                "Row " + i + " " + StringConcatTestMode.values()[v] +
+                " " + columnScalarMode +
+                " result \"" + vectorResult.toString() + "\"" +
+                " (" + vectorResult.getClass().getSimpleName() + ")" +
+                " does not match row-mode expected result \"" +
+                expectedResult.toString() + "\"" +
+                " (" + expectedResult.getClass().getSimpleName() + ")" +
+                " row values " + Arrays.toString(randomRows[i]));
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Evaluates the concat expression row by row with an
+   * {@link ExprNodeEvaluator} and stores a writable standard-object copy of
+   * each result in {@code resultObjects}, at the same index as its input row.
+   *
+   * @param stringTypeInfo type of the first operand (used only in the debug
+   *     line)
+   * @param integerTypeInfo type of the second operand (used only in the debug
+   *     line); the caller in this class passes the second string type here
+   *     despite the parameter's name
+   * @param columns column names; not read by this method
+   * @param children the two operand expressions
+   * @param randomRows input rows
+   * @param columnScalarMode operand layout (used only in the debug line)
+   * @param rowInspector inspector used to initialize the evaluator
+   * @param genericUdf the function being evaluated
+   * @param resultObjects output array, one slot per input row
+   */
+  private void doRowStringConcatTest(TypeInfo stringTypeInfo,
+      TypeInfo integerTypeInfo,
+      List<String> columns, List<ExprNodeDesc> children,
+      Object[][] randomRows, ColumnScalarMode columnScalarMode,
+      ObjectInspector rowInspector,
+      GenericUDF genericUdf, Object[] resultObjects) throws Exception {
+
+    StringBuilder debugLine = new StringBuilder("*DEBUG* stringTypeInfo ");
+    debugLine.append(stringTypeInfo.toString());
+    debugLine.append(" integerTypeInfo ").append(integerTypeInfo);
+    debugLine.append(" stringConcatTestMode ROW_MODE");
+    debugLine.append(" columnScalarMode ").append(columnScalarMode);
+    debugLine.append(" genericUdf ").append(genericUdf.toString());
+    System.out.println(debugLine.toString());
+
+    ExprNodeGenericFuncDesc concatDesc =
+        new ExprNodeGenericFuncDesc(
+            TypeInfoFactory.stringTypeInfo, genericUdf, children);
+
+    HiveConf conf = new HiveConf();
+    ExprNodeEvaluator rowEvaluator =
+        ExprNodeEvaluatorFactory.get(concatDesc, conf);
+    rowEvaluator.initialize(rowInspector);
+
+    ObjectInspector outputInspector = rowEvaluator.getOutputOI();
+
+    int resultIndex = 0;
+    for (Object[] inputRow : randomRows) {
+      Object rawResult = rowEvaluator.evaluate(inputRow);
+      // Copy so the stored value does not alias an object the evaluator may
+      // hand back again for a later row.
+      resultObjects[resultIndex++] =
+          ObjectInspectorUtils.copyToStandardObject(
+              rawResult, outputInspector, ObjectInspectorCopyOption.WRITABLE);
+    }
+  }
+
+  /**
+   * Copies the result value of every logical row in {@code batch} into
+   * {@code resultObjects}, starting at {@code rowIndex}.
+   *
+   * @param batch batch that has already been evaluated
+   * @param rowIndex slot in {@code resultObjects} for the batch's first row
+   * @param resultVectorExtractRow extractor configured for the result column
+   * @param scrqtchRow reusable one-element row; element 0 receives the
+   *     extracted value
+   * @param objectInspector inspector describing the result value
+   * @param resultObjects destination array
+   */
+  private void extractResultObjects(VectorizedRowBatch batch, int rowIndex,
+      VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow,
+      ObjectInspector objectInspector, Object[] resultObjects) {
+
+    final boolean usesSelection = batch.selectedInUse;
+    final int[] selection = batch.selected;
+    for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) {
+      // Map the logical position to the physical position within the batch.
+      final int batchIndex;
+      if (usesSelection) {
+        batchIndex = selection[logicalIndex];
+      } else {
+        batchIndex = logicalIndex;
+      }
+      resultVectorExtractRow.extractRow(batch, batchIndex, scrqtchRow);
+
+      resultObjects[rowIndex + logicalIndex] =
+          ObjectInspectorUtils.copyToStandardObject(
+              scrqtchRow[0], objectInspector,
+              ObjectInspectorCopyOption.WRITABLE);
+    }
+  }
+
+  /**
+   * Evaluates the concat expression batch by batch through a
+   * {@link VectorExpression} and stores a writable standard-object copy of
+   * each result in {@code resultObjects}, in batch iteration order.
+   *
+   * @param stringTypeInfo1 type of the first operand (used only in the debug
+   *     line)
+   * @param stringTypeInfo2 type of the second operand (used only in the debug
+   *     line)
+   * @param columns names of the input columns
+   * @param typeInfos types of the input columns
+   * @param children the two operand expressions
+   * @param stringConcatTestMode ADAPTOR sets the vector adaptor override in
+   *     the configuration; any other mode leaves the configuration untouched
+   * @param columnScalarMode operand layout (used only in the debug line)
+   * @param batchSource source of input batches; its iteration is reset first
+   * @param batchContext creates the batch that is filled and evaluated
+   * @param rowInspector inspector used to determine the output type
+   * @param genericUdf the function being evaluated
+   * @param resultObjects output array, one slot per input row
+   */
+  private void doVectorStringConcatTest(TypeInfo stringTypeInfo1,
+      TypeInfo stringTypeInfo2,
+      List<String> columns,
+      TypeInfo[] typeInfos,
+      List<ExprNodeDesc> children,
+      StringConcatTestMode stringConcatTestMode,
+      ColumnScalarMode columnScalarMode,
+      VectorRandomBatchSource batchSource, VectorizedRowBatchCtx batchContext,
+      ObjectInspector rowInspector,
+      GenericUDF genericUdf, Object[] resultObjects)
+          throws Exception {
+
+    HiveConf hiveConf = new HiveConf();
+    if (stringConcatTestMode == StringConcatTestMode.ADAPTOR) {
+      hiveConf.setBoolVar(
+          HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true);
+    }
+
+    // One entry per input column. There is a single input column in the
+    // COLUMN_SCALAR and SCALAR_COLUMN modes, so the length must follow
+    // typeInfos rather than being fixed at two.
+    DataTypePhysicalVariation[] dataTypePhysicalVariations =
+        new DataTypePhysicalVariation[typeInfos.length];
+    Arrays.fill(dataTypePhysicalVariations, DataTypePhysicalVariation.NONE);
+
+    ExprNodeGenericFuncDesc exprDesc =
+        new ExprNodeGenericFuncDesc(
+            TypeInfoFactory.stringTypeInfo, genericUdf, children);
+
+    //---------------------------------------
+    // Just so we can get the output type...
+
+    ExprNodeEvaluator evaluator =
+        ExprNodeEvaluatorFactory.get(exprDesc, hiveConf);
+    evaluator.initialize(rowInspector);
+
+    ObjectInspector objectInspector = evaluator.getOutputOI();
+    TypeInfo outputTypeInfo =
+        TypeInfoUtils.getTypeInfoFromObjectInspector(objectInspector);
+
+    /*
+     * Again with correct output type...
+     */
+    exprDesc =
+        new ExprNodeGenericFuncDesc(outputTypeInfo, genericUdf, children);
+    //---------------------------------------
+
+    VectorizationContext vectorizationContext =
+        new VectorizationContext(
+            "name",
+            columns,
+            Arrays.asList(typeInfos),
+            Arrays.asList(dataTypePhysicalVariations),
+            hiveConf);
+    VectorExpression vectorExpression =
+        vectorizationContext.getVectorExpression(exprDesc);
+    vectorExpression.transientInit();
+
+    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
+
+    // The result is read from the batch column at index columns.size(),
+    // presumably the first scratch column after the input columns.
+    VectorExtractRow resultVectorExtractRow = new VectorExtractRow();
+    resultVectorExtractRow.init(
+        new TypeInfo[] { outputTypeInfo }, new int[] { columns.size() });
+    Object[] scrqtchRow = new Object[1];
+
+    System.out.println(
+        "*DEBUG* stringTypeInfo1 " + stringTypeInfo1.toString() +
+        " stringTypeInfo2 " + stringTypeInfo2.toString() +
+        " stringConcatTestMode " + stringConcatTestMode +
+        " columnScalarMode " + columnScalarMode +
+        " vectorExpression " + vectorExpression.toString());
+
+    batchSource.resetBatchIteration();
+    int rowIndex = 0;
+    while (batchSource.fillNextBatch(batch)) {
+      vectorExpression.evaluate(batch);
+      extractResultObjects(
+          batch, rowIndex, resultVectorExtractRow, scrqtchRow,
+          objectInspector, resultObjects);
+      rowIndex += batch.size;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
----------------------------------------------------------------------
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
index 65daeaa..902f29e 100644
--- 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java
@@ -4541,8 +4541,8 @@ public class TestVectorStringExpressions {
 
     // has nulls, not repeating
     VectorizedRowBatch batch = makeStringBatch();
-    StringGroupColConcatCharScalar expr =
-        new StringGroupColConcatCharScalar(
+    StringGroupColConcatStringScalar expr =
+        new StringGroupColConcatStringScalar(
             0, new HiveChar(new String(red), 6).getStrippedValue().getBytes(), 
1);
     expr.evaluate(batch);
     BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
@@ -4605,7 +4605,9 @@ public class TestVectorStringExpressions {
 
     // has nulls, not repeating
     VectorizedRowBatch batch = makeStringBatch();
-    StringGroupColConcatVarCharScalar expr = new 
StringGroupColConcatVarCharScalar(0, new HiveVarchar(new String(red), 14), 1);
+    StringGroupColConcatStringScalar expr =
+        new StringGroupColConcatStringScalar(
+            0, new HiveVarchar(new String(red), 14).getValue().getBytes(), 1);
     expr.evaluate(batch);
     BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
 
@@ -4729,8 +4731,8 @@ public class TestVectorStringExpressions {
 
     // has nulls, not repeating
     VectorizedRowBatch batch = makeStringBatch();
-    CharScalarConcatStringGroupCol expr =
-        new CharScalarConcatStringGroupCol(
+    StringScalarConcatStringGroupCol expr =
+        new StringScalarConcatStringGroupCol(
             new HiveChar(new String(red), 6).getStrippedValue().getBytes(), 0, 
1);
     expr.evaluate(batch);
     BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];
@@ -4793,7 +4795,9 @@ public class TestVectorStringExpressions {
 
     // has nulls, not repeating
     VectorizedRowBatch batch = makeStringBatch();
-    VarCharScalarConcatStringGroupCol expr = new 
VarCharScalarConcatStringGroupCol(new HiveVarchar(new String(red), 14), 0, 1);
+    StringScalarConcatStringGroupCol expr =
+        new StringScalarConcatStringGroupCol(
+            new HiveVarchar(new String(red), 14).getValue().getBytes(), 0, 1);
     expr.evaluate(batch);
     BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];

[2/2] hive git commit: HIVE-19565: Vectorization: Fix NULL / Wrong Results issues in STRING Functions (Matt McCline, reviewed by Teddy Choi)

Reply via email to