HIVE-19565: Vectorization: Fix NULL / Wrong Results issues in STRING Functions (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3f6a160e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3f6a160e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3f6a160e Branch: refs/heads/branch-3 Commit: 3f6a160ed84d63377cd7922f095f6846829fedd8 Parents: abc504c Author: Matt McCline <mmccl...@hortonworks.com> Authored: Fri Jun 15 09:43:19 2018 -0500 Committer: Matt McCline <mmccl...@hortonworks.com> Committed: Fri Jun 15 09:43:19 2018 -0500 ---------------------------------------------------------------------- .../CharScalarConcatStringGroupCol.java | 51 --- .../StringGroupColConcatCharScalar.java | 51 --- .../StringGroupColConcatStringScalar.java | 3 +- .../StringGroupColConcatVarCharScalar.java | 52 --- .../expressions/StringGroupConcatColCol.java | 2 +- .../ql/exec/vector/expressions/StringLTrim.java | 25 +- .../ql/exec/vector/expressions/StringRTrim.java | 24 +- .../StringScalarConcatStringGroupCol.java | 2 +- .../expressions/StringSubstrColStart.java | 11 +- .../expressions/StringSubstrColStartLen.java | 11 +- .../ql/exec/vector/expressions/StringTrim.java | 35 +- .../VarCharScalarConcatStringGroupCol.java | 52 --- .../hive/ql/udf/generic/GenericUDFConcat.java | 11 +- .../ql/exec/vector/VectorRandomRowSource.java | 332 +++++++++++++- .../expressions/TestVectorCastStatement.java | 86 ++-- .../expressions/TestVectorDateAddSub.java | 84 +--- .../vector/expressions/TestVectorDateDiff.java | 112 ++--- .../expressions/TestVectorStringConcat.java | 427 +++++++++++++++++++ .../TestVectorStringExpressions.java | 16 +- .../expressions/TestVectorStringUnary.java | 368 ++++++++++++++++ .../vector/expressions/TestVectorSubStr.java | 347 +++++++++++++++ .../expressions/TestVectorTimestampExtract.java | 81 +--- 22 files changed, 1634 insertions(+), 549 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java deleted file mode 100644 index 712b8de..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - -/** - * Vectorized instruction to concatenate a scalar to a string column and put - * the result in an output column. - */ -public class CharScalarConcatStringGroupCol extends StringScalarConcatStringGroupCol { - private static final long serialVersionUID = 1L; - - public CharScalarConcatStringGroupCol(byte[] value, int colNum, int outputColumnNum) { - super(value, colNum, outputColumnNum); - } - - public CharScalarConcatStringGroupCol() { - super(); - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.CHAR, - VectorExpressionDescriptor.ArgumentType.STRING_FAMILY) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.SCALAR, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatCharScalar.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatCharScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatCharScalar.java deleted file mode 100644 index bbebe6c..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatCharScalar.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - -/** - * Vectorized instruction to concatenate a string column to a scalar and put - * the result in an output column. - */ -public class StringGroupColConcatCharScalar extends StringGroupColConcatStringScalar { - private static final long serialVersionUID = 1L; - - public StringGroupColConcatCharScalar(int colNum, byte[] value, int outputColumnNum) { - super(colNum, value, outputColumnNum); - } - - public StringGroupColConcatCharScalar() { - super(); - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, - VectorExpressionDescriptor.ArgumentType.CHAR) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java index 9194e8b..896de85 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -164,7 +163,7 @@ public class StringGroupColConcatStringScalar extends VectorExpression { .setNumArguments(2) .setArgumentTypes( VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, - VectorExpressionDescriptor.ArgumentType.STRING) + VectorExpressionDescriptor.ArgumentType.STRING_FAMILY) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatVarCharScalar.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatVarCharScalar.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatVarCharScalar.java deleted file mode 100644 index 7349410..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatVarCharScalar.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - -/** - * Vectorized instruction to concatenate a string column to a scalar and put - * the result in an output column. - */ -public class StringGroupColConcatVarCharScalar extends StringGroupColConcatStringScalar { - private static final long serialVersionUID = 1L; - - public StringGroupColConcatVarCharScalar(int colNum, HiveVarchar value, int outputColumnNum) { - super(colNum, value.getValue().getBytes(), outputColumnNum); - } - - public StringGroupColConcatVarCharScalar() { - super(); - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, - VectorExpressionDescriptor.ArgumentType.VARCHAR) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java index 1c9433b..ceb7b26 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java @@ -128,7 +128,7 @@ public class StringGroupConcatColCol extends VectorExpression { } } else { for(int i = 0; i != n; i++) { - if (!inV2.isNull[0]) { + if (!inV2.isNull[i]) { outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]); } } http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java index 84f03cc..8a41e76 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLTrim.java @@ -23,6 +23,8 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; public class StringLTrim extends StringUnaryUDFDirect { private static final long serialVersionUID = 1L; + private static final byte[] EMPTY_BYTES = new byte[0]; + public StringLTrim(int inputColumn, int outputColumnNum) { super(inputColumn, outputColumnNum); } @@ -36,14 +38,23 @@ public class StringLTrim extends StringUnaryUDFDirect { * Operate on the data in place, and set the output by reference * to improve performance. Ignore null handling. That will be handled separately. */ - protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) { - int j = start[i]; - - // skip past blank characters - while(j < start[i] + vector[i].length && vector[i][j] == 0x20) { - j++; + protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, + int batchIndex) { + byte[] bytes = vector[batchIndex]; + final int startIndex = start[batchIndex]; + + // Skip past blank characters. + final int end = startIndex + length[batchIndex]; + int index = startIndex; + while(index < end && bytes[index] == 0x20) { + index++; } - outV.setVal(i, vector[i], j, length[i] - (j - start[i])); + final int resultLength = end - index; + if (resultLength == 0) { + outV.setVal(batchIndex, EMPTY_BYTES, 0, 0); + return; + } + outV.setVal(batchIndex, bytes, index, resultLength); } } http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java index 5c087aa..6a3a220 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringRTrim.java @@ -23,6 +23,8 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; public class StringRTrim extends StringUnaryUDFDirect { private static final long serialVersionUID = 1L; + private static final byte[] EMPTY_BYTES = new byte[0]; + public StringRTrim(int inputColumn, int outputColumnNum) { super(inputColumn, outputColumnNum); } @@ -36,15 +38,23 @@ public class StringRTrim extends StringUnaryUDFDirect { * Operate on the data in place, and set the output by reference * to improve performance. Ignore null handling. That will be handled separately. */ - protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) { - int j = start[i] + length[i] - 1; + protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, + int batchIndex) { + + byte[] bytes = vector[batchIndex]; + final int startIndex = start[batchIndex]; - // skip trailing blank characters - while(j >= start[i] && vector[i][j] == 0x20) { - j--; + // Skip trailing blank characters. + int index = startIndex + length[batchIndex] - 1; + while(index >= startIndex && bytes[index] == 0x20) { + index--; } - // set output vector - outV.setVal(i, vector[i], start[i], (j - start[i]) + 1); + final int resultLength = index - startIndex + 1; + if (resultLength == 0) { + outV.setVal(batchIndex, EMPTY_BYTES, 0, 0); + return; + } + outV.setVal(batchIndex, bytes, startIndex, resultLength); } } http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java index db679b0..b099910 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java @@ -163,7 +163,7 @@ public class StringScalarConcatStringGroupCol extends VectorExpression { VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(2) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.STRING, + VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, VectorExpressionDescriptor.ArgumentType.STRING_FAMILY) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.SCALAR, http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java index 411fc4b..2b97504 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java @@ -146,12 +146,7 @@ public class StringSubstrColStart extends VectorExpression { outputColVector.isRepeating = false; if (inV.isRepeating) { - if (!inV.noNulls && inV.isNull[0]) { - outputIsNull[0] = true; - outputColVector.noNulls = false; - outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); - return; - } else { + if (inV.noNulls || !inV.isNull[0]) { outputIsNull[0] = false; int offset = getSubstrStartOffset(vector[0], start[0], len[0], startIdx); if (offset != -1) { @@ -159,6 +154,10 @@ public class StringSubstrColStart extends VectorExpression { } else { outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); } + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); } outputColVector.isRepeating = true; return; http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java index e28c0a7..fff3032 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java @@ -167,12 +167,7 @@ public class StringSubstrColStartLen extends VectorExpression { outputColVector.isRepeating = false; if (inV.isRepeating) { - - if (!inV.noNulls && inV.isNull[0]) { - outputIsNull[0] = true; - outputColVector.noNulls = false; - outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); - } else { + if (inV.noNulls || !inV.isNull[0]) { outputIsNull[0] = false; populateSubstrOffsets(vector[0], start[0], len[0], startIdx, length, offsetArray); if (offsetArray[0] != -1) { @@ -180,6 +175,10 @@ public class StringSubstrColStartLen extends VectorExpression { } else { outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); } + } else { + outputIsNull[0] = true; + outputColVector.noNulls = false; + outputColVector.setVal(0, EMPTY_STRING, 0, EMPTY_STRING.length); } outputColVector.isRepeating = true; return; http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java index 458ac7d..76afe7c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringTrim.java @@ -23,6 +23,8 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; public class StringTrim extends StringUnaryUDFDirect { private static final long serialVersionUID = 1L; + private static final byte[] EMPTY_BYTES = new byte[0]; + public StringTrim(int inputColumn, int outputColumnNum) { super(inputColumn, outputColumnNum); } @@ -37,20 +39,31 @@ public class StringTrim extends StringUnaryUDFDirect { * Operate on the data in place, and set the output by reference * to improve performance. Ignore null handling. That will be handled separately. */ - protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) { - int l = start[i]; - int r = start[i] + length[i] - 1; + protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, + int batchIndex) { - // skip blank character on left - while(l <= r && vector[i][l] == 0x20) { - l++; + byte[] bytes = vector[batchIndex]; + final int startIndex = start[batchIndex]; + final int end = startIndex + length[batchIndex]; + int leftIndex = startIndex; + while(leftIndex < end && bytes[leftIndex] == 0x20) { + leftIndex++; } - - // skip blank characters on right - while(l <= r && vector[i][r] == 0x20) { - r--; + if (leftIndex == end) { + outV.setVal(batchIndex, EMPTY_BYTES, 0, 0); + return; } - outV.setVal(i, vector[i], l, (r - l) + 1); + // Have at least 1 non-blank; Skip trailing blank characters. + int rightIndex = end - 1; + final int rightLimit = leftIndex + 1; + while(rightIndex >= rightLimit && bytes[rightIndex] == 0x20) { + rightIndex--; + } + final int resultLength = rightIndex - leftIndex + 1; + if (resultLength <= 0) { + throw new RuntimeException("Not expected"); + } + outV.setVal(batchIndex, bytes, leftIndex, resultLength); } } http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VarCharScalarConcatStringGroupCol.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VarCharScalarConcatStringGroupCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VarCharScalarConcatStringGroupCol.java deleted file mode 100644 index 76e83e0..0000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VarCharScalarConcatStringGroupCol.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - -/** - * Vectorized instruction to concatenate a scalar to a string column and put - * the result in an output column. - */ -public class VarCharScalarConcatStringGroupCol extends StringScalarConcatStringGroupCol { - private static final long serialVersionUID = 1L; - - public VarCharScalarConcatStringGroupCol(HiveVarchar value, int colNum, int outputColumnNum) { - super(value.getValue().getBytes(), colNum, outputColumnNum); - } - - public VarCharScalarConcatStringGroupCol() { - super(); - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.VARCHAR, - VectorExpressionDescriptor.ArgumentType.STRING_FAMILY) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.SCALAR, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java index 62a7560..92588dd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java @@ -25,11 +25,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupConcatColCol; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatStringScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatCharScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatVarCharScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringScalarConcatStringGroupCol; -import org.apache.hadoop.hive.ql.exec.vector.expressions.CharScalarConcatStringGroupCol; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VarCharScalarConcatStringGroupCol; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; @@ -53,11 +49,10 @@ extended = "Returns NULL if any argument is NULL.\n" + "Example:\n" + " > SELECT _FUNC_('abc', 'def') FROM src LIMIT 1;\n" + " 'abcdef'") -@VectorizedExpressions({StringGroupConcatColCol.class, +@VectorizedExpressions({ + StringGroupConcatColCol.class, StringGroupColConcatStringScalar.class, - StringGroupColConcatCharScalar.class, StringGroupColConcatVarCharScalar.class, - StringScalarConcatStringGroupCol.class, - CharScalarConcatStringGroupCol.class, VarCharScalarConcatStringGroupCol.class}) + StringScalarConcatStringGroupCol.class}) public class GenericUDFConcat extends GenericUDF { private transient ObjectInspector[] argumentOIs; private transient StringConverter[] stringConverters; http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java index 641ff10..0e4dcfd 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java @@ -20,12 +20,17 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.sql.Date; import java.sql.Timestamp; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.text.ParseException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Random; import java.util.Set; +import org.apache.commons.lang.StringUtils; + import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -81,6 +86,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hive.common.util.DateUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.LongWritable; import com.google.common.base.Preconditions; import com.google.common.base.Charsets; @@ -114,6 +120,8 @@ public class VectorRandomRowSource { private StructObjectInspector rowStructObjectInspector; + private List<GenerationSpec> generationSpecList; + private String[] alphabets; private boolean allowNull; @@ -121,6 +129,92 @@ public class VectorRandomRowSource { private boolean addEscapables; private String needsEscapeStr; + public static class StringGenerationOption { + + private boolean generateSentences; + private boolean addPadding; + + public StringGenerationOption(boolean generateSentences, boolean addPadding) { + this.generateSentences = generateSentences; + this.addPadding = addPadding; + } + + public boolean getGenerateSentences() { + return generateSentences; + } + + public boolean getAddPadding() { + return addPadding; + } + } + + public static class GenerationSpec { + + public static enum GenerationKind { + SAME_TYPE, + OMIT_GENERATION, + STRING_FAMILY, + STRING_FAMILY_OTHER_TYPE_VALUE, + TIMESTAMP_MILLISECONDS + } + + private final GenerationKind generationKind; + private final TypeInfo typeInfo; + private final TypeInfo sourceTypeInfo; + private final StringGenerationOption stringGenerationOption; + + private GenerationSpec(GenerationKind generationKind, TypeInfo typeInfo, + TypeInfo sourceTypeInfo, StringGenerationOption stringGenerationOption) { + this.generationKind = generationKind; + this.typeInfo = typeInfo; + this.sourceTypeInfo = sourceTypeInfo; + this.stringGenerationOption = stringGenerationOption; + } + + public GenerationKind getGenerationKind() { + return generationKind; + } + + public TypeInfo getTypeInfo() { + return typeInfo; + } + + public TypeInfo getSourceTypeInfo() { + return sourceTypeInfo; + } + + public StringGenerationOption getStringGenerationOption() { + return stringGenerationOption; + } + + public static GenerationSpec createSameType(TypeInfo typeInfo) { + return new GenerationSpec( + GenerationKind.SAME_TYPE, typeInfo, null, null); + } + + public static GenerationSpec createOmitGeneration(TypeInfo typeInfo) { + return new GenerationSpec( + GenerationKind.OMIT_GENERATION, typeInfo, null, null); + } + + public static GenerationSpec createStringFamily(TypeInfo typeInfo, + StringGenerationOption stringGenerationOption) { + return new GenerationSpec( + GenerationKind.STRING_FAMILY, typeInfo, null, stringGenerationOption); + } + + public static GenerationSpec createStringFamilyOtherTypeValue(TypeInfo typeInfo, + TypeInfo otherTypeTypeInfo) { + return new GenerationSpec( + GenerationKind.STRING_FAMILY_OTHER_TYPE_VALUE, typeInfo, otherTypeTypeInfo, null); + } + + public static GenerationSpec createTimestampMilliseconds(TypeInfo typeInfo) { + return new GenerationSpec( + GenerationKind.TIMESTAMP_MILLISECONDS, typeInfo, null, null); + } + } + public List<String> typeNames() { return typeNames; } @@ -186,8 +280,26 @@ public class VectorRandomRowSource { boolean allowNull, List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList) { this.r = r; this.allowNull = allowNull; + + List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>(); + for (String explicitTypeName : explicitTypeNameList) { + TypeInfo typeInfo = + TypeInfoUtils.getTypeInfoFromTypeString(explicitTypeName); + generationSpecList.add( + GenerationSpec.createSameType(typeInfo)); + } + + chooseSchema( + SupportedTypes.ALL, null, generationSpecList, explicitDataTypePhysicalVariationList, + maxComplexDepth); + } + + public void initGenerationSpecSchema(Random r, List<GenerationSpec> generationSpecList, int maxComplexDepth, + boolean allowNull, List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList) { + this.r = r; + this.allowNull = allowNull; chooseSchema( - SupportedTypes.ALL, null, explicitTypeNameList, explicitDataTypePhysicalVariationList, + SupportedTypes.ALL, null, generationSpecList, explicitDataTypePhysicalVariationList, maxComplexDepth); } @@ -418,14 +530,14 @@ public class VectorRandomRowSource { } private void chooseSchema(SupportedTypes supportedTypes, Set<String> allowedTypeNameSet, - List<String> explicitTypeNameList, + List<GenerationSpec> generationSpecList, List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList, int maxComplexDepth) { HashSet<Integer> hashSet = null; final boolean allTypes; final boolean onlyOne; - if (explicitTypeNameList != null) { - columnCount = explicitTypeNameList.size(); + if (generationSpecList != null) { + columnCount = generationSpecList.size(); allTypes = false; onlyOne = false; } else if (allowedTypeNameSet != null) { @@ -472,8 +584,8 @@ public class VectorRandomRowSource { final String typeName; DataTypePhysicalVariation dataTypePhysicalVariation = DataTypePhysicalVariation.NONE; - if (explicitTypeNameList != null) { - typeName = explicitTypeNameList.get(c); + if (generationSpecList != null) { + typeName = generationSpecList.get(c).getTypeInfo().getTypeName(); dataTypePhysicalVariation = explicitDataTypePhysicalVariationList.get(c); } else if (onlyOne || allowedTypeNameSet != null) { typeName = getRandomTypeName(r, supportedTypes, allowedTypeNameSet); @@ -563,6 +675,154 @@ public class VectorRandomRowSource { rowStructObjectInspector = ObjectInspectorFactory. getStandardStructObjectInspector(columnNames, objectInspectorList); alphabets = new String[columnCount]; + + this.generationSpecList = generationSpecList; + } + + private static ThreadLocal<DateFormat> DATE_FORMAT = + new ThreadLocal<DateFormat>() { + @Override + protected DateFormat initialValue() { + return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + } + }; + + private static long MIN_FOUR_DIGIT_YEAR_MILLIS = parseToMillis("0001-01-01 00:00:00"); + private static long MAX_FOUR_DIGIT_YEAR_MILLIS = parseToMillis("9999-01-01 00:00:00"); + + private static long parseToMillis(String s) { + try { + return DATE_FORMAT.get().parse(s).getTime(); + } catch (ParseException ex) { + throw new RuntimeException(ex); + } + } + + private static String[] randomWords = + new String[] { + "groovy", + "attack", + "wacky", + "kiss", + "to", + "the", + "a", + "thoughtless", + "blushing", + "pay", + "rule", + "profuse", + "need", + "smell", + "bucket", + "board", + "eggs", + "laughable", + "idiotic", + "direful", + "thoughtful", + "curious", + "show", + "surge", + "opines", + "cowl", + "signal", + ""}; + private static int randomWordCount = randomWords.length; + + private static Object toStringFamilyObject(TypeInfo typeInfo, String string, boolean isWritable) { + + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + PrimitiveCategory primitiveCategory = + primitiveTypeInfo.getPrimitiveCategory(); + Object object; + switch (primitiveCategory) { + case STRING: + if (isWritable) { + object = new Text(string); + } else { + object = string; + } + break; + case CHAR: + { + HiveChar hiveChar = + new HiveChar( + string, ((CharTypeInfo) typeInfo).getLength()); + if (isWritable) { + object = new HiveCharWritable(hiveChar); + } else { + object = hiveChar; + } + } + break; + case VARCHAR: + { + HiveVarchar hiveVarchar = + new HiveVarchar( + string, ((VarcharTypeInfo) typeInfo).getLength()); + if (isWritable) { + object = new HiveVarcharWritable(hiveVarchar); + } else { + object = hiveVarchar; + } + } + break; + default: + throw new RuntimeException("Unexpected string family category " + primitiveCategory); + } + return object; + } + + public static Object randomStringFamilyOtherTypeValue(Random random, TypeInfo typeInfo, + TypeInfo specialValueTypeInfo, boolean isWritable) { + String string; + string = + VectorRandomRowSource.randomPrimitiveObject( + random, (PrimitiveTypeInfo) specialValueTypeInfo).toString(); + return toStringFamilyObject(typeInfo, string, isWritable); + } + + public static Object randomStringFamily(Random random, TypeInfo typeInfo, + StringGenerationOption stringGenerationOption, boolean isWritable) { + + String string; + if (stringGenerationOption == null) { + string = + VectorRandomRowSource.randomPrimitiveObject( + random, (PrimitiveTypeInfo) typeInfo).toString(); + } else { + boolean generateSentences = stringGenerationOption.getGenerateSentences(); + boolean addPadding = stringGenerationOption.getAddPadding(); + StringBuilder sb = new StringBuilder(); + if (addPadding && random.nextBoolean()) { + sb.append(StringUtils.leftPad("", random.nextInt(5))); + } + if (generateSentences) { + boolean capitalizeFirstWord = random.nextBoolean(); + final int n = random.nextInt(10); + for (int i = 0; i < n; i++) { + String randomWord = randomWords[random.nextInt(randomWordCount)]; + if (randomWord.length() > 0 && + ((i == 0 && capitalizeFirstWord) || random.nextInt(20) == 0)) { + randomWord = Character.toUpperCase(randomWord.charAt(0)) + randomWord.substring(1); + } + if (i > 0) { + sb.append(" "); + } + sb.append(randomWord); + } + } else { + sb.append( + VectorRandomRowSource.randomPrimitiveObject( + random, (PrimitiveTypeInfo) typeInfo).toString()); + } + if (addPadding && random.nextBoolean()) { + sb.append(StringUtils.leftPad("", random.nextInt(5))); + } + string = sb.toString(); + } + return toStringFamilyObject(typeInfo, string, isWritable); } public Object[][] randomRows(int n) { @@ -577,8 +837,64 @@ public class VectorRandomRowSource { public Object[] randomRow() { final Object row[] = new Object[columnCount]; - for (int c = 0; c < columnCount; c++) { - row[c] = randomWritable(c); + + if (generationSpecList == null) { + for (int c = 0; c < columnCount; c++) { + row[c] = randomWritable(c); + } + } else { + for (int c = 0; c < columnCount; c++) { + GenerationSpec generationSpec = generationSpecList.get(c); + GenerationSpec.GenerationKind generationKind = generationSpec.getGenerationKind(); + Object object; + switch (generationKind) { + case SAME_TYPE: + object = randomWritable(c); + break; + case OMIT_GENERATION: + object = null; + break; + case STRING_FAMILY: + { + TypeInfo typeInfo = generationSpec.getTypeInfo(); + StringGenerationOption stringGenerationOption = + generationSpec.getStringGenerationOption(); + object = randomStringFamily( + r, typeInfo, stringGenerationOption, true); + } + break; + case STRING_FAMILY_OTHER_TYPE_VALUE: + { + TypeInfo typeInfo = generationSpec.getTypeInfo(); + TypeInfo otherTypeTypeInfo = generationSpec.getSourceTypeInfo(); + object = randomStringFamilyOtherTypeValue( + r, typeInfo, otherTypeTypeInfo, true); + } + break; + case TIMESTAMP_MILLISECONDS: + { + LongWritable longWritable = (LongWritable) randomWritable(c); + if (longWritable != null) { + + while (true) { + long longValue = longWritable.get(); + if (longValue >= MIN_FOUR_DIGIT_YEAR_MILLIS && + longValue <= MAX_FOUR_DIGIT_YEAR_MILLIS) { + break; + } + longWritable.set( + (Long) VectorRandomRowSource.randomPrimitiveObject( + r, (PrimitiveTypeInfo) TypeInfoFactory.longTypeInfo)); + } + } + object = longWritable; + } + break; + default: + throw new RuntimeException("Unexpected generationKind " + generationKind); + } + row[c] = object; + } } return row; } http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java index 30e8906..9e61fcd 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCastStatement.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec; import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -252,24 +253,6 @@ public class TestVectorCastStatement { final int decimal64Scale = (isDecimal64 ? ((DecimalTypeInfo) typeInfo).getScale() : 0); - List<String> explicitTypeNameList = new ArrayList<String>(); - List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>(); - explicitTypeNameList.add(typeName); - explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation); - - VectorRandomRowSource rowSource = new VectorRandomRowSource(); - - rowSource.initExplicitSchema( - random, explicitTypeNameList, /* maxComplexDepth */ 0, /* allowNull */ true, - explicitDataTypePhysicalVariationList); - - List<String> columns = new ArrayList<String>(); - columns.add("col0"); - ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(typeInfo, "col0", "table", false); - - List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(); - children.add(col1Expr); - //---------------------------------------------------------------------------------------------- String targetTypeName; @@ -287,53 +270,38 @@ public class TestVectorCastStatement { //---------------------------------------------------------------------------------------------- - String[] columnNames = columns.toArray(new String[0]); - - Object[][] randomRows = rowSource.randomRows(100000); - + GenerationSpec generationSpec; if (needsValidDataTypeData(targetTypeInfo) && (primitiveCategory == PrimitiveCategory.STRING || primitiveCategory == PrimitiveCategory.CHAR || primitiveCategory == PrimitiveCategory.VARCHAR)) { - - // Regenerate string family with valid data for target data type. - final int rowCount = randomRows.length; - for (int i = 0; i < rowCount; i++) { - Object object = randomRows[i][0]; - if (object == null) { - continue; - } - String string = - VectorRandomRowSource.randomPrimitiveObject( - random, (PrimitiveTypeInfo) targetTypeInfo).toString(); - Object newObject; - switch (primitiveCategory) { - case STRING: - newObject = new Text(string); - break; - case CHAR: - { - HiveChar hiveChar = - new HiveChar( - string, ((CharTypeInfo) typeInfo).getLength()); - newObject = new HiveCharWritable(hiveChar); - } - break; - case VARCHAR: - { - HiveVarchar hiveVarchar = - new HiveVarchar( - string, ((VarcharTypeInfo) typeInfo).getLength()); - newObject = new HiveVarcharWritable(hiveVarchar); - } - break; - default: - throw new RuntimeException("Unexpected string family category " + primitiveCategory); - } - randomRows[i][0] = newObject; - } + generationSpec = GenerationSpec.createStringFamilyOtherTypeValue(typeInfo, targetTypeInfo); + } else { + generationSpec = GenerationSpec.createSameType(typeInfo); } + List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>(); + List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>(); + generationSpecList.add(generationSpec); + explicitDataTypePhysicalVariationList.add(dataTypePhysicalVariation); + + VectorRandomRowSource rowSource = new VectorRandomRowSource(); + + rowSource.initGenerationSpecSchema( + random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + explicitDataTypePhysicalVariationList); + + List<String> columns = new ArrayList<String>(); + columns.add("col0"); + ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(typeInfo, "col0", "table", false); + + List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(); + children.add(col1Expr); + + String[] columnNames = columns.toArray(new String[0]); + + Object[][] randomRows = rowSource.randomRows(100000); + VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches( random, http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java index 0f658c6..f5deca5 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateAddSub.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; @@ -156,52 +157,6 @@ public class TestVectorDateAddSub { } } - private static final String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - - private Object randomDateStringFamily( - Random random, TypeInfo dateTimeStringTypeInfo, boolean wantWritable) { - - String randomDateString = VectorRandomRowSource.randomPrimitiveDateStringObject(random); - if (random.nextInt(40) == 39) { - - // Randomly corrupt. - int index = random.nextInt(randomDateString.length()); - char[] chars = randomDateString.toCharArray(); - chars[index] = alphabet.charAt(random.nextInt(alphabet.length())); - randomDateString = String.valueOf(chars); - } - - PrimitiveCategory dateTimeStringPrimitiveCategory = - ((PrimitiveTypeInfo) dateTimeStringTypeInfo).getPrimitiveCategory(); - switch (dateTimeStringPrimitiveCategory) { - case STRING: - return randomDateString; - case CHAR: - { - HiveChar hiveChar = - new HiveChar(randomDateString, ((CharTypeInfo) dateTimeStringTypeInfo).getLength()); - if (wantWritable) { - return new HiveCharWritable(hiveChar); - } else { - return hiveChar; - } - } - case VARCHAR: - { - HiveVarchar hiveVarchar = - new HiveVarchar( - randomDateString, ((VarcharTypeInfo) dateTimeStringTypeInfo).getLength()); - if (wantWritable) { - return new HiveVarcharWritable(hiveVarchar); - } else { - return hiveVarchar; - } - } - default: - throw new RuntimeException("Unexpected string family category " + dateTimeStringPrimitiveCategory); - } - } - private void doDateAddSubTestsWithDiffColumnScalar(Random random, String dateTimeStringTypeName, String integerTypeName, ColumnScalarMode columnScalarMode, boolean isAdd) throws Exception { @@ -220,7 +175,7 @@ public class TestVectorDateAddSub { PrimitiveCategory integerPrimitiveCategory = ((PrimitiveTypeInfo) integerTypeInfo).getPrimitiveCategory(); - List<String> explicitTypeNameList = new ArrayList<String>(); + List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>(); List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>(); @@ -229,7 +184,14 @@ public class TestVectorDateAddSub { ExprNodeDesc col1Expr; if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) { - explicitTypeNameList.add(dateTimeStringTypeName); + if (!isStringFamily) { + generationSpecList.add( + GenerationSpec.createSameType(dateTimeStringTypeInfo)); + } else { + generationSpecList.add( + GenerationSpec.createStringFamilyOtherTypeValue( + dateTimeStringTypeInfo, TypeInfoFactory.dateTypeInfo)); + } explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); String columnName = "col" + (columnNum++); @@ -243,15 +205,16 @@ public class TestVectorDateAddSub { random, (PrimitiveTypeInfo) dateTimeStringTypeInfo); } else { scalar1Object = - randomDateStringFamily( - random, dateTimeStringTypeInfo, /* wantWritable */ false); + VectorRandomRowSource.randomStringFamilyOtherTypeValue( + random, dateTimeStringTypeInfo, TypeInfoFactory.dateTypeInfo, false); } col1Expr = new ExprNodeConstantDesc(dateTimeStringTypeInfo, scalar1Object); } ExprNodeDesc col2Expr; if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) { - explicitTypeNameList.add(integerTypeName); + generationSpecList.add( + GenerationSpec.createSameType(integerTypeInfo)); explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); String columnName = "col" + (columnNum++); @@ -277,27 +240,12 @@ public class TestVectorDateAddSub { VectorRandomRowSource rowSource = new VectorRandomRowSource(); - rowSource.initExplicitSchema( - random, explicitTypeNameList, /* maxComplexDepth */ 0, /* allowNull */ true, + rowSource.initGenerationSpecSchema( + random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, explicitDataTypePhysicalVariationList); Object[][] randomRows = rowSource.randomRows(100000); - if (isStringFamily) { - if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || - columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) { - for (int i = 0; i < randomRows.length; i++) { - Object[] row = randomRows[i]; - Object object = row[columnNum - 1]; - if (row[0] != null) { - row[0] = - randomDateStringFamily( - random, dateTimeStringTypeInfo, /* wantWritable */ true); - } - } - } - } - if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) { http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java index 80a1118..dce7ccf 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateDiff.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; @@ -70,8 +71,6 @@ import org.junit.Test; public class TestVectorDateDiff { - private static final boolean corruptDateStrings = false; - @Test public void testDateDate() throws Exception { Random random = new Random(7743); @@ -152,52 +151,6 @@ public class TestVectorDateDiff { } } - private static final String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - - private Object randomDateStringFamily( - Random random, TypeInfo dateTimeStringTypeInfo, boolean wantWritable) { - - String randomDateString = VectorRandomRowSource.randomPrimitiveDateStringObject(random); - if (corruptDateStrings && random.nextInt(40) == 39) { - - // Randomly corrupt. - int index = random.nextInt(randomDateString.length()); - char[] chars = randomDateString.toCharArray(); - chars[index] = alphabet.charAt(random.nextInt(alphabet.length())); - randomDateString = String.valueOf(chars); - } - - PrimitiveCategory dateTimeStringPrimitiveCategory = - ((PrimitiveTypeInfo) dateTimeStringTypeInfo).getPrimitiveCategory(); - switch (dateTimeStringPrimitiveCategory) { - case STRING: - return randomDateString; - case CHAR: - { - HiveChar hiveChar = - new HiveChar(randomDateString, ((CharTypeInfo) dateTimeStringTypeInfo).getLength()); - if (wantWritable) { - return new HiveCharWritable(hiveChar); - } else { - return hiveChar; - } - } - case VARCHAR: - { - HiveVarchar hiveVarchar = - new HiveVarchar( - randomDateString, ((VarcharTypeInfo) dateTimeStringTypeInfo).getLength()); - if (wantWritable) { - return new HiveVarcharWritable(hiveVarchar); - } else { - return hiveVarchar; - } - } - default: - throw new RuntimeException("Unexpected string family category " + dateTimeStringPrimitiveCategory); - } - } - private void doDateDiffTestsWithDiffColumnScalar(Random random, String dateTimeStringTypeName1, String dateTimeStringTypeName2, ColumnScalarMode columnScalarMode) throws Exception { @@ -220,7 +173,7 @@ public class TestVectorDateDiff { dateTimeStringPrimitiveCategory2 == PrimitiveCategory.CHAR || dateTimeStringPrimitiveCategory2 == PrimitiveCategory.VARCHAR); - List<String> explicitTypeNameList = new ArrayList<String>(); + List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>(); List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = new ArrayList<DataTypePhysicalVariation>(); @@ -229,7 +182,14 @@ public class TestVectorDateDiff { ExprNodeDesc col1Expr; if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) { - explicitTypeNameList.add(dateTimeStringTypeName1); + if (!isStringFamily1) { + generationSpecList.add( + GenerationSpec.createSameType(dateTimeStringTypeInfo1)); + } else { + generationSpecList.add( + GenerationSpec.createStringFamilyOtherTypeValue( + dateTimeStringTypeInfo1, TypeInfoFactory.dateTypeInfo)); + } explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); String columnName = "col" + (columnNum++); @@ -243,15 +203,23 @@ public class TestVectorDateDiff { random, (PrimitiveTypeInfo) dateTimeStringTypeInfo1); } else { scalar1Object = - randomDateStringFamily( - random, dateTimeStringTypeInfo1, /* wantWritable */ false); + VectorRandomRowSource.randomStringFamilyOtherTypeValue( + random, dateTimeStringTypeInfo1, TypeInfoFactory.dateTypeInfo, false); } col1Expr = new ExprNodeConstantDesc(dateTimeStringTypeInfo1, scalar1Object); } ExprNodeDesc col2Expr; if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) { - explicitTypeNameList.add(dateTimeStringTypeName2); + if (!isStringFamily2) { + generationSpecList.add( + GenerationSpec.createSameType(dateTimeStringTypeInfo2)); + } else { + generationSpecList.add( + GenerationSpec.createStringFamilyOtherTypeValue( + dateTimeStringTypeInfo2, TypeInfoFactory.dateTypeInfo)); + } + explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); String columnName = "col" + (columnNum++); @@ -265,8 +233,8 @@ public class TestVectorDateDiff { random, (PrimitiveTypeInfo) dateTimeStringTypeInfo2); } else { scalar2Object = - randomDateStringFamily( - random, dateTimeStringTypeInfo2, /* wantWritable */ false); + VectorRandomRowSource.randomStringFamilyOtherTypeValue( + random, dateTimeStringTypeInfo2, TypeInfoFactory.dateTypeInfo, false); } col2Expr = new ExprNodeConstantDesc(dateTimeStringTypeInfo2, scalar2Object); } @@ -281,42 +249,12 @@ public class TestVectorDateDiff { VectorRandomRowSource rowSource = new VectorRandomRowSource(); - rowSource.initExplicitSchema( - random, explicitTypeNameList, /* maxComplexDepth */ 0, /* allowNull */ true, + rowSource.initGenerationSpecSchema( + random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, explicitDataTypePhysicalVariationList); Object[][] randomRows = rowSource.randomRows(100000); - if (isStringFamily1) { - if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || - columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) { - for (int i = 0; i < randomRows.length; i++) { - Object[] row = randomRows[i]; - Object object = row[columnNum - 1]; - if (row[0] != null) { - row[0] = - randomDateStringFamily( - random, dateTimeStringTypeInfo1, /* wantWritable */ true); - } - } - } - } - - if (isStringFamily2) { - if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || - columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) { - for (int i = 0; i < randomRows.length; i++) { - Object[] row = randomRows[i]; - Object object = row[columnNum - 1]; - if (row[columnNum - 1] != null) { - row[columnNum - 1] = - randomDateStringFamily( - random, dateTimeStringTypeInfo2, /* wantWritable */ true); - } - } - } - } - VectorRandomBatchSource batchSource = VectorRandomBatchSource.createInterestingBatches( random, http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java new file mode 100644 index 0000000..a87a8b4 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringConcat.java @@ -0,0 +1,427 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateAdd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateSub; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; + +import junit.framework.Assert; + +import org.junit.Test; + +public class TestVectorStringConcat { + + @Test + public void testString() throws Exception { + Random random = new Random(12882); + + doStringConcatTests(random, "string", "string"); + } + + @Test + public void testChar() throws Exception { + Random random = new Random(12882); + + doStringConcatTests(random, "char(20)", "char(10)"); + doStringConcatTests(random, "char(20)", "string"); + doStringConcatTests(random, "char(20)", "varchar(10)"); + doStringConcatTests(random, "string", "char(10)"); + } + + @Test + public void testVarchar() throws Exception { + Random random = new Random(12882); + + doStringConcatTests(random, "varchar(20)", "varchar(10)"); + doStringConcatTests(random, "varchar(20)", "string"); + doStringConcatTests(random, "varchar(20)", "char(10)"); + doStringConcatTests(random, "string", "varchar(10)"); + } + + public enum StringConcatTestMode { + ROW_MODE, + ADAPTOR, + VECTOR_EXPRESSION; + + static final int count = values().length; + } + + public enum ColumnScalarMode { + COLUMN_COLUMN, + COLUMN_SCALAR, + SCALAR_COLUMN; + + static final int count = values().length; + } + + private void doStringConcatTests(Random random, String stringTypeName1, String stringTypeName2) + throws Exception { + for (ColumnScalarMode columnScalarMode : ColumnScalarMode.values()) { + doStringConcatTestsWithDiffColumnScalar( + random, stringTypeName1, stringTypeName2, columnScalarMode); + } + } + + private void doStringConcatTestsWithDiffColumnScalar(Random random, + String stringTypeName1, String stringTypeName2, ColumnScalarMode columnScalarMode) + throws Exception { + + TypeInfo stringTypeInfo1 = + TypeInfoUtils.getTypeInfoFromTypeString(stringTypeName1); + PrimitiveCategory stringPrimitiveCategory1 = + ((PrimitiveTypeInfo) stringTypeInfo1).getPrimitiveCategory(); + + TypeInfo stringTypeInfo2 = + TypeInfoUtils.getTypeInfoFromTypeString(stringTypeName2); + PrimitiveCategory stringPrimitiveCategory2 = + ((PrimitiveTypeInfo) stringTypeInfo2).getPrimitiveCategory(); + + String functionName = "concat"; + + List<GenerationSpec> generationSpecList = new ArrayList<GenerationSpec>(); + List<DataTypePhysicalVariation> explicitDataTypePhysicalVariationList = + new ArrayList<DataTypePhysicalVariation>(); + + List<String> columns = new ArrayList<String>(); + int columnNum = 0; + ExprNodeDesc col1Expr; + if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || + columnScalarMode == ColumnScalarMode.COLUMN_SCALAR) { + generationSpecList.add( + GenerationSpec.createSameType(stringTypeInfo1)); + + explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); + + String columnName = "col" + (columnNum++); + col1Expr = new ExprNodeColumnDesc(stringTypeInfo1, columnName, "table", false); + columns.add(columnName); + } else { + Object scalar1Object = + VectorRandomRowSource.randomPrimitiveObject( + random, (PrimitiveTypeInfo) stringTypeInfo1); + col1Expr = new ExprNodeConstantDesc(stringTypeInfo1, scalar1Object); + } + ExprNodeDesc col2Expr; + if (columnScalarMode == ColumnScalarMode.COLUMN_COLUMN || + columnScalarMode == ColumnScalarMode.SCALAR_COLUMN) { + generationSpecList.add( + GenerationSpec.createSameType(stringTypeInfo2)); + explicitDataTypePhysicalVariationList.add(DataTypePhysicalVariation.NONE); + + String columnName = "col" + (columnNum++); + col2Expr = new ExprNodeColumnDesc(stringTypeInfo2, columnName, "table", false); + columns.add(columnName); + } else { + Object scalar2Object = + VectorRandomRowSource.randomPrimitiveObject( + random, (PrimitiveTypeInfo) stringTypeInfo2); + col2Expr = new ExprNodeConstantDesc(stringTypeInfo2, scalar2Object); + } + + List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(); + children.add(col1Expr); + children.add(col2Expr); + + //---------------------------------------------------------------------------------------------- + + String[] columnNames = columns.toArray(new String[0]); + + VectorRandomRowSource rowSource = new VectorRandomRowSource(); + + rowSource.initGenerationSpecSchema( + random, generationSpecList, /* maxComplexDepth */ 0, /* allowNull */ true, + explicitDataTypePhysicalVariationList); + + Object[][] randomRows = rowSource.randomRows(100000); + + VectorRandomBatchSource batchSource = + VectorRandomBatchSource.createInterestingBatches( + random, + rowSource, + randomRows, + null); + + String[] outputScratchTypeNames = new String[] { "string" }; + + VectorizedRowBatchCtx batchContext = + new VectorizedRowBatchCtx( + columnNames, + rowSource.typeInfos(), + rowSource.dataTypePhysicalVariations(), + /* dataColumnNums */ null, + /* partitionColumnCount */ 0, + /* virtualColumnCount */ 0, + /* neededVirtualColumns */ null, + outputScratchTypeNames, + null); + + GenericUDF genericUdf; + FunctionInfo funcInfo = null; + try { + funcInfo = FunctionRegistry.getFunctionInfo(functionName); + } catch (SemanticException e) { + Assert.fail("Failed to load " + functionName + " " + e); + } + genericUdf = funcInfo.getGenericUDF(); + + final int rowCount = randomRows.length; + Object[][] resultObjectsArray = new Object[StringConcatTestMode.count][]; + for (int i = 0; i < StringConcatTestMode.count; i++) { + + Object[] resultObjects = new Object[rowCount]; + resultObjectsArray[i] = resultObjects; + + StringConcatTestMode stringConcatTestMode = StringConcatTestMode.values()[i]; + switch (stringConcatTestMode) { + case ROW_MODE: + doRowStringConcatTest( + stringTypeInfo1, + stringTypeInfo2, + columns, + children, + randomRows, + columnScalarMode, + rowSource.rowStructObjectInspector(), + genericUdf, + resultObjects); + break; + case ADAPTOR: + case VECTOR_EXPRESSION: + doVectorStringConcatTest( + stringTypeInfo1, + stringTypeInfo2, + columns, + rowSource.typeInfos(), + children, + stringConcatTestMode, + columnScalarMode, + batchSource, + batchContext, + rowSource.rowStructObjectInspector(), + genericUdf, + resultObjects); + break; + default: + throw new RuntimeException("Unexpected IF statement test mode " + stringConcatTestMode); + } + } + + for (int i = 0; i < rowCount; i++) { + // Row-mode is the expected value. + Object expectedResult = resultObjectsArray[0][i]; + + for (int v = 1; v < StringConcatTestMode.count; v++) { + Object vectorResult = resultObjectsArray[v][i]; + if (expectedResult == null || vectorResult == null) { + if (expectedResult != null || vectorResult != null) { + Assert.fail( + "Row " + i + " " + StringConcatTestMode.values()[v] + + " " + columnScalarMode + + " result is NULL " + (vectorResult == null) + + " does not match row-mode expected result is NULL " + (expectedResult == null) + + " row values " + Arrays.toString(randomRows[i])); + } + } else { + + if (!expectedResult.equals(vectorResult)) { + Assert.fail( + "Row " + i + " " + StringConcatTestMode.values()[v] + + " " + columnScalarMode + + " result \"" + vectorResult.toString() + "\"" + + " (" + vectorResult.getClass().getSimpleName() + ")" + + " does not match row-mode expected result \"" + expectedResult.toString() + "\"" + + " (" + expectedResult.getClass().getSimpleName() + ")" + + " row values " + Arrays.toString(randomRows[i])); + } + } + } + } + } + + private void doRowStringConcatTest(TypeInfo stringTypeInfo, TypeInfo integerTypeInfo, + List<String> columns, List<ExprNodeDesc> children, + Object[][] randomRows, ColumnScalarMode columnScalarMode, + ObjectInspector rowInspector, + GenericUDF genericUdf, Object[] resultObjects) throws Exception { + + System.out.println( + "*DEBUG* stringTypeInfo " + stringTypeInfo.toString() + + " integerTypeInfo " + integerTypeInfo + + " stringConcatTestMode ROW_MODE" + + " columnScalarMode " + columnScalarMode + + " genericUdf " + genericUdf.toString()); + + ExprNodeGenericFuncDesc exprDesc = + new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, genericUdf, children); + + HiveConf hiveConf = new HiveConf(); + ExprNodeEvaluator evaluator = + ExprNodeEvaluatorFactory.get(exprDesc, hiveConf); + evaluator.initialize(rowInspector); + + ObjectInspector objectInspector = evaluator.getOutputOI(); + + final int rowCount = randomRows.length; + for (int i = 0; i < rowCount; i++) { + Object[] row = randomRows[i]; + Object result = evaluator.evaluate(row); + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + result, objectInspector, ObjectInspectorCopyOption.WRITABLE); + resultObjects[i] = copyResult; + } + } + + private void extractResultObjects(VectorizedRowBatch batch, int rowIndex, + VectorExtractRow resultVectorExtractRow, Object[] scrqtchRow, + ObjectInspector objectInspector, Object[] resultObjects) { + + boolean selectedInUse = batch.selectedInUse; + int[] selected = batch.selected; + for (int logicalIndex = 0; logicalIndex < batch.size; logicalIndex++) { + final int batchIndex = (selectedInUse ? selected[logicalIndex] : logicalIndex); + resultVectorExtractRow.extractRow(batch, batchIndex, scrqtchRow); + + Object copyResult = + ObjectInspectorUtils.copyToStandardObject( + scrqtchRow[0], objectInspector, ObjectInspectorCopyOption.WRITABLE); + resultObjects[rowIndex++] = copyResult; + } + } + + private void doVectorStringConcatTest(TypeInfo stringTypeInfo1, TypeInfo stringTypeInfo2, + List<String> columns, + TypeInfo[] typeInfos, + List<ExprNodeDesc> children, + StringConcatTestMode stringConcatTestMode, ColumnScalarMode columnScalarMode, + VectorRandomBatchSource batchSource, VectorizedRowBatchCtx batchContext, + ObjectInspector rowInspector, + GenericUDF genericUdf, Object[] resultObjects) + throws Exception { + + HiveConf hiveConf = new HiveConf(); + if (stringConcatTestMode == StringConcatTestMode.ADAPTOR) { + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_TEST_VECTOR_ADAPTOR_OVERRIDE, true); + } + + DataTypePhysicalVariation[] dataTypePhysicalVariations = new DataTypePhysicalVariation[2]; + Arrays.fill(dataTypePhysicalVariations, DataTypePhysicalVariation.NONE); + + ExprNodeGenericFuncDesc exprDesc = + new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, genericUdf, children); + + //--------------------------------------- + // Just so we can get the output type... + + ExprNodeEvaluator evaluator = + ExprNodeEvaluatorFactory.get(exprDesc, hiveConf); + evaluator.initialize(rowInspector); + + ObjectInspector objectInspector = evaluator.getOutputOI(); + TypeInfo outputTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(objectInspector); + + /* + * Again with correct output type... + */ + exprDesc = + new ExprNodeGenericFuncDesc(outputTypeInfo, genericUdf, children); + //--------------------------------------- + + VectorizationContext vectorizationContext = + new VectorizationContext( + "name", + columns, + Arrays.asList(typeInfos), + Arrays.asList(dataTypePhysicalVariations), + hiveConf); + VectorExpression vectorExpression = vectorizationContext.getVectorExpression(exprDesc); + vectorExpression.transientInit(); + + VectorizedRowBatch batch = batchContext.createVectorizedRowBatch(); + + VectorExtractRow resultVectorExtractRow = new VectorExtractRow(); + resultVectorExtractRow.init( + new TypeInfo[] { outputTypeInfo }, new int[] { columns.size() }); + Object[] scrqtchRow = new Object[1]; + + System.out.println( + "*DEBUG* stringTypeInfo1 " + stringTypeInfo1.toString() + + " stringTypeInfo2 " + stringTypeInfo2.toString() + + " stringConcatTestMode " + stringConcatTestMode + + " columnScalarMode " + columnScalarMode + + " vectorExpression " + vectorExpression.toString()); + + batchSource.resetBatchIteration(); + int rowIndex = 0; + while (true) { + if (!batchSource.fillNextBatch(batch)) { + break; + } + vectorExpression.evaluate(batch); + extractResultObjects(batch, rowIndex, resultVectorExtractRow, scrqtchRow, + objectInspector, resultObjects); + rowIndex += batch.size; + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/3f6a160e/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java index 65daeaa..902f29e 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java @@ -4541,8 +4541,8 @@ public class TestVectorStringExpressions { // has nulls, not repeating VectorizedRowBatch batch = makeStringBatch(); - StringGroupColConcatCharScalar expr = - new StringGroupColConcatCharScalar( + StringGroupColConcatStringScalar expr = + new StringGroupColConcatStringScalar( 0, new HiveChar(new String(red), 6).getStrippedValue().getBytes(), 1); expr.evaluate(batch); BytesColumnVector outCol = (BytesColumnVector) batch.cols[1]; @@ -4605,7 +4605,9 @@ public class TestVectorStringExpressions { // has nulls, not repeating VectorizedRowBatch batch = makeStringBatch(); - StringGroupColConcatVarCharScalar expr = new StringGroupColConcatVarCharScalar(0, new HiveVarchar(new String(red), 14), 1); + StringGroupColConcatStringScalar expr = + new StringGroupColConcatStringScalar( + 0, new HiveVarchar(new String(red), 14).getValue().getBytes(), 1); expr.evaluate(batch); BytesColumnVector outCol = (BytesColumnVector) batch.cols[1]; @@ -4729,8 +4731,8 @@ public class TestVectorStringExpressions { // has nulls, not repeating VectorizedRowBatch batch = makeStringBatch(); - CharScalarConcatStringGroupCol expr = - new CharScalarConcatStringGroupCol( + StringScalarConcatStringGroupCol expr = + new StringScalarConcatStringGroupCol( new HiveChar(new String(red), 6).getStrippedValue().getBytes(), 0, 1); expr.evaluate(batch); BytesColumnVector outCol = (BytesColumnVector) batch.cols[1]; @@ -4793,7 +4795,9 @@ public class TestVectorStringExpressions { // has nulls, not repeating VectorizedRowBatch batch = makeStringBatch(); - VarCharScalarConcatStringGroupCol expr = new VarCharScalarConcatStringGroupCol(new HiveVarchar(new String(red), 14), 0, 1); + StringScalarConcatStringGroupCol expr = + new StringScalarConcatStringGroupCol( + new HiveVarchar(new String(red), 14).getValue().getBytes(), 0, 1); expr.evaluate(batch); BytesColumnVector outCol = (BytesColumnVector) batch.cols[1];