Author: hashutosh Date: Fri Dec 20 03:31:26 2013 New Revision: 1552487 URL: http://svn.apache.org/r1552487 Log: HIVE-6034 : vectorized % doesn't handle zeroes the same way as non-vectorized (Sergey Shelukhin via Eric Hanson)
Modified: hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java hive/trunk/ql/src/test/queries/clientpositive/vectorization_12.q hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q hive/trunk/ql/src/test/results/clientpositive/vectorization_12.q.out hive/trunk/ql/src/test/results/clientpositive/vectorization_14.q.out Modified: hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java URL: http://svn.apache.org/viewvc/hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java?rev=1552487&r1=1552486&r2=1552487&view=diff ============================================================================== --- hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java (original) +++ hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java Fri Dec 20 03:31:26 2013 @@ -38,62 +38,51 @@ public class GenVectorCode extends Task {"ColumnArithmeticScalar", "Add", "long", "long", "+"}, {"ColumnArithmeticScalar", "Subtract", "long", "long", "-"}, {"ColumnArithmeticScalar", "Multiply", "long", "long", "*"}, - {"ColumnArithmeticScalar", "Modulo", "long", "long", "%"}, {"ColumnArithmeticScalar", "Add", "long", "double", "+"}, {"ColumnArithmeticScalar", "Subtract", "long", "double", "-"}, {"ColumnArithmeticScalar", "Multiply", "long", "double", "*"}, - {"ColumnArithmeticScalar", "Modulo", "long", "double", "%"}, {"ColumnArithmeticScalar", "Add", "double", "long", "+"}, {"ColumnArithmeticScalar", "Subtract", "double", "long", "-"}, {"ColumnArithmeticScalar", "Multiply", "double", "long", "*"}, - {"ColumnArithmeticScalar", "Modulo", "double", "long", "%"}, {"ColumnArithmeticScalar", "Add", "double", "double", "+"}, {"ColumnArithmeticScalar", "Subtract", "double", "double", "-"}, {"ColumnArithmeticScalar", "Multiply", "double", "double", "*"}, - {"ColumnArithmeticScalar", "Modulo", "double", "double", "%"}, {"ScalarArithmeticColumn", "Add", "long", "long", "+"}, {"ScalarArithmeticColumn", "Subtract", "long", "long", "-"}, {"ScalarArithmeticColumn", "Multiply", "long", "long", "*"}, - {"ScalarArithmeticColumn", "Modulo", "long", "long", "%"}, {"ScalarArithmeticColumn", "Add", "long", "double", "+"}, {"ScalarArithmeticColumn", "Subtract", "long", "double", "-"}, {"ScalarArithmeticColumn", "Multiply", "long", "double", "*"}, - {"ScalarArithmeticColumn", "Modulo", "long", "double", "%"}, {"ScalarArithmeticColumn", "Add", "double", "long", "+"}, {"ScalarArithmeticColumn", "Subtract", "double", "long", "-"}, {"ScalarArithmeticColumn", "Multiply", "double", "long", "*"}, - {"ScalarArithmeticColumn", "Modulo", "double", "long", "%"}, {"ScalarArithmeticColumn", "Add", "double", "double", "+"}, {"ScalarArithmeticColumn", "Subtract", "double", "double", "-"}, {"ScalarArithmeticColumn", "Multiply", "double", "double", "*"}, - {"ScalarArithmeticColumn", "Modulo", "double", "double", "%"}, {"ColumnArithmeticColumn", "Add", "long", "long", "+"}, {"ColumnArithmeticColumn", "Subtract", "long", "long", "-"}, {"ColumnArithmeticColumn", "Multiply", "long", "long", "*"}, - {"ColumnArithmeticColumn", "Modulo", "long", "long", "%"}, {"ColumnArithmeticColumn", "Add", "long", "double", "+"}, {"ColumnArithmeticColumn", "Subtract", "long", "double", "-"}, {"ColumnArithmeticColumn", "Multiply", "long", "double", "*"}, - {"ColumnArithmeticColumn", "Modulo", "long", "double", "%"}, {"ColumnArithmeticColumn", "Add", "double", "long", "+"}, {"ColumnArithmeticColumn", "Subtract", "double", "long", "-"}, {"ColumnArithmeticColumn", "Multiply", "double", "long", "*"}, - {"ColumnArithmeticColumn", "Modulo", "double", "long", "%"}, {"ColumnArithmeticColumn", "Add", "double", "double", "+"}, {"ColumnArithmeticColumn", "Subtract", "double", "double", "-"}, {"ColumnArithmeticColumn", "Multiply", "double", "double", "*"}, - {"ColumnArithmeticColumn", "Modulo", "double", "double", "%"}, + {"ColumnDivideScalar", "Divide", "long", "double", "/"}, {"ColumnDivideScalar", "Divide", "double", "long", "/"}, @@ -105,6 +94,19 @@ public class GenVectorCode extends Task {"ColumnDivideColumn", "Divide", "double", "long", "/"}, {"ColumnDivideColumn", "Divide", "double", "double", "/"}, + {"ColumnDivideScalar", "Modulo", "long", "long", "%"}, + {"ColumnDivideScalar", "Modulo", "long", "double", "%"}, + {"ColumnDivideScalar", "Modulo", "double", "long", "%"}, + {"ColumnDivideScalar", "Modulo", "double", "double", "%"}, + {"ScalarDivideColumn", "Modulo", "long", "long", "%"}, + {"ScalarDivideColumn", "Modulo", "long", "double", "%"}, + {"ScalarDivideColumn", "Modulo", "double", "long", "%"}, + {"ScalarDivideColumn", "Modulo", "double", "double", "%"}, + {"ColumnDivideColumn", "Modulo", "long", "long", "%"}, + {"ColumnDivideColumn", "Modulo", "long", "double", "%"}, + {"ColumnDivideColumn", "Modulo", "double", "long", "%"}, + {"ColumnDivideColumn", "Modulo", "double", "double", "%"}, + {"ColumnCompareScalar", "Equal", "long", "double", "=="}, {"ColumnCompareScalar", "Equal", "double", "double", "=="}, {"ColumnCompareScalar", "NotEqual", "long", "double", "!="}, @@ -1082,6 +1084,7 @@ public class GenVectorCode extends Task templateString = templateString.replaceAll("<OperandType1>", operandType1); templateString = templateString.replaceAll("<OperandType2>", operandType2); templateString = templateString.replaceAll("<ReturnType>", returnType); + templateString = templateString.replaceAll("<CamelReturnType>", getCamelCaseType(returnType)); writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, className, templateString); Modified: hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt?rev=1552487&r1=1552486&r2=1552487&view=diff ============================================================================== --- hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt (original) +++ hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ColumnDivideColumn.txt Fri Dec 20 03:31:26 2013 @@ -83,20 +83,20 @@ public class <ClassName> extends VectorE boolean hasDivBy0 = false; if (inputColVector1.isRepeating && inputColVector2.isRepeating) { <OperandType2> denom = vector2[0]; - outputVector[0] = vector1[0] <OperatorSymbol> (double) denom; + outputVector[0] = vector1[0] <OperatorSymbol> denom; hasDivBy0 = hasDivBy0 || (denom == 0); } else if (inputColVector1.isRepeating) { if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; <OperandType2> denom = vector2[i]; - outputVector[i] = vector1[0] <OperatorSymbol> (double) denom; + outputVector[i] = vector1[0] <OperatorSymbol> denom; hasDivBy0 = hasDivBy0 || (denom == 0); } } else { for(int i = 0; i != n; i++) { <OperandType2> denom = vector2[i]; - outputVector[i] = vector1[0] <OperatorSymbol> (double) denom; + outputVector[i] = vector1[0] <OperatorSymbol> denom; hasDivBy0 = hasDivBy0 || (denom == 0); } } @@ -142,8 +142,7 @@ public class <ClassName> extends VectorE if (!hasDivBy0) { NullUtil.setNullDataEntries<CamelReturnType>(outputColVector, batch.selectedInUse, sel, n); } else { - // Currently, the output from division is always double. - NullUtil.setNullAndDivBy0DataEntriesDouble( + NullUtil.setNullAndDivBy0DataEntries<CamelReturnType>( outputColVector, batch.selectedInUse, sel, n, inputColVector2); } } Modified: hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt?rev=1552487&r1=1552486&r2=1552487&view=diff ============================================================================== --- hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt (original) +++ hive/trunk/ql/src/gen/vectorization/ExpressionTemplates/ScalarDivideColumn.txt Fri Dec 20 03:31:26 2013 @@ -126,8 +126,7 @@ public class <ClassName> extends VectorE if (!hasDivBy0) { NullUtil.setNullOutputEntriesColScalar(outputColVector, batch.selectedInUse, sel, n); } else { - // Currently, the output from division is always double. - NullUtil.setNullAndDivBy0DataEntriesDouble( + NullUtil.setNullAndDivBy0DataEntries<CamelReturnType>( outputColVector, batch.selectedInUse, sel, n, inputColVector); } } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java?rev=1552487&r1=1552486&r2=1552487&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NullUtil.java Fri Dec 20 03:31:26 2013 @@ -27,7 +27,7 @@ import org.apache.hadoop.hive.ql.exec.ve * Utility functions to handle null propagation. */ public class NullUtil { - /* + /** * Set the data value for all NULL entries to the designated NULL_VALUE. */ public static void setNullDataEntriesLong( @@ -51,20 +51,19 @@ public class NullUtil { } } } - + // for use by Column-Scalar and Scalar-Column arithmetic for null propagation public static void setNullOutputEntriesColScalar( ColumnVector v, boolean selectedInUse, int[] sel, int n) { if (v instanceof DoubleColumnVector) { - // No need to set null data entries because the input NaN values // will automatically propagate to the output. return; } setNullDataEntriesLong((LongColumnVector) v, selectedInUse, sel, n); } - - /* + + /** * Set the data value for all NULL entries to NaN */ public static void setNullDataEntriesDouble( @@ -89,10 +88,9 @@ public class NullUtil { } } - /* - * Set the data value for all NULL entries, as well as those coming from division by zero, - * to NaN. Assumes there are entries coming from division by zero. - * We assume that infinities do not appear legally in the result, so we can replace all of them. + /** + * Set all the entries for which denoms array contains zeroes to NULL; sets all the data + * values for NULL entries for DoubleColumnVector.NULL_VALUE. */ public static void setNullAndDivBy0DataEntriesDouble( DoubleColumnVector v, boolean selectedInUse, int[] sel, int n, LongColumnVector denoms) { @@ -117,10 +115,9 @@ public class NullUtil { } } - /* - * Set the data value for all NULL entries, as well as those coming from division by zero, - * to NaN. Assumes there are entries coming from division by zero. - * We assume that infinities do not appear legally in the result, so we can replace all of them. + /** + * Set all the entries for which denoms array contains zeroes to NULL; sets all the data + * values for NULL entries for DoubleColumnVector.NULL_VALUE. */ public static void setNullAndDivBy0DataEntriesDouble( DoubleColumnVector v, boolean selectedInUse, int[] sel, int n, DoubleColumnVector denoms) { @@ -145,6 +142,60 @@ public class NullUtil { } } + /** + * Set all the entries for which denoms array contains zeroes to NULL; sets all the data + * values for NULL entries for LongColumnVector.NULL_VALUE. + */ + public static void setNullAndDivBy0DataEntriesLong( + LongColumnVector v, boolean selectedInUse, int[] sel, int n, LongColumnVector denoms) { + assert v.isRepeating || !denoms.isRepeating; + v.noNulls = false; + long[] vector = denoms.vector; + if (v.isRepeating && (v.isNull[0] = (v.isNull[0] || vector[0] == 0))) { + v.vector[0] = LongColumnVector.NULL_VALUE; + } else if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (v.isNull[i] = (v.isNull[i] || vector[i] == 0)) { + v.vector[i] = LongColumnVector.NULL_VALUE; + } + } + } else { + for (int i = 0; i != n; i++) { + if (v.isNull[i] = (v.isNull[i] || vector[i] == 0)) { + v.vector[i] = LongColumnVector.NULL_VALUE; + } + } + } + } + + /** + * Set all the entries for which denoms array contains zeroes to NULL; sets all the data + * values for NULL entries for LongColumnVector.NULL_VALUE. + */ + public static void setNullAndDivBy0DataEntriesLong( + LongColumnVector v, boolean selectedInUse, int[] sel, int n, DoubleColumnVector denoms) { + assert v.isRepeating || !denoms.isRepeating; + v.noNulls = false; + double[] vector = denoms.vector; + if (v.isRepeating && (v.isNull[0] = (v.isNull[0] || vector[0] == 0))) { + v.vector[0] = LongColumnVector.NULL_VALUE; + } else if (selectedInUse) { + for (int j = 0; j != n; j++) { + int i = sel[j]; + if (v.isNull[i] = (v.isNull[i] || vector[i] == 0)) { + v.vector[i] = LongColumnVector.NULL_VALUE; + } + } + } else { + for (int i = 0; i != n; i++) { + if (v.isNull[i] = (v.isNull[i] || vector[i] == 0)) { + v.vector[i] = LongColumnVector.NULL_VALUE; + } + } + } + } + /* * Propagate null values for a two-input operator. */ Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorization_12.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorization_12.q?rev=1552487&r1=1552486&r2=1552487&view=diff ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/vectorization_12.q (original) +++ hive/trunk/ql/src/test/queries/clientpositive/vectorization_12.q Fri Dec 20 03:31:26 2013 @@ -28,5 +28,5 @@ WHERE (((ctimestamp1 IS NULL) OR ((cboolean2 <= 1) AND (cbigint >= csmallint)))) GROUP BY cbigint, cboolean1, cstring1, ctimestamp1, cdouble -ORDER BY ctimestamp1, cdouble; +ORDER BY ctimestamp1, cdouble, cbigint, cstring1; Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q?rev=1552487&r1=1552486&r2=1552487&view=diff ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q (original) +++ hive/trunk/ql/src/test/queries/clientpositive/vectorization_14.q Fri Dec 20 03:31:26 2013 @@ -29,5 +29,5 @@ WHERE (((ctinyint <= cbigint) AND ((cbigint > -257) OR (cfloat < cint)))) GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble -ORDER BY cstring1, cfloat, cdouble; +ORDER BY cstring1, cfloat, cdouble, ctimestamp1; Modified: hive/trunk/ql/src/test/results/clientpositive/vectorization_12.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vectorization_12.q.out?rev=1552487&r1=1552486&r2=1552487&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/vectorization_12.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/vectorization_12.q.out Fri Dec 20 03:31:26 2013 @@ -27,7 +27,7 @@ WHERE (((ctimestamp1 IS NULL) OR ((cboolean2 <= 1) AND (cbigint >= csmallint)))) GROUP BY cbigint, cboolean1, cstring1, ctimestamp1, cdouble -ORDER BY ctimestamp1, cdouble +ORDER BY ctimestamp1, cdouble, cbigint, cstring1 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### @@ -60,7 +60,7 @@ WHERE (((ctimestamp1 IS NULL) OR ((cboolean2 <= 1) AND (cbigint >= csmallint)))) GROUP BY cbigint, cboolean1, cstring1, ctimestamp1, cdouble -ORDER BY ctimestamp1, cdouble +ORDER BY ctimestamp1, cdouble, cbigint, cstring1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### @@ -247,8 +247,8 @@ POSTHOOK: Input: default@alltypesorc 1864027286 true 0W67K0mT27r22f817281Ocq NULL -5818.0 3.7421376E7 -1864027286 1 1864027286 0.0 -5818.0 5818.0 -5818.0 -3.7421376E7 1.86402164785E9 1864027286 -1.554726368159204E-4 -5818.0 -5818.0 -3.7427194E7 0.0 1864027286 true FpsIohh60Bho67Fb7f NULL -5732.0 3.6868224E7 -1864027286 1 1864027286 0.0 -5732.0 5732.0 -5732.0 -3.6868224E7 1.86402164785E9 1864027286 -1.554726368159204E-4 -5732.0 -5732.0 -3.6873956E7 0.0 1864027286 true MGsGfU7253gN2Hnt2W NULL -5679.0 3.6527328E7 -1864027286 1 1864027286 0.0 -5679.0 5679.0 -5679.0 -3.6527328E7 1.86402164785E9 1864027286 -1.554726368159204E-4 -5679.0 -5679.0 -3.6533007E7 0.0 -1864027286 true lxQp116 NULL -5638.15 3.62645808E7 -1864027286 1 1864027286 0.0 -5638.15 5638.15 -5638.15 -3.62645808E7 1.86402164785E9 1864027286 -1.554726368159204E-4 -5638.15 -5638.15 -3.6270218949999996E7 0.0 -1887561756 false w62rRn0DnCSWJ1ht6qWa NULL -5638.15 3.62645808E7 1887561756 1 -1887561756 0.0 -5638.15 5638.15 -5638.15 -3.62645808E7 -1.88756739415E9 -1887561756 -1.554726368159204E-4 -5638.15 -5638.15 -3.6270218949999996E7 0.0 +1864027286 true lxQp116 NULL -5638.15 3.62645808E7 -1864027286 1 1864027286 0.0 -5638.15 5638.15 -5638.15 -3.62645808E7 1.86402164785E9 1864027286 -1.554726368159204E-4 -5638.15 -5638.15 -3.6270218949999996E7 0.0 1864027286 true wEe2THv60F6 NULL -5589.0 3.5948448E7 -1864027286 1 1864027286 0.0 -5589.0 5589.0 -5589.0 -3.5948448E7 1.86402164785E9 1864027286 -1.554726368159204E-4 -5589.0 -5589.0 -3.5954037E7 0.0 1864027286 true 7MHXQ0V71I NULL -5564.0 3.5787648E7 -1864027286 1 1864027286 0.0 -5564.0 5564.0 -5564.0 -3.5787648E7 1.86402164785E9 1864027286 -1.554726368159204E-4 -5564.0 -5564.0 -3.5793212E7 0.0 1864027286 true N7L608vFx24p0uNVwJr2o6G NULL -5536.0 3.5607552E7 -1864027286 1 1864027286 0.0 -5536.0 5536.0 -5536.0 -3.5607552E7 1.86402164785E9 1864027286 -1.554726368159204E-4 -5536.0 -5536.0 -3.5613088E7 0.0