This is an automated email from the ASF dual-hosted git repository. krisztiankasa pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 9aefef155ae HIVE-26992: Select count(*) query fails with error "java.lang.ArrayIndexOutOfBoundsException: arraycopy: last source index 1024 out of bounds for int[256]" (Zsolt Miskolczi, reviewed by Denys Kuzmenko, Krisztian Kasa) 9aefef155ae is described below commit 9aefef155ae3afef444944324cd91642b0ae1355 Author: InvisibleProgrammer <zsolt.miskol...@gmail.com> AuthorDate: Tue Feb 14 11:31:35 2023 +0100 HIVE-26992: Select count(*) query fails with error "java.lang.ArrayIndexOutOfBoundsException: arraycopy: last source index 1024 out of bounds for int[256]" (Zsolt Miskolczi, reviewed by Denys Kuzmenko, Krisztian Kasa) --- .../hive/ql/exec/vector/VectorFilterOperator.java | 2 +- .../hive/ql/exec/vector/VectorTopNKeyOperator.java | 2 +- .../hive/ql/optimizer/physical/Vectorizer.java | 9 +- .../ql/exec/vector/TestVectorFilterOperator.java | 106 ++++++++------------- .../hive/ql/exec/vector/TestVectorOperator.java | 80 ++++++++++++++++ .../ql/exec/vector/TestVectorTopNKeyOperator.java | 61 ++++++++++++ .../ql/exec/vector/TestVectorizedRowBatch.java | 10 ++ 7 files changed, 197 insertions(+), 73 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java index 37c0ed8889a..9cfe9fbec15 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java @@ -113,7 +113,7 @@ public class VectorFilterOperator extends FilterOperator //The selected vector represents selected rows. 
//Clone the selected vector - System.arraycopy(vrg.selected, 0, temporarySelected, 0, vrg.size); + System.arraycopy(vrg.selected, 0, temporarySelected, 0, vrg.selected.length); int [] selectedBackup = vrg.selected; vrg.selected = temporarySelected; int sizeBackup = vrg.size; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java index 10567c7180a..e44db5c910f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java @@ -118,7 +118,7 @@ public class VectorTopNKeyOperator extends Operator<TopNKeyDesc> implements Vect incomingBatches++; // The selected vector represents selected rows. // Clone the selected vector - System.arraycopy(batch.selected, 0, temporarySelected, 0, batch.size); + System.arraycopy(batch.selected, 0, temporarySelected, 0, batch.selected.length); int [] selectedBackup = batch.selected; batch.selected = temporarySelected; int sizeBackup = batch.size; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index e59d01d09fa..3cf63e24c58 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -41,6 +41,7 @@ import java.util.TreeSet; import java.util.regex.Pattern; import java.util.stream.Collectors; +import com.google.common.annotations.VisibleForTesting; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; @@ -4377,6 +4378,7 @@ public class Vectorizer implements PhysicalPlanResolver { return false; } + @VisibleForTesting public static Operator<? 
extends OperatorDesc> vectorizeFilterOperator( Operator<? extends OperatorDesc> filterOp, VectorizationContext vContext, VectorFilterDesc vectorFilterDesc) @@ -4397,9 +4399,10 @@ public class Vectorizer implements PhysicalPlanResolver { vContext, vectorFilterDesc); } - private static Operator<? extends OperatorDesc> vectorizeTopNKeyOperator( - Operator<? extends OperatorDesc> topNKeyOperator, VectorizationContext vContext, - VectorTopNKeyDesc vectorTopNKeyDesc) throws HiveException { + @VisibleForTesting + public static Operator<? extends OperatorDesc> vectorizeTopNKeyOperator( + Operator<? extends OperatorDesc> topNKeyOperator, VectorizationContext vContext, + VectorTopNKeyDesc vectorTopNKeyDesc) throws HiveException { TopNKeyDesc topNKeyDesc = (TopNKeyDesc) topNKeyOperator.getConf(); VectorExpression[] keyExpressions = getVectorExpressions(vContext, topNKeyDesc.getKeyColumns()); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java index 2e601d6fdac..b85bae92b61 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java @@ -21,17 +21,15 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualDoubleScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongColumn; import org.junit.Assert; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; -import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.Operator; import 
org.apache.hadoop.hive.ql.exec.OperatorFactory; -import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualDoubleScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongColumn; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -39,57 +37,12 @@ import org.apache.hadoop.hive.ql.plan.FilterDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.VectorFilterDesc; import org.junit.Test; +import org.junit.jupiter.api.Assertions; /** * Test cases for vectorized filter operator. */ -public class TestVectorFilterOperator { - - HiveConf hconf = new HiveConf(); - - /** - * Fundamental logic and performance tests for vector filters belong here. 
- * - * For tests about filters to cover specific operator and data type combinations, - * see also the other filter tests under org.apache.hadoop.hive.ql.exec.vector.expressions - */ - public static class FakeDataReader { - private final int size; - private final VectorizedRowBatch vrg; - private int currentSize = 0; - private final int numCols; - private final int len = 1024; - - public FakeDataReader(int size, int numCols) { - this.size = size; - this.numCols = numCols; - vrg = new VectorizedRowBatch(numCols, len); - for (int i = 0; i < numCols; i++) { - try { - Thread.sleep(2); - } catch (InterruptedException ignore) {} - vrg.cols[i] = getLongVector(len); - } - } - - public VectorizedRowBatch getNext() { - if (currentSize >= size) { - vrg.size = 0; - return vrg; - } else { - vrg.size = len; - currentSize += vrg.size; - vrg.selectedInUse = false; - return vrg; - } - } - - private LongColumnVector getLongVector(int len) { - LongColumnVector lcv = new LongColumnVector(len); - TestVectorizedRowBatch.setRandomLongCol(lcv); - return lcv; - } - } +public class TestVectorFilterOperator extends TestVectorOperator{ private VectorFilterOperator getAVectorFilterOperator() throws HiveException { ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Long.class, "col1", "table", false); @@ -110,14 +63,9 @@ public class TestVectorFilterOperator { @Test public void testBasicFilterOperator() throws HiveException { VectorFilterOperator vfo = getAVectorFilterOperator(); - vfo.initialize(hconf, null); - VectorExpression ve1 = new FilterLongColGreaterLongColumn(0,1); - VectorExpression ve2 = new FilterLongColEqualDoubleScalar(2, 0); - VectorExpression ve3 = new FilterExprAndExpr(); - ve3.setChildExpressions(new VectorExpression[] {ve1, ve2}); - vfo.setFilterCondition(ve3); + prepareVectorFilterOperation(vfo); - FakeDataReader fdr = new FakeDataReader(1024*1, 3); + FakeDataReader fdr = new FakeDataReader(1024*1, 3, FakeDataSampleType.Random); VectorizedRowBatch vrg = fdr.getNext(); @@ 
-139,14 +87,9 @@ public class TestVectorFilterOperator { @Test public void testBasicFilterLargeData() throws HiveException { VectorFilterOperator vfo = getAVectorFilterOperator(); - vfo.initialize(hconf, null); - VectorExpression ve1 = new FilterLongColGreaterLongColumn(0,1); - VectorExpression ve2 = new FilterLongColEqualDoubleScalar(2, 0); - VectorExpression ve3 = new FilterExprAndExpr(); - ve3.setChildExpressions(new VectorExpression[] {ve1, ve2}); - vfo.setFilterCondition(ve3); + prepareVectorFilterOperation(vfo); - FakeDataReader fdr = new FakeDataReader(16*1024*1024, 3); + FakeDataReader fdr = new FakeDataReader(16*1024*1024, 3, FakeDataSampleType.Random); long startTime = System.currentTimeMillis(); VectorizedRowBatch vrg = fdr.getNext(); @@ -160,7 +103,7 @@ public class TestVectorFilterOperator { //Base time - fdr = new FakeDataReader(16*1024*1024, 3); + fdr = new FakeDataReader(16*1024*1024, 3, FakeDataSampleType.Random); long startTime1 = System.currentTimeMillis(); vrg = fdr.getNext(); @@ -178,5 +121,32 @@ public class TestVectorFilterOperator { long endTime1 = System.currentTimeMillis(); System.out.println("testBaseFilterOperator base Op Time = "+(endTime1-startTime1)); } + + @Test + public void testVectorFilterHasSelectedSmallerThanBatchDoNotThrowException() throws HiveException { + + VectorFilterOperator vfo = getAVectorFilterOperator(); + + FakeDataReader fdr = new FakeDataReader(1024*1, 3, FakeDataSampleType.OrderedSequence); + + prepareVectorFilterOperation(vfo); + + VectorizedRowBatch vrg = fdr.getNext(); + + vrg.selected = new int[] { 1, 2, 3, 4}; + + Assertions.assertDoesNotThrow(() -> vfo.process(vrg, 0)); + } + + private void prepareVectorFilterOperation(VectorFilterOperator vfo) throws HiveException { + vfo.initialize(hiveConf, null); + + VectorExpression ve1 = new FilterLongColGreaterLongColumn(0,1); + VectorExpression ve2 = new FilterLongColEqualDoubleScalar(2, 0); + VectorExpression ve3 = new FilterExprAndExpr(); + 
ve3.setChildExpressions(new VectorExpression[] {ve1, ve2}); + + vfo.setFilterCondition(ve3); + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorOperator.java new file mode 100644 index 00000000000..28678af0bc3 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorOperator.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.conf.HiveConf; + +public abstract class TestVectorOperator { + + protected HiveConf hiveConf = new HiveConf(); + + public enum FakeDataSampleType { + OrderedSequence, + Random, + Repeated + } + + public class FakeDataReader { + private final int size; + private final VectorizedRowBatch vrg; + private int currentSize = 0; + private final int len = 1024; + + public FakeDataReader(int size, int numCols, FakeDataSampleType fakeDataSampleType) { + this.size = size; + vrg = new VectorizedRowBatch(numCols, len); + for (int i = 0; i < numCols; i++) { + try { + Thread.sleep(2); + } catch (InterruptedException ignore) { + } + vrg.cols[i] = getLongVector(fakeDataSampleType); + } + } + + public VectorizedRowBatch getNext() { + if (currentSize >= size) { + vrg.size = 0; + } else { + vrg.size = len; + currentSize += vrg.size; + vrg.selectedInUse = false; + } + return vrg; + } + + private LongColumnVector getLongVector(FakeDataSampleType fakeDataSampleType) { + LongColumnVector lcv = new LongColumnVector(len); + + switch (fakeDataSampleType) { + case OrderedSequence: + TestVectorizedRowBatch.setOrderedSequenceLongCol(lcv); + break; + case Random: + TestVectorizedRowBatch.setRandomLongCol(lcv); + break; + case Repeated: + TestVectorizedRowBatch.setRepeatingLongCol(lcv); + break; + } + + return lcv; + } + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorTopNKeyOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorTopNKeyOperator.java new file mode 100644 index 00000000000..77fce5248e0 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorTopNKeyOperator.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.TopNKeyDesc; +import org.apache.hadoop.hive.ql.plan.VectorTopNKeyDesc; +import org.junit.Test; +import org.junit.jupiter.api.Assertions; + +import java.util.ArrayList; +import java.util.List; + +public class TestVectorTopNKeyOperator extends TestVectorOperator { + + @Test + public void testTopNHasSelectedSmallerThanBatchDoesNotThrowException() throws HiveException { + List<String> columns = new ArrayList<>(); + columns.add("col1"); + TopNKeyDesc topNKeyDesc = new TopNKeyDesc(); + topNKeyDesc.setCheckEfficiencyNumBatches(1); + topNKeyDesc.setTopN(2); + + Operator<? 
extends OperatorDesc> filterOp = + OperatorFactory.get(new CompilationOpContext(), topNKeyDesc); + + VectorizationContext vc = new VectorizationContext("name", columns); + + VectorTopNKeyOperator vfo = (VectorTopNKeyOperator) Vectorizer.vectorizeTopNKeyOperator(filterOp, vc, new VectorTopNKeyDesc()); + + vfo.initialize(hiveConf, null); + + FakeDataReader fdr = new FakeDataReader(1024, 3, FakeDataSampleType.Repeated); + VectorizedRowBatch vrg = fdr.getNext(); + + vrg.selected = new int[] { 1, 2, 3, 4}; + + Assertions.assertDoesNotThrow(() -> vfo.process(vrg, 0)); + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java index 258b47bbec5..0c2c0204d15 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java @@ -162,6 +162,16 @@ public class TestVectorizedRowBatch { } } + /** + * Set the vector to sample data that is a monotonically increasing sequence of numbers: 0, 1, 2, ... + * @param col + */ + public static void setOrderedSequenceLongCol(LongColumnVector col) { + int size = col.vector.length; + for(int i = 0; i < size; i++) { + col.vector[i] = i; + } + } /** * Set the vector to sample data that repeats an iteration from 0 to 99.