[hive] branch master updated: HIVE-26992: Select count(*) query fails with error "java.lang.ArrayIndexOutOfBoundsException: arraycopy: last source index 1024 out of bounds for int[256]" (Zsolt Miskolczi, reviewed by Denys Kuzmenko, Krisztian Kasa)

krisztiankasa Tue, 14 Feb 2023 02:31:51 -0800

This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git



The following commit(s) were added to refs/heads/master by this push:
     new 9aefef155ae HIVE-26992: Select count(*) query fails with error "java.lang.ArrayIndexOutOfBoundsException: arraycopy: last source index 1024 out of bounds for int[256]" (Zsolt Miskolczi, reviewed by Denys Kuzmenko, Krisztian Kasa)
9aefef155ae is described below

commit 9aefef155ae3afef444944324cd91642b0ae1355
Author: InvisibleProgrammer <zsolt.miskol...@gmail.com>
AuthorDate: Tue Feb 14 11:31:35 2023 +0100

    HIVE-26992: Select count(*) query fails with error "java.lang.ArrayIndexOutOfBoundsException: arraycopy: last source index 1024 out of bounds for int[256]" (Zsolt Miskolczi, reviewed by Denys Kuzmenko, Krisztian Kasa)
---
 .../hive/ql/exec/vector/VectorFilterOperator.java  |   2 +-
 .../hive/ql/exec/vector/VectorTopNKeyOperator.java |   2 +-
 .../hive/ql/optimizer/physical/Vectorizer.java     |   9 +-
 .../ql/exec/vector/TestVectorFilterOperator.java   | 106 ++++++++-------------
 .../hive/ql/exec/vector/TestVectorOperator.java    |  80 ++++++++++++++++
 .../ql/exec/vector/TestVectorTopNKeyOperator.java  |  61 ++++++++++++
 .../ql/exec/vector/TestVectorizedRowBatch.java     |  10 ++
 7 files changed, 197 insertions(+), 73 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
index 37c0ed8889a..9cfe9fbec15 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
@@ -113,7 +113,7 @@ public class VectorFilterOperator extends FilterOperator
 
     //The selected vector represents selected rows.
     //Clone the selected vector
-    System.arraycopy(vrg.selected, 0, temporarySelected, 0, vrg.size);
+    System.arraycopy(vrg.selected, 0, temporarySelected, 0, vrg.selected.length);
     int [] selectedBackup = vrg.selected;
     vrg.selected = temporarySelected;
     int sizeBackup = vrg.size;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
index 10567c7180a..e44db5c910f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
@@ -118,7 +118,7 @@ public class VectorTopNKeyOperator extends Operator<TopNKeyDesc> implements Vect
     incomingBatches++;
     // The selected vector represents selected rows.
     // Clone the selected vector
-    System.arraycopy(batch.selected, 0, temporarySelected, 0, batch.size);
+    System.arraycopy(batch.selected, 0, temporarySelected, 0, batch.selected.length);
     int [] selectedBackup = batch.selected;
     batch.selected = temporarySelected;
     int sizeBackup = batch.size;
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index e59d01d09fa..3cf63e24c58 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -41,6 +41,7 @@ import java.util.TreeSet;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.commons.lang3.tuple.ImmutablePair;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
@@ -4377,6 +4378,7 @@ public class Vectorizer implements PhysicalPlanResolver {
     return false;
   }
 
+  @VisibleForTesting
   public static Operator<? extends OperatorDesc> vectorizeFilterOperator(
       Operator<? extends OperatorDesc> filterOp, VectorizationContext vContext,
       VectorFilterDesc vectorFilterDesc)
@@ -4397,9 +4399,10 @@ public class Vectorizer implements PhysicalPlanResolver {
         vContext, vectorFilterDesc);
   }
 
-  private static Operator<? extends OperatorDesc> vectorizeTopNKeyOperator(
-      Operator<? extends OperatorDesc> topNKeyOperator, VectorizationContext vContext,
-      VectorTopNKeyDesc vectorTopNKeyDesc) throws HiveException {
+  @VisibleForTesting
+  public static Operator<? extends OperatorDesc> vectorizeTopNKeyOperator(
+          Operator<? extends OperatorDesc> topNKeyOperator, VectorizationContext vContext,
+          VectorTopNKeyDesc vectorTopNKeyDesc) throws HiveException {
 
     TopNKeyDesc topNKeyDesc = (TopNKeyDesc) topNKeyOperator.getConf();
     VectorExpression[] keyExpressions = getVectorExpressions(vContext, topNKeyDesc.getKeyColumns());
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
index 2e601d6fdac..b85bae92b61 100644
--- 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
@@ -21,17 +21,15 @@ package org.apache.hadoop.hive.ql.exec.vector;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import 
org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualDoubleScalar;
+import 
org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongColumn;
 import org.junit.Assert;
 
-import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
-import 
org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualDoubleScalar;
-import 
org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongColumn;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -39,57 +37,12 @@ import org.apache.hadoop.hive.ql.plan.FilterDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.VectorFilterDesc;
 import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
 
 /**
  * Test cases for vectorized filter operator.
  */
-public class TestVectorFilterOperator {
-
-  HiveConf hconf = new HiveConf();
-
-  /**
-   * Fundamental logic and performance tests for vector filters belong here.
-   *
-   * For tests about filters to cover specific operator and data type 
combinations,
-   * see also the other filter tests under 
org.apache.hadoop.hive.ql.exec.vector.expressions
-   */
-  public static class FakeDataReader {
-    private final int size;
-    private final VectorizedRowBatch vrg;
-    private int currentSize = 0;
-    private final int numCols;
-    private final int len = 1024;
-
-    public FakeDataReader(int size, int numCols) {
-      this.size = size;
-      this.numCols = numCols;
-      vrg = new VectorizedRowBatch(numCols, len);
-      for (int i = 0; i < numCols; i++) {
-        try {
-          Thread.sleep(2);
-        } catch (InterruptedException ignore) {}
-        vrg.cols[i] = getLongVector(len);
-      }
-    }
-
-    public VectorizedRowBatch getNext() {
-      if (currentSize >= size) {
-        vrg.size = 0;
-        return vrg;
-      } else {
-        vrg.size = len;
-        currentSize += vrg.size;
-        vrg.selectedInUse = false;
-        return vrg;
-      }
-    }
-
-    private LongColumnVector getLongVector(int len) {
-      LongColumnVector lcv = new LongColumnVector(len);
-      TestVectorizedRowBatch.setRandomLongCol(lcv);
-      return lcv;
-    }
-  }
+public class TestVectorFilterOperator extends TestVectorOperator{
 
   private VectorFilterOperator getAVectorFilterOperator() throws HiveException 
{
     ExprNodeColumnDesc col1Expr = new  ExprNodeColumnDesc(Long.class, "col1", 
"table", false);
@@ -110,14 +63,9 @@ public class TestVectorFilterOperator {
   @Test
   public void testBasicFilterOperator() throws HiveException {
     VectorFilterOperator vfo = getAVectorFilterOperator();
-    vfo.initialize(hconf, null);
-    VectorExpression ve1 = new FilterLongColGreaterLongColumn(0,1);
-    VectorExpression ve2 = new FilterLongColEqualDoubleScalar(2, 0);
-    VectorExpression ve3 = new FilterExprAndExpr();
-    ve3.setChildExpressions(new VectorExpression[] {ve1, ve2});
-    vfo.setFilterCondition(ve3);
+    prepareVectorFilterOperation(vfo);
 
-    FakeDataReader fdr = new FakeDataReader(1024*1, 3);
+    FakeDataReader fdr = new FakeDataReader(1024*1, 3, 
FakeDataSampleType.Random);
 
     VectorizedRowBatch vrg = fdr.getNext();
 
@@ -139,14 +87,9 @@ public class TestVectorFilterOperator {
   @Test
   public void testBasicFilterLargeData() throws HiveException {
     VectorFilterOperator vfo = getAVectorFilterOperator();
-    vfo.initialize(hconf, null);
-    VectorExpression ve1 = new FilterLongColGreaterLongColumn(0,1);
-    VectorExpression ve2 = new FilterLongColEqualDoubleScalar(2, 0);
-    VectorExpression ve3 = new FilterExprAndExpr();
-    ve3.setChildExpressions(new VectorExpression[] {ve1, ve2});
-    vfo.setFilterCondition(ve3);
+    prepareVectorFilterOperation(vfo);
 
-    FakeDataReader fdr = new FakeDataReader(16*1024*1024, 3);
+    FakeDataReader fdr = new FakeDataReader(16*1024*1024, 3, 
FakeDataSampleType.Random);
 
     long startTime = System.currentTimeMillis();
     VectorizedRowBatch vrg = fdr.getNext();
@@ -160,7 +103,7 @@ public class TestVectorFilterOperator {
 
     //Base time
 
-    fdr = new FakeDataReader(16*1024*1024, 3);
+    fdr = new FakeDataReader(16*1024*1024, 3, FakeDataSampleType.Random);
 
     long startTime1 = System.currentTimeMillis();
     vrg = fdr.getNext();
@@ -178,5 +121,32 @@ public class TestVectorFilterOperator {
     long endTime1 = System.currentTimeMillis();
     System.out.println("testBaseFilterOperator base Op Time = 
"+(endTime1-startTime1));
   }
+
+  @Test
+  public void testVectorFilterHasSelectedSmallerThanBatchDoNotThrowException() 
throws HiveException {
+
+    VectorFilterOperator vfo = getAVectorFilterOperator();
+
+    FakeDataReader fdr = new FakeDataReader(1024*1, 3, 
FakeDataSampleType.OrderedSequence);
+
+    prepareVectorFilterOperation(vfo);
+
+    VectorizedRowBatch vrg = fdr.getNext();
+
+    vrg.selected = new int[] { 1, 2, 3, 4};
+
+    Assertions.assertDoesNotThrow(() -> vfo.process(vrg, 0));
+  }
+
+  private void prepareVectorFilterOperation(VectorFilterOperator vfo) throws 
HiveException {
+    vfo.initialize(hiveConf, null);
+
+    VectorExpression ve1 = new FilterLongColGreaterLongColumn(0,1);
+    VectorExpression ve2 = new FilterLongColEqualDoubleScalar(2, 0);
+    VectorExpression ve3 = new FilterExprAndExpr();
+    ve3.setChildExpressions(new VectorExpression[] {ve1, ve2});
+
+    vfo.setFilterCondition(ve3);
+  }
 }
 
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorOperator.java 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorOperator.java
new file mode 100644
index 00000000000..28678af0bc3
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorOperator.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+
+public abstract class TestVectorOperator {
+
+  protected HiveConf hiveConf = new HiveConf();
+
+  public enum FakeDataSampleType {
+    OrderedSequence,
+    Random,
+    Repeated
+  }
+
+  public class FakeDataReader {
+    private final int size;
+    private final VectorizedRowBatch vrg;
+    private int currentSize = 0;
+    private final int len = 1024;
+
+    public FakeDataReader(int size, int numCols, FakeDataSampleType 
fakeDataSampleType) {
+      this.size = size;
+      vrg = new VectorizedRowBatch(numCols, len);
+      for (int i = 0; i < numCols; i++) {
+        try {
+          Thread.sleep(2);
+        } catch (InterruptedException ignore) {
+        }
+        vrg.cols[i] = getLongVector(fakeDataSampleType);
+      }
+    }
+
+    public VectorizedRowBatch getNext() {
+      if (currentSize >= size) {
+        vrg.size = 0;
+      } else {
+        vrg.size = len;
+        currentSize += vrg.size;
+        vrg.selectedInUse = false;
+      }
+      return vrg;
+    }
+
+    private LongColumnVector getLongVector(FakeDataSampleType 
fakeDataSampleType) {
+      LongColumnVector lcv = new LongColumnVector(len);
+
+      switch (fakeDataSampleType) {
+        case OrderedSequence:
+          TestVectorizedRowBatch.setOrderedSequenceLongCol(lcv);
+          break;
+        case Random:
+          TestVectorizedRowBatch.setRandomLongCol(lcv);
+          break;
+        case Repeated:
+          TestVectorizedRowBatch.setRepeatingLongCol(lcv);
+          break;
+      }
+
+      return lcv;
+    }
+  }
+}
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorTopNKeyOperator.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorTopNKeyOperator.java
new file mode 100644
index 00000000000..77fce5248e0
--- /dev/null
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorTopNKeyOperator.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.TopNKeyDesc;
+import org.apache.hadoop.hive.ql.plan.VectorTopNKeyDesc;
+import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class TestVectorTopNKeyOperator extends TestVectorOperator {
+
+  @Test
+  public void testTopNHasSelectedSmallerThanBatchDoesNotThrowException() 
throws HiveException {
+    List<String> columns = new ArrayList<>();
+    columns.add("col1");
+    TopNKeyDesc topNKeyDesc = new TopNKeyDesc();
+    topNKeyDesc.setCheckEfficiencyNumBatches(1);
+    topNKeyDesc.setTopN(2);
+
+    Operator<? extends OperatorDesc> filterOp =
+            OperatorFactory.get(new CompilationOpContext(), topNKeyDesc);
+
+    VectorizationContext vc = new VectorizationContext("name", columns);
+
+    VectorTopNKeyOperator vfo = (VectorTopNKeyOperator) 
Vectorizer.vectorizeTopNKeyOperator(filterOp, vc, new VectorTopNKeyDesc());
+
+    vfo.initialize(hiveConf, null);
+
+    FakeDataReader fdr = new FakeDataReader(1024, 3, 
FakeDataSampleType.Repeated);
+    VectorizedRowBatch vrg = fdr.getNext();
+
+    vrg.selected = new int[] { 1, 2, 3, 4};
+
+    Assertions.assertDoesNotThrow(() -> vfo.process(vrg, 0));
+  }
+}
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java
index 258b47bbec5..0c2c0204d15 100644
--- 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java
@@ -162,6 +162,16 @@ public class TestVectorizedRowBatch {
     }
   }
 
+  /**
+   * Set the vector to sample data that is a monotonous sequence of numbers: 
0, 1, 2, ...
+   * @param col
+   */
+  public static void setOrderedSequenceLongCol(LongColumnVector col) {
+    int size = col.vector.length;
+    for(int i = 0; i < size; i++) {
+      col.vector[i] = i;
+    }
+  }
 
   /**
    * Set the vector to sample data that repeats an iteration from 0 to 99.

[hive] branch master updated: HIVE-26992: Select count(*) query fails with error "java.lang.ArrayIndexOutOfBoundsException: arraycopy: last source index 1024 out of bounds for int[256]" (Zsolt Miskolczi, reviewed by Denys Kuzmenko, Krisztian Kasa)

Reply via email to