[ 
https://issues.apache.org/jira/browse/DRILL-6626?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16553476#comment-16553476
 ] 

ASF GitHub Bot commented on DRILL-6626:
---------------------------------------

Ben-Zvi closed pull request #1394: DRILL-6626: Fixed an IndexOutOfBoundException during aggregator rehash
URL: https://github.com/apache/drill/pull/1394
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggBatch.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggBatch.java
index d37631be45c..ba928ae8f2d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggBatch.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggBatch.java
@@ -114,8 +114,13 @@ public RecordBatchMemoryManager getRecordBatchMemoryManager() {
 
     @Override
     public void update() {
+      update(incoming);
+    }
+
+    @Override
+    public void update(RecordBatch incomingRecordBatch) {
       // Get sizing information for the batch.
-      setRecordBatchSizer(new RecordBatchSizer(incoming));
+      setRecordBatchSizer(new RecordBatchSizer(incomingRecordBatch));
 
       int fieldId = 0;
       int newOutgoingRowWidth = 0;
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggTemplate.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggTemplate.java
index 2f3bc23da33..4bbfa05a16e 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggTemplate.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggTemplate.java
@@ -584,6 +584,11 @@ public AggOutcome doWork() {
         currentBatchRecordCount = incoming.getRecordCount(); // initialize for first non empty batch
         // Calculate the number of partitions based on actual incoming data
         delayedSetup();
+        // Update the record batch manager since this is the first batch with data; we need to
+        // perform the update before any processing.
+        // NOTE - We pass the incoming record batch explicitly because it could be a spilled record (different
+        //        from the instance owned by the HashAggBatch).
+        outgoing.getRecordBatchMemoryManager().update(incoming);
       }
 
       //
@@ -666,7 +671,9 @@ public AggOutcome doWork() {
           // remember EMIT, but continue like handling OK
 
         case OK:
-          outgoing.getRecordBatchMemoryManager().update();
+          // NOTE - We pass the incoming record batch explicitly because it could be a spilled record (different
+          //        from the instance owned by the HashAggBatch).
+          outgoing.getRecordBatchMemoryManager().update(incoming);
 
           currentBatchRecordCount = incoming.getRecordCount(); // size of next batch
 
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java
index 756b3f3a208..83b72d7c70c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java
@@ -798,12 +798,10 @@ private void resizeAndRehashIfNeeded() {
 
     IntVector newStartIndices = allocMetadataVector(tableSize, EMPTY_SLOT);
 
-    int idx = 0;
     for (int i = 0; i < batchHolders.size(); i++) {
       BatchHolder bh = batchHolders.get(i);
-      int batchStartIdx = idx;
+      int batchStartIdx = i * BATCH_SIZE;
       bh.rehash(tableSize, newStartIndices, batchStartIdx);
-      idx += bh.getTargetBatchRowCount();
     }
 
     startIndices.clear();
@@ -816,7 +814,7 @@ private void resizeAndRehashIfNeeded() {
         logger.debug("Bucket: {}, startIdx[ {} ] = {}.", i, i, startIndices.getAccessor().get(i));
         int startIdx = startIndices.getAccessor().get(i);
         BatchHolder bh = batchHolders.get((startIdx >>> 16) & BATCH_MASK);
-        bh.dump(idx);
+        bh.dump(startIdx);
       }
     }
     resizingTime += System.currentTimeMillis() - t0;
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchMemoryManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchMemoryManager.java
index 79b28db2438..2372be2c900 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchMemoryManager.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchMemoryManager.java
@@ -154,6 +154,9 @@ public void update(int inputIndex) {
 
   public void update() {};
 
+  public void update(RecordBatch recordBatch) {
+  }
+
   public void update(RecordBatch recordBatch, int index) {
     // Get sizing information for the batch.
     setRecordBatchSizer(index, new RecordBatchSizer(recordBatch));
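
A note on the HashTableTemplate change above: the table addresses entries through a composite index whose high bits select the BatchHolder and whose low 16 bits are the offset inside it, as the existing decode `(startIdx >>> 16) & BATCH_MASK` in the debug loop shows. The sketch below is an illustrative model of that scheme, assuming the fixed per-holder stride BATCH_SIZE is 1 << 16 with BATCH_MASK as its low-bit mask; the class and values are hypothetical, not the real Drill code. It shows why the start index for holder i must be i * BATCH_SIZE: a running sum of getTargetBatchRowCount(), which can be far smaller than the stride once the output batch size is restricted, produces indices that no longer decode to the right holder and offset.

{code}
// Illustrative model only -- not the real HashTableTemplate. Assumes the fixed
// per-BatchHolder stride is BATCH_SIZE = 1 << 16 with BATCH_MASK = BATCH_SIZE - 1,
// matching the (startIdx >>> 16) & BATCH_MASK decode visible in the patch.
public class CompositeIndexSketch {
  static final int BATCH_SIZE = 1 << 16;
  static final int BATCH_MASK = BATCH_SIZE - 1;

  // Encode: holder number in the high bits, offset within the holder in the low 16 bits.
  static int encode(int batchIndex, int offsetWithinBatch) {
    return batchIndex * BATCH_SIZE + offsetWithinBatch;
  }

  // Decode, as the debug loop in resizeAndRehashIfNeeded() does.
  static int batchOf(int compositeIdx)  { return (compositeIdx >>> 16) & BATCH_MASK; }
  static int offsetOf(int compositeIdx) { return compositeIdx & BATCH_MASK; }

  public static void main(String[] args) {
    // With the fix, holder i starts at i * BATCH_SIZE, so encode/decode round-trips.
    int good = encode(3, 42);
    System.out.println(batchOf(good) + " / " + offsetOf(good));   // prints 3 / 42

    // The old code accumulated the actual rows per holder instead. With a restricted
    // output batch size a holder may carry only a handful of rows (hypothetical value):
    int targetBatchRowCount = 7;
    int oldStartOfHolder3 = 3 * targetBatchRowCount;              // 21, not 3 * 65536
    int bad = oldStartOfHolder3 + 42;
    System.out.println(batchOf(bad) + " / " + offsetOf(bad));     // prints 0 / 63 -- wrong holder and offset
  }
}
{code}

That mismatch between how the start indices were produced and how they are later decoded is consistent with the out-of-range lookup reported in the stack trace below (Index: 26474, Size: 7).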


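Separately, the HashAggBatch / RecordBatchMemoryManager part of the patch adds an update(RecordBatch) overload so the memory manager sizes whichever batch the aggregator is actually reading; during spill processing that is a re-read spilled batch, not the operator's own upstream reference. Below is a minimal sketch of that delegation pattern, with hypothetical class names; it mirrors only the shape of the change, not the real Drill classes.

{code}
// Hypothetical names; not the real Drill classes, just the shape of the delegation.
interface Batch {
  int getRecordCount();
}

class MemoryManagerSketch {
  // Pre-existing no-arg hook.
  public void update() { }

  // New overload: size an explicitly supplied batch, which during spill processing is a
  // re-read spilled batch rather than the instance the operator was constructed with.
  public void update(Batch batch) {
    // ... derive sizing (row widths, record counts) from 'batch' here ...
  }
}

class HashAggSizerSketch extends MemoryManagerSketch {
  private final Batch incoming;   // the operator's upstream batch

  HashAggSizerSketch(Batch incoming) {
    this.incoming = incoming;
  }

  @Override
  public void update() {
    update(incoming);             // old entry point now delegates to the new overload
  }
}
{code}

The HashAggTemplate call sites then pass the batch explicitly via outgoing.getRecordBatchMemoryManager().update(incoming), both for the first non-empty batch and on each subsequent OK outcome.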
 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Hash Aggregate: Index out of bounds with small output batch size and spilling
> -----------------------------------------------------------------------------
>
>                 Key: DRILL-6626
>                 URL: https://issues.apache.org/jira/browse/DRILL-6626
>             Project: Apache Drill
>          Issue Type: Bug
>          Components: Execution - Relational Operators
>    Affects Versions: 1.14.0
>            Reporter: Boaz Ben-Zvi
>            Assignee: salim achouche
>            Priority: Major
>              Labels: pull-request-available
>
>    This new IOOB failure was seen while trying to recreate the NPE failure in 
> DRILL-6622 (over TPC-DS SF1). The proposed fix for the latter (PR #1391) does 
> not seem to make a difference.
> This IOOB can easily be created with other large Hash-Agg queries that need 
> to spill. 
> The IOOB was caused after restricting the output batch size (to force many), 
> and the Hash Aggr memory (to force a spill):
> {code}
> 0: jdbc:drill:zk=local> alter system set `drill.exec.memory.operator.output_batch_size` = 262144;
> +-------+--------------------------------------------------------+
> |  ok   |                        summary                         |
> +-------+--------------------------------------------------------+
> | true  | drill.exec.memory.operator.output_batch_size updated.  |
> +-------+--------------------------------------------------------+
> 1 row selected (0.106 seconds)
> 0: jdbc:drill:zk=local>
> 0: jdbc:drill:zk=local> alter session set `exec.errors.verbose` = true;
> +-------+-------------------------------+
> |  ok   |            summary            |
> +-------+-------------------------------+
> | true  | exec.errors.verbose updated.  |
> +-------+-------------------------------+
> 1 row selected (0.081 seconds)
> 0: jdbc:drill:zk=local>
> 0: jdbc:drill:zk=local> alter session set `exec.hashagg.mem_limit` = 16777216;
> +-------+----------------------------------+
> |  ok   |             summary              |
> +-------+----------------------------------+
> | true  | exec.hashagg.mem_limit updated.  |
> +-------+----------------------------------+
> 1 row selected (0.089 seconds)
> 0: jdbc:drill:zk=local>
> 0: jdbc:drill:zk=local> SELECT c_customer_id FROM dfs.`/data/tpcds/sf1/parquet/customer`
> . . . . . . . . . . . > UNION
> . . . . . . . . . . . > SELECT ca_address_id FROM dfs.`/data/tpcds/sf1/parquet/customer_address`
> . . . . . . . . . . . > UNION
> . . . . . . . . . . . > SELECT cd_credit_rating FROM dfs.`/data/tpcds/sf1/parquet/customer_demographics`
> . . . . . . . . . . . > UNION
> . . . . . . . . . . . > SELECT hd_buy_potential FROM dfs.`/data/tpcds/sf1/parquet/household_demographics`
> . . . . . . . . . . . > UNION
> . . . . . . . . . . . > SELECT i_item_id FROM dfs.`/data/tpcds/sf1/parquet/item`
> . . . . . . . . . . . > UNION
> . . . . . . . . . . . > SELECT p_promo_id FROM dfs.`/data/tpcds/sf1/parquet/promotion`
> . . . . . . . . . . . > UNION
> . . . . . . . . . . . > SELECT t_time_id FROM dfs.`/data/tpcds/sf1/parquet/time_dim`
> . . . . . . . . . . . > UNION
> . . . . . . . . . . . > SELECT d_date_id FROM dfs.`/data/tpcds/sf1/parquet/date_dim`
> . . . . . . . . . . . > UNION
> . . . . . . . . . . . > SELECT s_store_id FROM dfs.`/data/tpcds/sf1/parquet/store`
> . . . . . . . . . . . > UNION
> . . . . . . . . . . . > SELECT w_warehouse_id FROM dfs.`/data/tpcds/sf1/parquet/warehouse`
> . . . . . . . . . . . > UNION
> . . . . . . . . . . . > SELECT sm_ship_mode_id FROM dfs.`/data/tpcds/sf1/parquet/ship_mode`
> . . . . . . . . . . . > UNION
> . . . . . . . . . . . > SELECT r_reason_id FROM dfs.`/data/tpcds/sf1/parquet/reason`
> . . . . . . . . . . . > UNION
> . . . . . . . . . . . > SELECT cc_call_center_id FROM dfs.`/data/tpcds/sf1/parquet/call_center`
> . . . . . . . . . . . > UNION
> . . . . . . . . . . . > SELECT web_site_id FROM dfs.`/data/tpcds/sf1/parquet/web_site`
> . . . . . . . . . . . > UNION
> . . . . . . . . . . . > SELECT wp_web_page_id FROM dfs.`/data/tpcds/sf1/parquet/web_page`
> . . . . . . . . . . . > UNION
> . . . . . . . . . . . > SELECT cp_catalog_page_id FROM dfs.`/data/tpcds/sf1/parquet/catalog_page`;
> Error: SYSTEM ERROR: IndexOutOfBoundsException: Index: 26474, Size: 7
> Fragment 4:0
> [Error Id: d44e64ea-f474-436e-94b0-61c61eec2227 on 172.30.8.176:31020]
>   (java.lang.IndexOutOfBoundsException) Index: 26474, Size: 7
>     java.util.ArrayList.rangeCheck():653
>     java.util.ArrayList.get():429
>     org.apache.drill.exec.physical.impl.common.HashTableTemplate$BatchHolder.rehash():293
>     org.apache.drill.exec.physical.impl.common.HashTableTemplate$BatchHolder.access$1300():120
>     org.apache.drill.exec.physical.impl.common.HashTableTemplate.resizeAndRehashIfNeeded():805
>     org.apache.drill.exec.physical.impl.common.HashTableTemplate.put():682
>     org.apache.drill.exec.physical.impl.aggregate.HashAggTemplate.checkGroupAndAggrValues():1379
>     org.apache.drill.exec.physical.impl.aggregate.HashAggTemplate.doWork():604
>     org.apache.drill.exec.physical.impl.aggregate.HashAggBatch.innerNext():273
>     org.apache.drill.exec.record.AbstractRecordBatch.next():172
>     org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next():229
>     org.apache.drill.exec.record.AbstractRecordBatch.next():119
>     org.apache.drill.exec.record.AbstractRecordBatch.next():109
>     org.apache.drill.exec.record.AbstractUnaryRecordBatch.innerNext():63
>     org.apache.drill.exec.physical.impl.project.ProjectRecordBatch.innerNext():142
>     org.apache.drill.exec.record.AbstractRecordBatch.next():172
>     org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next():229
>     org.apache.drill.exec.physical.impl.BaseRootExec.next():103
>     org.apache.drill.exec.physical.impl.SingleSenderCreator$SingleSenderRootExec.innerNext():93
>     org.apache.drill.exec.physical.impl.BaseRootExec.next():93
>     org.apache.drill.exec.work.fragment.FragmentExecutor$1.run():294
>     org.apache.drill.exec.work.fragment.FragmentExecutor$1.run():281
>     java.security.AccessController.doPrivileged():-2
>     javax.security.auth.Subject.doAs():422
>     org.apache.hadoop.security.UserGroupInformation.doAs():1657
>     org.apache.drill.exec.work.fragment.FragmentExecutor.run():281
>     org.apache.drill.common.SelfCleaningRunnable.run():38
>     java.util.concurrent.ThreadPoolExecutor.runWorker():1142
>     java.util.concurrent.ThreadPoolExecutor$Worker.run():617
>     java.lang.Thread.run():745 (state=,code=0)
> {code}


