[ https://issues.apache.org/jira/browse/DRILL-6626?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16553476#comment-16553476 ]
ASF GitHub Bot commented on DRILL-6626: --------------------------------------- Ben-Zvi closed pull request #1394: DRILL-6626: Fixed an IndexOutOfBoundException during aggregator rehash URL: https://github.com/apache/drill/pull/1394 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggBatch.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggBatch.java index d37631be45c..ba928ae8f2d 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggBatch.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggBatch.java @@ -114,8 +114,13 @@ public RecordBatchMemoryManager getRecordBatchMemoryManager() { @Override public void update() { + update(incoming); + } + + @Override + public void update(RecordBatch incomingRecordBatch) { // Get sizing information for the batch. - setRecordBatchSizer(new RecordBatchSizer(incoming)); + setRecordBatchSizer(new RecordBatchSizer(incomingRecordBatch)); int fieldId = 0; int newOutgoingRowWidth = 0; diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggTemplate.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggTemplate.java index 2f3bc23da33..4bbfa05a16e 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggTemplate.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/aggregate/HashAggTemplate.java @@ -584,6 +584,11 @@ public AggOutcome doWork() { currentBatchRecordCount = incoming.getRecordCount(); // initialize for first non empty batch // Calculate the number of partitions based on actual incoming data delayedSetup(); + // Update the record batch manager since this is the first batch with data; we need to + // perform the update before any processing. + // NOTE - We pass the incoming record batch explicitly because it could be a spilled record (different + // from the instance owned by the HashAggBatch). + outgoing.getRecordBatchMemoryManager().update(incoming); } // @@ -666,7 +671,9 @@ public AggOutcome doWork() { // remember EMIT, but continue like handling OK case OK: - outgoing.getRecordBatchMemoryManager().update(); + // NOTE - We pass the incoming record batch explicitly because it could be a spilled record (different + // from the instance owned by the HashAggBatch). + outgoing.getRecordBatchMemoryManager().update(incoming); currentBatchRecordCount = incoming.getRecordCount(); // size of next batch diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java index 756b3f3a208..83b72d7c70c 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java @@ -798,12 +798,10 @@ private void resizeAndRehashIfNeeded() { IntVector newStartIndices = allocMetadataVector(tableSize, EMPTY_SLOT); - int idx = 0; for (int i = 0; i < batchHolders.size(); i++) { BatchHolder bh = batchHolders.get(i); - int batchStartIdx = idx; + int batchStartIdx = i * BATCH_SIZE; bh.rehash(tableSize, newStartIndices, batchStartIdx); - idx += bh.getTargetBatchRowCount(); } startIndices.clear(); @@ -816,7 +814,7 @@ private void resizeAndRehashIfNeeded() { logger.debug("Bucket: {}, startIdx[ {} ] = {}.", i, i, startIndices.getAccessor().get(i)); int startIdx = startIndices.getAccessor().get(i); BatchHolder bh = batchHolders.get((startIdx >>> 16) & BATCH_MASK); - bh.dump(idx); + bh.dump(startIdx); } } resizingTime += System.currentTimeMillis() - t0; diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchMemoryManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchMemoryManager.java index 79b28db2438..2372be2c900 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchMemoryManager.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchMemoryManager.java @@ -154,6 +154,9 @@ public void update(int inputIndex) { public void update() {}; + public void update(RecordBatch recordBatch) { + } + public void update(RecordBatch recordBatch, int index) { // Get sizing information for the batch. setRecordBatchSizer(index, new RecordBatchSizer(recordBatch)); ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Hash Aggregate: Index out of bounds with small output batch size and spilling > ----------------------------------------------------------------------------- > > Key: DRILL-6626 > URL: https://issues.apache.org/jira/browse/DRILL-6626 > Project: Apache Drill > Issue Type: Bug > Components: Execution - Relational Operators > Affects Versions: 1.14.0 > Reporter: Boaz Ben-Zvi > Assignee: salim achouche > Priority: Major > Labels: pull-request-available > > This new IOOB failure was seen while trying to recreate the NPE failure in > DRILL-6622 (over TPC-DS SF1). The proposed fix for the latter (PR #1391) does > not seem to make a difference. > This IOOB can easily be created with other large Hash-Agg queries that need > to spill. > The IOOB was caused after restricting the output batch size (to force many), > and the Hash Aggr memory (to force a spill): > {code} > 0: jdbc:drill:zk=local> alter system set > `drill.exec.memory.operator.output_batch_size` = 262144; > +-------+--------------------------------------------------------+ > | ok | summary | > +-------+--------------------------------------------------------+ > | true | drill.exec.memory.operator.output_batch_size updated. | > +-------+--------------------------------------------------------+ > 1 row selected (0.106 seconds) > 0: jdbc:drill:zk=local> > 0: jdbc:drill:zk=local> alter session set `exec.errors.verbose` = true; > +-------+-------------------------------+ > | ok | summary | > +-------+-------------------------------+ > | true | exec.errors.verbose updated. | > +-------+-------------------------------+ > 1 row selected (0.081 seconds) > 0: jdbc:drill:zk=local> > 0: jdbc:drill:zk=local> alter session set `exec.hashagg.mem_limit` = 16777216; > +-------+----------------------------------+ > | ok | summary | > +-------+----------------------------------+ > | true | exec.hashagg.mem_limit updated. | > +-------+----------------------------------+ > 1 row selected (0.089 seconds) > 0: jdbc:drill:zk=local> > 0: jdbc:drill:zk=local> SELECT c_customer_id FROM > dfs.`/data/tpcds/sf1/parquet/customer` > . . . . . . . . . . . > UNION > . . . . . . . . . . . > SELECT ca_address_id FROM > dfs.`/data/tpcds/sf1/parquet/customer_address` > . . . . . . . . . . . > UNION > . . . . . . . . . . . > SELECT cd_credit_rating FROM > dfs.`/data/tpcds/sf1/parquet/customer_demographics` > . . . . . . . . . . . > UNION > . . . . . . . . . . . > SELECT hd_buy_potential FROM > dfs.`/data/tpcds/sf1/parquet/household_demographics` > . . . . . . . . . . . > UNION > . . . . . . . . . . . > SELECT i_item_id FROM > dfs.`/data/tpcds/sf1/parquet/item` > . . . . . . . . . . . > UNION > . . . . . . . . . . . > SELECT p_promo_id FROM > dfs.`/data/tpcds/sf1/parquet/promotion` > . . . . . . . . . . . > UNION > . . . . . . . . . . . > SELECT t_time_id FROM > dfs.`/data/tpcds/sf1/parquet/time_dim` > . . . . . . . . . . . > UNION > . . . . . . . . . . . > SELECT d_date_id FROM > dfs.`/data/tpcds/sf1/parquet/date_dim` > . . . . . . . . . . . > UNION > . . . . . . . . . . . > SELECT s_store_id FROM > dfs.`/data/tpcds/sf1/parquet/store` > . . . . . . . . . . . > UNION > . . . . . . . . . . . > SELECT w_warehouse_id FROM > dfs.`/data/tpcds/sf1/parquet/warehouse` > . . . . . . . . . . . > UNION > . . . . . . . . . . . > SELECT sm_ship_mode_id FROM > dfs.`/data/tpcds/sf1/parquet/ship_mode` > . . . . . . . . . . . > UNION > . . . . . . . . . . . > SELECT r_reason_id FROM > dfs.`/data/tpcds/sf1/parquet/reason` > . . . . . . . . . . . > UNION > . . . . . . . . . . . > SELECT cc_call_center_id FROM > dfs.`/data/tpcds/sf1/parquet/call_center` > . . . . . . . . . . . > UNION > . . . . . . . . . . . > SELECT web_site_id FROM > dfs.`/data/tpcds/sf1/parquet/web_site` > . . . . . . . . . . . > UNION > . . . . . . . . . . . > SELECT wp_web_page_id FROM > dfs.`/data/tpcds/sf1/parquet/web_page` > . . . . . . . . . . . > UNION > . . . . . . . . . . . > SELECT cp_catalog_page_id FROM > dfs.`/data/tpcds/sf1/parquet/catalog_page`; > Error: SYSTEM ERROR: IndexOutOfBoundsException: Index: 26474, Size: 7 > Fragment 4:0 > [Error Id: d44e64ea-f474-436e-94b0-61c61eec2227 on 172.30.8.176:31020] > (java.lang.IndexOutOfBoundsException) Index: 26474, Size: 7 > java.util.ArrayList.rangeCheck():653 > java.util.ArrayList.get():429 > > org.apache.drill.exec.physical.impl.common.HashTableTemplate$BatchHolder.rehash():293 > > org.apache.drill.exec.physical.impl.common.HashTableTemplate$BatchHolder.access$1300():120 > > org.apache.drill.exec.physical.impl.common.HashTableTemplate.resizeAndRehashIfNeeded():805 > org.apache.drill.exec.physical.impl.common.HashTableTemplate.put():682 > > org.apache.drill.exec.physical.impl.aggregate.HashAggTemplate.checkGroupAndAggrValues():1379 > org.apache.drill.exec.physical.impl.aggregate.HashAggTemplate.doWork():604 > org.apache.drill.exec.physical.impl.aggregate.HashAggBatch.innerNext():273 > org.apache.drill.exec.record.AbstractRecordBatch.next():172 > > org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next():229 > org.apache.drill.exec.record.AbstractRecordBatch.next():119 > org.apache.drill.exec.record.AbstractRecordBatch.next():109 > org.apache.drill.exec.record.AbstractUnaryRecordBatch.innerNext():63 > > org.apache.drill.exec.physical.impl.project.ProjectRecordBatch.innerNext():142 > org.apache.drill.exec.record.AbstractRecordBatch.next():172 > > org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next():229 > org.apache.drill.exec.physical.impl.BaseRootExec.next():103 > > org.apache.drill.exec.physical.impl.SingleSenderCreator$SingleSenderRootExec.innerNext():93 > org.apache.drill.exec.physical.impl.BaseRootExec.next():93 > org.apache.drill.exec.work.fragment.FragmentExecutor$1.run():294 > org.apache.drill.exec.work.fragment.FragmentExecutor$1.run():281 > java.security.AccessController.doPrivileged():-2 > javax.security.auth.Subject.doAs():422 > org.apache.hadoop.security.UserGroupInformation.doAs():1657 > org.apache.drill.exec.work.fragment.FragmentExecutor.run():281 > org.apache.drill.common.SelfCleaningRunnable.run():38 > java.util.concurrent.ThreadPoolExecutor.runWorker():1142 > java.util.concurrent.ThreadPoolExecutor$Worker.run():617 > java.lang.Thread.run():745 (state=,code=0) > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)