[ https://issues.apache.org/jira/browse/DRILL-5253?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16067136#comment-16067136 ]
Rahul Challapalli edited comment on DRILL-5253 at 6/28/17 8:23 PM: ------------------------------------------------------------------- One more scenario where I hit this error on a single-node Drill cluster. Even in this case, the dataset is too large to attach here. {code} alter session set `planner.memory.max_query_memory_per_node` = 3507483648; select * from ( select columns[0] col1 from dfs.`/drill/testdata/hash-agg/seq.tbl` order by columns[0] ) d1 inner join ( select distinct columns[0] col1 from dfs.`/drill/testdata/hash-agg/uuid.tbl` union all select max(col1) max_col1 from dfs.`/drill/testdata/resource-manager/small_large_parquet` group by dir0 ) d2 on d1.col1 = d2.col1; {code} Exception {code} [Error Id: 3f41c730-8b67-48ff-96b6-15535c336626 ] at org.apache.drill.common.exceptions.UserException$Builder.build(UserException.java:550) ~[drill-common-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExecutor.java:244) [drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.common.SelfCleaningRunnable.run(SelfCleaningRunnable.java:38) [drill-common-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) [na:1.7.0_111] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) [na:1.7.0_111] at java.lang.Thread.run(Thread.java:745) [na:1.7.0_111] Caused by: org.apache.drill.exec.exception.OutOfMemoryException: org.apache.drill.exec.exception.OutOfMemoryException: Unable to allocate sv2 buffer after repeated attempts at org.apache.drill.exec.physical.impl.xsort.ExternalSortBatch.innerNext(ExternalSortBatch.java:383) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:162) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at 
org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:225) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:119) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:109) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.record.AbstractSingleRecordBatch.innerNext(AbstractSingleRecordBatch.java:51) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.physical.impl.svremover.RemovingRecordBatch.innerNext(RemovingRecordBatch.java:93) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:162) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:225) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:105) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.physical.impl.SingleSenderCreator$SingleSenderRootExec.innerNext(SingleSenderCreator.java:92) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:95) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.work.fragment.FragmentExecutor$1.run(FragmentExecutor.java:234) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.work.fragment.FragmentExecutor$1.run(FragmentExecutor.java:227) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at java.security.AccessController.doPrivileged(Native Method) ~[na:1.7.0_111] at javax.security.auth.Subject.doAs(Subject.java:415) ~[na:1.7.0_111] at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1595) ~[hadoop-common-2.7.0-mapr-1607.jar:na] at org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExecutor.java:227) [drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] ... 4 common frames omitted Caused by: org.apache.drill.exec.exception.OutOfMemoryException: Unable to allocate sv2 buffer after repeated attempts at org.apache.drill.exec.physical.impl.xsort.ExternalSortBatch.newSV2(ExternalSortBatch.java:662) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.physical.impl.xsort.ExternalSortBatch.innerNext(ExternalSortBatch.java:379) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] ... 21 common frames omitted {code} For this case there seems to be a memory accounting issue as well (probably while canceling the running fragments after OOM) {code} WARN o.a.d.exec.rpc.control.WorkEventBus - Fragment 26abfa45-7d5a-20d9-14c3-8a34a0e2018a:2:10 not found in the work bus. 2017-06-28 12:52:56,483 [26abfa45-7d5a-20d9-14c3-8a34a0e2018a:frag:2:12] WARN o.a.drill.exec.memory.BaseAllocator - Closed child allocator[op:2:12:2:ExternalSort] on parent allocator[frag:2:12]'s child list. Allocator(frag:2:12) 24000000/20000000/33638272/40029474652 (res/actual/peak/limit) child allocators: 1 Allocator(op:2:12:2:ExternalSort) 20000000/32768/29574784/29474652 (res/actual/peak/limit) child allocators: 0 ledgers: 1 ledger[191359279] allocator: op:2:12:2:ExternalSort), isOwning: true, size: 32768, references: 3, life: 13254566565832052..0, allocatorManager: [145147047, life: 13254566561809396..0] holds 3 buffers. 
DrillBuf[321514436], udle: [145147048 194..1217] DrillBuf[321514441], udle: [145147048 5313..14520] DrillBuf[321514438], udle: [145147048 1217..5313] reservations: 0 ledgers: 0 reservations: 0 2017-06-28 12:52:56,483 [26abfa45-7d5a-20d9-14c3-8a34a0e2018a:frag:2:12] INFO o.a.d.e.w.fragment.FragmentExecutor - 26abfa45-7d5a-20d9-14c3-8a34a0e2018a:2:12: State change requested FAILED --> FAILED 2017-06-28 12:52:56,483 [26abfa45-7d5a-20d9-14c3-8a34a0e2018a:frag:2:12] INFO o.a.d.e.w.fragment.FragmentExecutor - 26abfa45-7d5a-20d9-14c3-8a34a0e2018a:2:12: State change requested FAILED --> FINISHED 2017-06-28 12:52:56,484 [26abfa45-7d5a-20d9-14c3-8a34a0e2018a:frag:2:12] ERROR o.a.d.e.w.fragment.FragmentExecutor - SYSTEM ERROR: IllegalStateException: Allocator[op:2:12:2:ExternalSort] closed with outstanding buffers allocated (1). Allocator(op:2:12:2:ExternalSort) 20000000/32768/29574784/29474652 (res/actual/peak/limit) child allocators: 0 ledgers: 1 ledger[191359279] allocator: op:2:12:2:ExternalSort), isOwning: true, size: 32768, references: 3, life: 13254566565832052..0, allocatorManager: [145147047, life: 13254566561809396..0] holds 3 buffers. DrillBuf[321514436], udle: [145147048 194..1217] DrillBuf[321514441], udle: [145147048 5313..14520] DrillBuf[321514438], udle: [145147048 1217..5313] reservations: 0 Fragment 2:12 [Error Id: 6a419e8c-96e9-4918-a4fe-feb8aeb04c60 on qa-node190.qa.lab:31010] org.apache.drill.common.exceptions.UserException: SYSTEM ERROR: IllegalStateException: Allocator[op:2:12:2:ExternalSort] closed with outstanding buffers allocated (1). Allocator(op:2:12:2:ExternalSort) 20000000/32768/29574784/29474652 (res/actual/peak/limit) child allocators: 0 ledgers: 1 ledger[191359279] allocator: op:2:12:2:ExternalSort), isOwning: true, size: 32768, references: 3, life: 13254566565832052..0, allocatorManager: [145147047, life: 13254566561809396..0] holds 3 buffers. 
DrillBuf[321514436], udle: [145147048 194..1217] DrillBuf[321514441], udle: [145147048 5313..14520] DrillBuf[321514438], udle: [145147048 1217..5313] reservations: 0 Fragment 2:12 [Error Id: 6a419e8c-96e9-4918-a4fe-feb8aeb04c60 on qa-node190.qa.lab:31010] at org.apache.drill.common.exceptions.UserException$Builder.build(UserException.java:550) ~[drill-common-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.work.fragment.FragmentExecutor.sendFinalState(FragmentExecutor.java:295) [drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.work.fragment.FragmentExecutor.cleanup(FragmentExecutor.java:160) [drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExecutor.java:264) [drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.common.SelfCleaningRunnable.run(SelfCleaningRunnable.java:38) [drill-common-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) [na:1.7.0_111] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) [na:1.7.0_111] at java.lang.Thread.run(Thread.java:745) [na:1.7.0_111] Caused by: java.lang.IllegalStateException: Allocator[op:2:12:2:ExternalSort] closed with outstanding buffers allocated (1). Allocator(op:2:12:2:ExternalSort) 20000000/32768/29574784/29474652 (res/actual/peak/limit) child allocators: 0 ledgers: 1 ledger[191359279] allocator: op:2:12:2:ExternalSort), isOwning: true, size: 32768, references: 3, life: 13254566565832052..0, allocatorManager: [145147047, life: 13254566561809396..0] holds 3 buffers. 
DrillBuf[321514436], udle: [145147048 194..1217] DrillBuf[321514441], udle: [145147048 5313..14520] DrillBuf[321514438], udle: [145147048 1217..5313] reservations: 0 at org.apache.drill.exec.memory.BaseAllocator.close(BaseAllocator.java:504) ~[drill-memory-base-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.ops.AbstractOperatorExecContext.close(AbstractOperatorExecContext.java:86) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.ops.OperatorContextImpl.close(OperatorContextImpl.java:108) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.ops.FragmentContext.suppressingClose(FragmentContext.java:436) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.ops.FragmentContext.close(FragmentContext.java:425) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.work.fragment.FragmentExecutor.closeOutResources(FragmentExecutor.java:320) [drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.work.fragment.FragmentExecutor.cleanup(FragmentExecutor.java:155) [drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] ... 5 common frames omitted 2017-06-28 12:52:56,484 [drill-executor-374] DEBUG o.a.d.exec.rpc.control.WorkEventBus - Removing fragment manager: 26abfa45-7d5a-20d9-14c3-8a34a0e2018a:2:12 2017-06-28 12:52:56,484 [drill-executor-374] WARN o.a.d.exec.rpc.control.WorkEventBus - Fragment 26abfa45-7d5a-20d9-14c3-8a34a0e2018a:2:12 not found in the work bus. 
2017-06-28 12:52:56,486 [BitServer-4] WARN o.a.d.exec.rpc.control.WorkEventBus - A fragment message arrived but there was no registered listener for that message: profile { state: FAILED error { error_id: "6a419e8c-96e9-4918-a4fe-feb8aeb04c60" endpoint { address: "qa-node190.qa.lab" user_port: 31010 control_port: 31011 data_port: 31012 version: "1.11.0-SNAPSHOT" } error_type: SYSTEM message: "SYSTEM ERROR: IllegalStateException: Allocator[op:2:12:2:ExternalSort] closed with outstanding buffers allocated (1).\nAllocator(op:2:12:2:ExternalSort) 20000000/32768/29574784/29474652 (res/actual/peak/limit)\n child allocators: 0\n ledgers: 1\n ledger[191359279] allocator: op:2:12:2:ExternalSort), isOwning: true, size: 32768, references: 3, life: 13254566565832052..0, allocatorManager: [145147047, life: 13254566561809396..0] holds 3 buffers. \n DrillBuf[321514436], udle: [145147048 194..1217]\n DrillBuf[321514441], udle: [145147048 5313..14520]\n DrillBuf[321514438], udle: [145147048 1217..5313]\n reservations: 0\n\n\nFragment 2:12\n\n[Error Id: 6a419e8c-96e9-4918-a4fe-feb8aeb04c60 on qa-node190.qa.lab:31010]" exception { exception_class: "java.lang.IllegalStateException" message: "Allocator[op:2:12:2:ExternalSort] closed with outstanding buffers allocated (1).\nAllocator(op:2:12:2:ExternalSort) 20000000/32768/29574784/29474652 (res/actual/peak/limit)\n child allocators: 0\n ledgers: 1\n ledger[191359279] allocator: op:2:12:2:ExternalSort), isOwning: true, size: 32768, references: 3, life: 13254566565832052..0, allocatorManager: [145147047, life: 13254566561809396..0] holds 3 buffers. \n DrillBuf[321514436], udle: [145147048 194..1217]\n DrillBuf[321514441], udle: [145147048 5313..14520]\n DrillBuf[321514438], udle: [145147048 1217..5313]\n reservations: 0\n" stack_trace { class_name: "org.apache.drill.exec.memory.BaseAllocator" file_name: "BaseAllocator.java" line_number: 504 method_name: "close" is_native_method: false } ...... ...... 
{code} was (Author: rkins): One more scenario where I hit this error on a single node drill cluster. Even in this case the dataset is too large to attach here {code} alter session set `planner.memory.max_query_memory_per_node` = 3507483648; select * from ( select columns[0] col1 from dfs.`/drill/testdata/hash-agg/seq.tbl` order by columns[0] ) d1 inner join ( select distinct columns[0] col1 from dfs.`/drill/testdata/hash-agg/uuid.tbl` union all select max(col1) max_col1 from dfs.`/drill/testdata/resource-manager/small_large_parquet` group by dir0 ) d2 on d1.col1 = d2.col1; {code} Exception {code} [Error Id: 3f41c730-8b67-48ff-96b6-15535c336626 ] at org.apache.drill.common.exceptions.UserException$Builder.build(UserException.java:550) ~[drill-common-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExecutor.java:244) [drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.common.SelfCleaningRunnable.run(SelfCleaningRunnable.java:38) [drill-common-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) [na:1.7.0_111] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) [na:1.7.0_111] at java.lang.Thread.run(Thread.java:745) [na:1.7.0_111] Caused by: org.apache.drill.exec.exception.OutOfMemoryException: org.apache.drill.exec.exception.OutOfMemoryException: Unable to allocate sv2 buffer after repeated attempts at org.apache.drill.exec.physical.impl.xsort.ExternalSortBatch.innerNext(ExternalSortBatch.java:383) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:162) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:225) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at 
org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:119) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:109) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.record.AbstractSingleRecordBatch.innerNext(AbstractSingleRecordBatch.java:51) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.physical.impl.svremover.RemovingRecordBatch.innerNext(RemovingRecordBatch.java:93) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:162) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:225) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:105) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.physical.impl.SingleSenderCreator$SingleSenderRootExec.innerNext(SingleSenderCreator.java:92) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:95) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.work.fragment.FragmentExecutor$1.run(FragmentExecutor.java:234) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.work.fragment.FragmentExecutor$1.run(FragmentExecutor.java:227) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at java.security.AccessController.doPrivileged(Native Method) ~[na:1.7.0_111] at javax.security.auth.Subject.doAs(Subject.java:415) ~[na:1.7.0_111] at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1595) ~[hadoop-common-2.7.0-mapr-1607.jar:na] at 
org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExecutor.java:227) [drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] ... 4 common frames omitted Caused by: org.apache.drill.exec.exception.OutOfMemoryException: Unable to allocate sv2 buffer after repeated attempts at org.apache.drill.exec.physical.impl.xsort.ExternalSortBatch.newSV2(ExternalSortBatch.java:662) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.physical.impl.xsort.ExternalSortBatch.innerNext(ExternalSortBatch.java:379) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] ... 21 common frames omitted {code} For this case there seems to be a memory accounting issue as well {code} 2017-06-28 12:52:56,483 [26abfa45-7d5a-20d9-14c3-8a34a0e2018a:frag:2:12] WARN o.a.drill.exec.memory.BaseAllocator - Closed child allocator[op:2:12:2:ExternalSort] on parent allocator[frag:2:12]'s child list. Allocator(frag:2:12) 24000000/20000000/33638272/40029474652 (res/actual/peak/limit) child allocators: 1 Allocator(op:2:12:2:ExternalSort) 20000000/32768/29574784/29474652 (res/actual/peak/limit) child allocators: 0 ledgers: 1 ledger[191359279] allocator: op:2:12:2:ExternalSort), isOwning: true, size: 32768, references: 3, life: 13254566565832052..0, allocatorManager: [145147047, life: 13254566561809396..0] holds 3 buffers. 
DrillBuf[321514436], udle: [145147048 194..1217] DrillBuf[321514441], udle: [145147048 5313..14520] DrillBuf[321514438], udle: [145147048 1217..5313] reservations: 0 ledgers: 0 reservations: 0 2017-06-28 12:52:56,483 [26abfa45-7d5a-20d9-14c3-8a34a0e2018a:frag:2:12] INFO o.a.d.e.w.fragment.FragmentExecutor - 26abfa45-7d5a-20d9-14c3-8a34a0e2018a:2:12: State change requested FAILED --> FAILED 2017-06-28 12:52:56,483 [26abfa45-7d5a-20d9-14c3-8a34a0e2018a:frag:2:12] INFO o.a.d.e.w.fragment.FragmentExecutor - 26abfa45-7d5a-20d9-14c3-8a34a0e2018a:2:12: State change requested FAILED --> FINISHED 2017-06-28 12:52:56,484 [26abfa45-7d5a-20d9-14c3-8a34a0e2018a:frag:2:12] ERROR o.a.d.e.w.fragment.FragmentExecutor - SYSTEM ERROR: IllegalStateException: Allocator[op:2:12:2:ExternalSort] closed with outstanding buffers allocated (1). Allocator(op:2:12:2:ExternalSort) 20000000/32768/29574784/29474652 (res/actual/peak/limit) child allocators: 0 ledgers: 1 ledger[191359279] allocator: op:2:12:2:ExternalSort), isOwning: true, size: 32768, references: 3, life: 13254566565832052..0, allocatorManager: [145147047, life: 13254566561809396..0] holds 3 buffers. DrillBuf[321514436], udle: [145147048 194..1217] DrillBuf[321514441], udle: [145147048 5313..14520] DrillBuf[321514438], udle: [145147048 1217..5313] reservations: 0 Fragment 2:12 [Error Id: 6a419e8c-96e9-4918-a4fe-feb8aeb04c60 on qa-node190.qa.lab:31010] org.apache.drill.common.exceptions.UserException: SYSTEM ERROR: IllegalStateException: Allocator[op:2:12:2:ExternalSort] closed with outstanding buffers allocated (1). Allocator(op:2:12:2:ExternalSort) 20000000/32768/29574784/29474652 (res/actual/peak/limit) child allocators: 0 ledgers: 1 ledger[191359279] allocator: op:2:12:2:ExternalSort), isOwning: true, size: 32768, references: 3, life: 13254566565832052..0, allocatorManager: [145147047, life: 13254566561809396..0] holds 3 buffers. 
DrillBuf[321514436], udle: [145147048 194..1217] DrillBuf[321514441], udle: [145147048 5313..14520] DrillBuf[321514438], udle: [145147048 1217..5313] reservations: 0 Fragment 2:12 [Error Id: 6a419e8c-96e9-4918-a4fe-feb8aeb04c60 on qa-node190.qa.lab:31010] at org.apache.drill.common.exceptions.UserException$Builder.build(UserException.java:550) ~[drill-common-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.work.fragment.FragmentExecutor.sendFinalState(FragmentExecutor.java:295) [drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.work.fragment.FragmentExecutor.cleanup(FragmentExecutor.java:160) [drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExecutor.java:264) [drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.common.SelfCleaningRunnable.run(SelfCleaningRunnable.java:38) [drill-common-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) [na:1.7.0_111] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) [na:1.7.0_111] at java.lang.Thread.run(Thread.java:745) [na:1.7.0_111] Caused by: java.lang.IllegalStateException: Allocator[op:2:12:2:ExternalSort] closed with outstanding buffers allocated (1). Allocator(op:2:12:2:ExternalSort) 20000000/32768/29574784/29474652 (res/actual/peak/limit) child allocators: 0 ledgers: 1 ledger[191359279] allocator: op:2:12:2:ExternalSort), isOwning: true, size: 32768, references: 3, life: 13254566565832052..0, allocatorManager: [145147047, life: 13254566561809396..0] holds 3 buffers. 
DrillBuf[321514436], udle: [145147048 194..1217] DrillBuf[321514441], udle: [145147048 5313..14520] DrillBuf[321514438], udle: [145147048 1217..5313] reservations: 0 at org.apache.drill.exec.memory.BaseAllocator.close(BaseAllocator.java:504) ~[drill-memory-base-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.ops.AbstractOperatorExecContext.close(AbstractOperatorExecContext.java:86) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.ops.OperatorContextImpl.close(OperatorContextImpl.java:108) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.ops.FragmentContext.suppressingClose(FragmentContext.java:436) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.ops.FragmentContext.close(FragmentContext.java:425) ~[drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.work.fragment.FragmentExecutor.closeOutResources(FragmentExecutor.java:320) [drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] at org.apache.drill.exec.work.fragment.FragmentExecutor.cleanup(FragmentExecutor.java:155) [drill-java-exec-1.11.0-SNAPSHOT.jar:1.11.0-SNAPSHOT] ... 5 common frames omitted 2017-06-28 12:52:56,484 [drill-executor-374] DEBUG o.a.d.exec.rpc.control.WorkEventBus - Removing fragment manager: 26abfa45-7d5a-20d9-14c3-8a34a0e2018a:2:12 2017-06-28 12:52:56,484 [drill-executor-374] WARN o.a.d.exec.rpc.control.WorkEventBus - Fragment 26abfa45-7d5a-20d9-14c3-8a34a0e2018a:2:12 not found in the work bus. 
2017-06-28 12:52:56,486 [BitServer-4] WARN o.a.d.exec.rpc.control.WorkEventBus - A fragment message arrived but there was no registered listener for that message: profile { state: FAILED error { error_id: "6a419e8c-96e9-4918-a4fe-feb8aeb04c60" endpoint { address: "qa-node190.qa.lab" user_port: 31010 control_port: 31011 data_port: 31012 version: "1.11.0-SNAPSHOT" } error_type: SYSTEM message: "SYSTEM ERROR: IllegalStateException: Allocator[op:2:12:2:ExternalSort] closed with outstanding buffers allocated (1).\nAllocator(op:2:12:2:ExternalSort) 20000000/32768/29574784/29474652 (res/actual/peak/limit)\n child allocators: 0\n ledgers: 1\n ledger[191359279] allocator: op:2:12:2:ExternalSort), isOwning: true, size: 32768, references: 3, life: 13254566565832052..0, allocatorManager: [145147047, life: 13254566561809396..0] holds 3 buffers. \n DrillBuf[321514436], udle: [145147048 194..1217]\n DrillBuf[321514441], udle: [145147048 5313..14520]\n DrillBuf[321514438], udle: [145147048 1217..5313]\n reservations: 0\n\n\nFragment 2:12\n\n[Error Id: 6a419e8c-96e9-4918-a4fe-feb8aeb04c60 on qa-node190.qa.lab:31010]" exception { exception_class: "java.lang.IllegalStateException" message: "Allocator[op:2:12:2:ExternalSort] closed with outstanding buffers allocated (1).\nAllocator(op:2:12:2:ExternalSort) 20000000/32768/29574784/29474652 (res/actual/peak/limit)\n child allocators: 0\n ledgers: 1\n ledger[191359279] allocator: op:2:12:2:ExternalSort), isOwning: true, size: 32768, references: 3, life: 13254566565832052..0, allocatorManager: [145147047, life: 13254566561809396..0] holds 3 buffers. \n DrillBuf[321514436], udle: [145147048 194..1217]\n DrillBuf[321514441], udle: [145147048 5313..14520]\n DrillBuf[321514438], udle: [145147048 1217..5313]\n reservations: 0\n" stack_trace { class_name: "org.apache.drill.exec.memory.BaseAllocator" file_name: "BaseAllocator.java" line_number: 504 method_name: "close" is_native_method: false } ...... ...... 
{code} > External sort fails with OOM error (Fails to allocate sv2) > ---------------------------------------------------------- > > Key: DRILL-5253 > URL: https://issues.apache.org/jira/browse/DRILL-5253 > Project: Apache Drill > Issue Type: Sub-task > Components: Execution - Relational Operators > Affects Versions: 1.10.0 > Reporter: Rahul Challapalli > Assignee: Paul Rogers > Attachments: 2762f36d-a2e7-5582-922d-3c4626be18c0.sys.drill > > > git.commit.id.abbrev=2af709f > The data set used in the below query has the same value for every column in > every row. The query fails with an OOM as it exceeds the allocated memory > {code} > alter session set `planner.width.max_per_node` = 1; > alter session set `planner.memory.max_query_memory_per_node` = 104857600; > select count(*) from (select * from identical order by col1, col2, col3, > col4, col5, col6, col7, col8, col9, col10); > Error: RESOURCE ERROR: One or more nodes ran out of memory while executing > the query. > org.apache.drill.exec.exception.OutOfMemoryException: Unable to allocate sv2 > buffer after repeated attempts > Fragment 2:0 > [Error Id: aed43fa1-fd8b-4440-9426-0f35d055aabb on qa-node190.qa.lab:31010] > (state=,code=0) > {code} > Exception from the logs > {code} > org.apache.drill.common.exceptions.UserException: RESOURCE ERROR: One or more > nodes ran out of memory while executing the query. 
> org.apache.drill.exec.exception.OutOfMemoryException: Unable to allocate sv2 > buffer after repeated attempts > [Error Id: aed43fa1-fd8b-4440-9426-0f35d055aabb ] > at > org.apache.drill.common.exceptions.UserException$Builder.build(UserException.java:544) > ~[drill-common-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > at > org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExecutor.java:242) > [drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > at > org.apache.drill.common.SelfCleaningRunnable.run(SelfCleaningRunnable.java:38) > [drill-common-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > [na:1.7.0_111] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > [na:1.7.0_111] > at java.lang.Thread.run(Thread.java:745) [na:1.7.0_111] > Caused by: org.apache.drill.exec.exception.OutOfMemoryException: > org.apache.drill.exec.exception.OutOfMemoryException: Unable to allocate sv2 > buffer after repeated attempts > at > org.apache.drill.exec.physical.impl.xsort.ExternalSortBatch.innerNext(ExternalSortBatch.java:371) > ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > at > org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:162) > ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > at > org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:215) > ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > at > org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:119) > ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > at > org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:109) > ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > at > org.apache.drill.exec.record.AbstractSingleRecordBatch.innerNext(AbstractSingleRecordBatch.java:51) > ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] 
> at > org.apache.drill.exec.physical.impl.svremover.RemovingRecordBatch.innerNext(RemovingRecordBatch.java:93) > ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > at > org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:162) > ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > at > org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:215) > ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > at > org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:104) > ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > at > org.apache.drill.exec.physical.impl.SingleSenderCreator$SingleSenderRootExec.innerNext(SingleSenderCreator.java:92) > ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > at > org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:94) > ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > at > org.apache.drill.exec.work.fragment.FragmentExecutor$1.run(FragmentExecutor.java:232) > ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > at > org.apache.drill.exec.work.fragment.FragmentExecutor$1.run(FragmentExecutor.java:226) > ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > at java.security.AccessController.doPrivileged(Native Method) > ~[na:1.7.0_111] > at javax.security.auth.Subject.doAs(Subject.java:415) ~[na:1.7.0_111] > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1595) > ~[hadoop-common-2.7.0-mapr-1607.jar:na] > at > org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExecutor.java:226) > [drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > ... 
4 common frames omitted > Caused by: org.apache.drill.exec.exception.OutOfMemoryException: Unable to > allocate sv2 buffer after repeated attempts > at > org.apache.drill.exec.physical.impl.xsort.ExternalSortBatch.newSV2(ExternalSortBatch.java:650) > ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > at > org.apache.drill.exec.physical.impl.xsort.ExternalSortBatch.innerNext(ExternalSortBatch.java:367) > ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT] > ... 21 common frames omitted > {code} > The data set and profile are attached -- This message was sent by Atlassian JIRA (v6.4.14#64029)