[
https://issues.apache.org/jira/browse/TEZ-3793?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Ayush Saxena resolved TEZ-3793.
-------------------------------
Resolution: Cannot Reproduce
> Consider reducing the number of times "DiskChecker.doDiskIo" needs to be
> invoked for writing to local folders
> -------------------------------------------------------------------------------------------------------------
>
> Key: TEZ-3793
> URL: https://issues.apache.org/jira/browse/TEZ-3793
> Project: Apache Tez
> Issue Type: Improvement
> Reporter: Rajesh Balamohan
> Priority: Major
>
> For large jobs, {{LocalDirAllocator}} comes up as a bottleneck fairly often, as
> it tries to do a minimal write operation. It would be good to consider reducing
> the number of times directories are checked for errors/issues.
> Some sample stack traces are given below:
> {noformat}
> at java.io.FileDescriptor.sync(Native Method)
> at
> org.apache.hadoop.util.DiskChecker.diskIoCheckWithoutNativeIo(DiskChecker.java:249)
> at org.apache.hadoop.util.DiskChecker.doDiskIo(DiskChecker.java:220)
> at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:82)
> at
> org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.createPath(LocalDirAllocator.java:350)
> at
> org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:424)
> at
> org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:151)
> at
> org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:132)
> at
> org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles.getOutputFileForWrite(TezTaskOutputFiles.java:91)
> at
> org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.getSpillPathDetails(UnorderedPartitionedKVWriter.java:721)
> at
> org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.mergeAll(UnorderedPartitionedKVWriter.java:748)
> at
> org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.close(UnorderedPartitionedKVWriter.java:545)
> at
> org.apache.tez.runtime.library.output.UnorderedPartitionedKVOutput.close(UnorderedPartitionedKVOutput.java:105)
> - locked <0x00007f4823d5dc88> (a
> org.apache.tez.runtime.library.output.UnorderedPartitionedKVOutput)
> at
> org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.close(LogicalIOProcessorRuntimeTask.java:393)
> at
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:83)
> at
> org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1866)
> at
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
> at
> org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
> at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
> at
> org.apache.hadoop.hive.llap.daemon.impl.StatsRecordingThreadPool$WrappedCallable.call(StatsRecordingThreadPool.java:110)
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> at java.lang.Thread.run(Thread.java:748)
>
> at java.io.FileDescriptor.sync(Native Method)
> at
> org.apache.hadoop.util.DiskChecker.diskIoCheckWithoutNativeIo(DiskChecker.java:249)
> at org.apache.hadoop.util.DiskChecker.doDiskIo(DiskChecker.java:220)
> at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:82)
> at
> org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.createPath(LocalDirAllocator.java:350)
> at
> org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:424)
> at
> org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:151)
> at
> org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:132)
> at
> org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles.getInputFileForWrite(TezTaskOutputFiles.java:250)
> at
> org.apache.tez.runtime.library.common.shuffle.DiskFetchedInput.<init>(DiskFetchedInput.java:52)
> at
> org.apache.tez.runtime.library.common.shuffle.impl.SimpleFetchedInputAllocator.allocate(SimpleFetchedInputAllocator.java:140)
> - locked <0x00007f4891288858> (a
> org.apache.tez.runtime.library.common.shuffle.impl.SimpleFetchedInputAllocator)
> at
> org.apache.tez.runtime.library.common.shuffle.Fetcher.fetchInputs(Fetcher.java:793)
> at
> org.apache.tez.runtime.library.common.shuffle.Fetcher.doHttpFetch(Fetcher.java:539)
> at
> org.apache.tez.runtime.library.common.shuffle.Fetcher.doHttpFetch(Fetcher.java:428)
> at
> org.apache.tez.runtime.library.common.shuffle.Fetcher.callInternal(Fetcher.java:226)
> at
> org.apache.tez.runtime.library.common.shuffle.Fetcher.callInternal(Fetcher.java:73)
> at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> at java.lang.Thread.run(Thread.java:748)
>
> at java.io.FileDescriptor.sync(Native Method)
> at
> org.apache.hadoop.util.DiskChecker.diskIoCheckWithoutNativeIo(DiskChecker.java:249)
> at org.apache.hadoop.util.DiskChecker.doDiskIo(DiskChecker.java:220)
> at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:82)
> at
> org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.createPath(LocalDirAllocator.java:350)
> at
> org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:424)
> at
> org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:151)
> at
> org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:132)
> at
> org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles.getSpillFileForWrite(TezTaskOutputFiles.java:207)
> at
> org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.getSpillPathDetails(UnorderedPartitionedKVWriter.java:728)
> at
> org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.setupNextBuffer(UnorderedPartitionedKVWriter.java:356)
> at
> org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.write(UnorderedPartitionedKVWriter.java:299)
> at
> org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.write(UnorderedPartitionedKVWriter.java:269)
> at
> org.apache.hadoop.hive.ql.exec.tez.TezProcessor$TezKVOutputCollector.collect(TezProcessor.java:260)
> at
> org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkCommonOperator.collect(VectorReduceSinkCommonOperator.java:432)
> at
> org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkCommonOperator.process(VectorReduceSinkCommonOperator.java:397)
> at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
> at
> org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:145)
> at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
> at
> org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator.process(VectorFilterOperator.java:123)
> at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897)
> at
> org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
> at
> org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.process(VectorMapOperator.java:783)
> at
> org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:86)
> {noformat}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)