[ https://issues.apache.org/jira/browse/TEZ-3284?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Jonathan Eagles updated TEZ-3284: --------------------------------- Attachment: TEZ-3284.3.patch > Synchronization for every write in UnorderedKVWriter > --------------------------------------------------- > > Key: TEZ-3284 > URL: https://issues.apache.org/jira/browse/TEZ-3284 > Project: Apache Tez > Issue Type: Bug > Affects Versions: 0.9.0 > Reporter: Gopal V > Assignee: Jonathan Eagles > Priority: Critical > Labels: Performance > Attachments: TEZ-3284.1.patch, TEZ-3284.2.patch, TEZ-3284.3.patch > > > {code} > baos = new ByteArrayOutputStream(); > dos = new DataOutputStream(baos); > keySerializer.open(dos); > valSerializer.open(dos); > {code} > This is a known performance issue, as documented in HADOOP-10694. > Both ByteArrayOutputStream::write() and DataOutputStream::write() execute lock-prefixed > instructions on every call, because they are synchronized instance methods. > The recommended solution is to replicate the Hive NonSync implementations, similar to > HADOOP-10694. > {code} > TezTaskRunner [RUNNABLE] > *** java.io.DataOutputStream.write(byte[], int, int) DataOutputStream.java:107 > org.apache.tez.runtime.library.common.serializer.TezBytesWritableSerialization$TezBytesWritableSerializer.serialize(Writable) > TezBytesWritableSerialization.java:123 > org.apache.tez.runtime.library.common.serializer.TezBytesWritableSerialization$TezBytesWritableSerializer.serialize(Object) > TezBytesWritableSerialization.java:110 > org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.write(Object, > Object, int) UnorderedPartitionedKVWriter.java:295 > org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.write(Object, > Object) UnorderedPartitionedKVWriter.java:257 > org.apache.hadoop.hive.ql.exec.tez.TezProcessor$TezKVOutputCollector.collect(Object, > Object) TezProcessor.java:232 > org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkCommonOperator.collect(BytesWritable, > Writable) VectorReduceSinkCommonOperator.java:432 > 
org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkCommonOperator.process(Object, > int) VectorReduceSinkCommonOperator.java:397 > org.apache.hadoop.hive.ql.exec.Operator.forward(Object, ObjectInspector) > Operator.java:837 > org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(Object, > int) VectorSelectOperator.java:144 > org.apache.hadoop.hive.ql.exec.Operator.forward(Object, ObjectInspector) > Operator.java:837 > org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator.process(Object, > int) VectorFilterOperator.java:121 > org.apache.hadoop.hive.ql.exec.Operator.forward(Object, ObjectInspector) > Operator.java:837 > org.apache.hadoop.hive.ql.exec.TableScanOperator.process(Object, int) > TableScanOperator.java:130 > org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.process(Writable) > VectorMapOperator.java:796 > org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(Object) > MapRecordSource.java:86 > org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord() > MapRecordSource.java:70 > org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.run() > MapRecordProcessor.java:361 > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(Map, > Map) TezProcessor.java:172 > org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(Map, Map) > TezProcessor.java:160 > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run() > LogicalIOProcessorRuntimeTask.java:370 > org.apache.tez.runtime.task.TaskRunner2Callable$1.run() > TaskRunner2Callable.java:73 > org.apache.tez.runtime.task.TaskRunner2Callable$1.run() > TaskRunner2Callable.java:61 > java.security.AccessController.doPrivileged(PrivilegedExceptionAction, > AccessControlContext) AccessController.java (native) > javax.security.auth.Subject.doAs(Subject, PrivilegedExceptionAction) > Subject.java:422 > org.apache.hadoop.security.UserGroupInformation.doAs(PrivilegedExceptionAction) > UserGroupInformation.java:1657 > 
org.apache.tez.runtime.task.TaskRunner2Callable.callInternal() > TaskRunner2Callable.java:61 > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal() > TaskRunner2Callable.java:37 > org.apache.tez.common.CallableWithNdc.call() CallableWithNdc.java:36 > java.util.concurrent.FutureTask.run() FutureTask.java:266 > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor$Worker) > ThreadPoolExecutor.java:1142 > java.util.concurrent.ThreadPoolExecutor$Worker.run() > ThreadPoolExecutor.java:617 > java.lang.Thread.run() Thread.java:745 > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)