[ https://issues.apache.org/jira/browse/HCATALOG-448?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13414278#comment-13414278 ]
Rajesh Balamohan commented on HCATALOG-448:
-------------------------------------------
Attaching some thread-dump excerpts captured while HCatStorer (0.4) was
executing without the patch. They give more insight into why HCatStorer in
0.4 was running slowly.
"main" prio=10 tid=0x000000004a7c1800 nid=0x6b7 runnable [0x0000000041e80000]
java.lang.Thread.State: RUNNABLE
at java.util.regex.Matcher.search(Matcher.java:1105)
at java.util.regex.Matcher.find(Matcher.java:535)
at org.apache.hadoop.conf.Configuration.substituteVars(Configuration.java:379)
at org.apache.hadoop.conf.Configuration.get(Configuration.java:415)
at org.apache.hcatalog.mapreduce.HCatBaseOutputFormat.getJobInfo(HCatBaseOutputFormat.java:92)
at org.apache.hcatalog.mapreduce.FileRecordWriterContainer.write(FileRecordWriterContainer.java:236)
at org.apache.hcatalog.mapreduce.FileRecordWriterContainer.write(FileRecordWriterContainer.java:52)
at org.apache.hcatalog.pig.HCatBaseStorer.putNext(HCatBaseStorer.java:235)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat$PigRecordWriter.write(PigOutputFormat.java:139)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat$PigRecordWriter.write(PigOutputFormat.java:98)
at org.apache.hadoop.mapred.MapTask$NewDirectOutputCollector.write(MapTask.java:531)
at org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:80)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapOnly$Map.collect(PigMapOnly.java:48)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:248)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:64)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
java.lang.Thread.State: RUNNABLE
at java.lang.String.intern(Native Method)
at java.io.ObjectStreamField.<init>(ObjectStreamField.java:87)
at java.io.ObjectStreamClass.readNonProxy(ObjectStreamClass.java:653)
at java.io.ObjectInputStream.readClassDescriptor(ObjectInputStream.java:808)
at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1564)
at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1495)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1731)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1328)
at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1946)
at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1870)
at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1752)
at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1328)
at java.io.ObjectInputStream.readObject(ObjectInputStream.java:350)
at org.apache.hcatalog.common.HCatUtil.deserialize(HCatUtil.java:115)
at org.apache.hcatalog.mapreduce.HCatBaseOutputFormat.getJobInfo(HCatBaseOutputFormat.java:97)
at org.apache.hcatalog.mapreduce.FileRecordWriterContainer.write(FileRecordWriterContainer.java:236)
at org.apache.hcatalog.mapreduce.FileRecordWriterContainer.write(FileRecordWriterContainer.java:52)
at org.apache.hcatalog.pig.HCatBaseStorer.putNext(HCatBaseStorer.java:235)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat$PigRecordWriter.write(PigOutputFormat.java:139)
"main" prio=10 tid=0x000000004a7c1800 nid=0x6b7 runnable [0x0000000041e80000]
java.lang.Thread.State: RUNNABLE
at org.apache.hadoop.conf.Configuration.<init>(Configuration.java:250)
- locked <0x0000000219608580> (a org.apache.hadoop.mapred.JobConf)
at org.apache.hadoop.mapred.JobConf.<init>(JobConf.java:349)
at org.apache.hadoop.mapred.HCatMapRedUtil.createTaskAttemptContext(HCatMapRedUtil.java:26)
at org.apache.hcatalog.mapreduce.FileRecordWriterContainer.write(FileRecordWriterContainer.java:239)
at org.apache.hcatalog.mapreduce.FileRecordWriterContainer.write(FileRecordWriterContainer.java:52)
at org.apache.hcatalog.pig.HCatBaseStorer.putNext(HCatBaseStorer.java:235)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat$PigRecordWriter.write(PigOutputFormat.java:139)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat$PigRecordWriter.write(PigOutputFormat.java:98)
at org.apache.hadoop.mapred.MapTask$NewDirectOutputCollector.write(MapTask.java:531)
at org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:80)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapOnly$Map.collect(PigMapOnly.java:48)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:248)
at org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapBase.map(PigGenericMapBase.java:64)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
> HCatStorer performance is 4x slower in HCat 0.4 than HCat 0.2
> -------------------------------------------------------------
>
> Key: HCATALOG-448
> URL: https://issues.apache.org/jira/browse/HCATALOG-448
> Project: HCatalog
> Issue Type: Bug
> Affects Versions: 0.4.1
> Reporter: Rohini Palaniswamy
> Assignee: Mithun Radhakrishnan
> Priority: Critical
> Attachments: hcatalog-448-for-0.4-codebase.patch
>
>
--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators:
https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira