[
https://issues.apache.org/jira/browse/MAHOUT-646?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13015071#comment-13015071
]
Robin Anil commented on MAHOUT-646:
-----------------------------------
Unfortunately, the Bayes classifier reads from a text input format. I wanted to
split the output for the different categories into multiple files for the
Wikipedia example, which, by the way, is not necessary for Bayes: it reads all
of the files anyway. Dropping the split wouldn't create any problems; we would
just have to update all tutorials and references that mention the split output.
> Cannot run Wikipedia example on Amazon Elastic MapReduce (EMR)
> --------------------------------------------------------------
>
> Key: MAHOUT-646
> URL: https://issues.apache.org/jira/browse/MAHOUT-646
> Project: Mahout
> Issue Type: Bug
> Components: Classification
> Affects Versions: 0.5
> Reporter: Martin Provencher
> Priority: Minor
>
> When I tried to run the Wikipedia example on EMR with all of the categories
> that exist in the Wikipedia dump, I got this error:
> org.apache.hadoop.ipc.RemoteException:
> org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException: failed to
> create file
> /yatter.tagger/wikipedia/input/_temporary/_attempt__0000_r_000000_0/part-r-00000
> for DFSClient_attempt_201103292134_0010_r_000000_0 on client 10.240.10.157
> because current leaseholder is trying to recreate file.
> at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:1045)
> at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:981)
> at
> org.apache.hadoop.hdfs.server.namenode.NameNode.create(NameNode.java:377)
> at sun.reflect.GeneratedMethodAccessor7.invoke(Unknown Source)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
> at java.lang.reflect.Method.invoke(Method.java:597)
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:508)
> at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:961)
> at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:957)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:396)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:955)
> at org.apache.hadoop.ipc.Client.call(Client.java:740)
> at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:220)
> at $Proxy1.create(Unknown Source)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
> at java.lang.reflect.Method.invoke(Method.java:597)
> at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:82)
> at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:59)
> at $Proxy1.create(Unknown Source)
> at
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.<init>(DFSClient.java:2709)
> at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:491)
> at
> org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:195)
> at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:524)
> at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:505)
> at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:412)
> at
> org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.getRecordWriter(TextOutputFormat.java:128)
> at
> org.apache.mahout.classifier.bayes.MultipleTextOutputFormat.getBaseRecordWriter(MultipleTextOutputFormat.java:41)
> at
> org.apache.mahout.classifier.bayes.MultipleOutputFormat$1.write(MultipleOutputFormat.java:81)
> at
> org.apache.hadoop.mapred.ReduceTask$NewTrackingRecordWriter.write(ReduceTask.java:517)
> at
> org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:80)
> at
> org.apache.mahout.classifier.bayes.WikipediaDatasetCreatorReducer.reduce(WikipediaDatasetCreatorReducer.java:35)
> at
> org.apache.mahout.classifier.bayes.WikipediaDatasetCreatorReducer.reduce(WikipediaDatasetCreatorReducer.java:28)
> at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:176)
> at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:575)
> at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:412)
> at org.apache.hadoop.mapred.Child.main(Child.java:170)
> org.apache.hadoop.ipc.RemoteException: java.io.IOException: failed to create
> file
> /yatter.tagger/wikipedia/input/_temporary/_attempt__0000_r_000000_0/part-r-00000
> on client 10.240.10.157 either because the filename is invalid or the file
> exists
> at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:1092)
> at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:981)
> at
> org.apache.hadoop.hdfs.server.namenode.NameNode.create(NameNode.java:377)
> at sun.reflect.GeneratedMethodAccessor7.invoke(Unknown Source)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
> at java.lang.reflect.Method.invoke(Method.java:597)
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:508)
> at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:961)
> at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:957)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:396)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:955)
> at org.apache.hadoop.ipc.Client.call(Client.java:740)
> at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:220)
> at $Proxy1.create(Unknown Source)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
> at java.lang.reflect.Method.invoke(Method.java:597)
> at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:82)
> at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:59)
> at $Proxy1.create(Unknown Source)
> at
> org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.<init>(DFSClient.java:2709)
> at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:491)
> at
> org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:195)
> at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:524)
> at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:505)
> at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:412)
> at
> org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.getRecordWriter(TextOutputFormat.java:128)
> at
> org.apache.mahout.classifier.bayes.MultipleTextOutputFormat.getBaseRecordWriter(MultipleTextOutputFormat.java:41)
> at
> org.apache.mahout.classifier.bayes.MultipleOutputFormat$1.write(MultipleOutputFormat.java:81)
> at
> org.apache.hadoop.mapred.ReduceTask$NewTrackingRecordWriter.write(ReduceTask.java:517)
> at
> org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:80)
> at
> org.apache.mahout.classifier.bayes.WikipediaDatasetCreatorReducer.reduce(WikipediaDatasetCreatorReducer.java:35)
> at
> org.apache.mahout.classifier.bayes.WikipediaDatasetCreatorReducer.reduce(WikipediaDatasetCreatorReducer.java:28)
> at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:176)
> at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:575)
> at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:412)
> at org.apache.hadoop.mapred.Child.main(Child.java:170)
> 4 more :
> org.apache.hadoop.ipc.RemoteException: java.io.IOException: failed to
> create file
> /yatter.tagger/wikipedia/input/_temporary/_attempt__0000_r_000000_0/part-r-00000
> on client 10.240.10.157 either because the filename is invalid or the file
> exists
--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira