Hi Patricio, Try this
bin/nutch mergesegs /user/nutch/crawl_al/MERGEDsegments -dir /user/nutch/my_crawl/segments/* -filter -slice 50000 or bin/nutch mergesegs /user/nutch/crawl_al/MERGEDsegments -dir /user/nutch/my_crawl/segments/seg1 /user/nutch/my_crawl/segments/seg2 /user/nutch/my_crawl/segments/seg3, etc -filter -slice 50000 If this works then we need to edit the wiki to accommodate the '/*' which is required to refer to ALL segments in any given directory. HTH On Fri, Oct 28, 2011 at 11:09 PM, Patricio Galeas <[email protected]> wrote: > Hello, > > when I try to merge segment using ... > > bin/nutch mergesegs /user/nutch/crawl_al/MERGEDsegments -dir > /user/nutch/my_crawl/segments -filter -slice 50000 > > > .... I get the following error. What I'm doing wrong? > > Thanks > Patricio > > java.io.EOFException > at java.io.DataInputStream.readByte(DataInputStream.java:250) > at org.apache.hadoop.io.WritableUtils.readVLong(WritableUtils.java:298) > at org.apache.hadoop.io.WritableUtils.readVInt(WritableUtils.java:319) > at org.apache.hadoop.io.Text.readString(Text.java:400) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.createBlockOutputStream(DFSClient.java:2901) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2826) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288) > > java.io.EOFException > at java.io.DataInputStream.readByte(DataInputStream.java:250) > at org.apache.hadoop.io.WritableUtils.readVLong(WritableUtils.java:298) > at org.apache.hadoop.io.WritableUtils.readVInt(WritableUtils.java:319) > at org.apache.hadoop.io.Text.readString(Text.java:400) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.createBlockOutputStream(DFSClient.java:2901) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2826) > at > 
org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288) > > java.io.EOFException > at java.io.DataInputStream.readByte(DataInputStream.java:250) > at org.apache.hadoop.io.WritableUtils.readVLong(WritableUtils.java:298) > at org.apache.hadoop.io.WritableUtils.readVInt(WritableUtils.java:319) > at org.apache.hadoop.io.Text.readString(Text.java:400) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.createBlockOutputStream(DFSClient.java:2901) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2826) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288) > > attempt_201110152026_0003_r_000001_1: log4j:WARN No appenders could be > found for logger (org.apache.hadoop.hdfs.DFSClient). > attempt_201110152026_0003_r_000001_1: log4j:WARN Please initialize the > log4j system properly. > java.io.EOFException > at java.io.DataInputStream.readByte(DataInputStream.java:250) > at org.apache.hadoop.io.WritableUtils.readVLong(WritableUtils.java:298) > at org.apache.hadoop.io.WritableUtils.readVInt(WritableUtils.java:319) > at org.apache.hadoop.io.Text.readString(Text.java:400) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.createBlockOutputStream(DFSClient.java:2901) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2826) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288) > > attempt_201110152026_0003_r_000000_1: log4j:WARN No appenders could be > found for logger (org.apache.hadoop.hdfs.DFSClient). > attempt_201110152026_0003_r_000000_1: log4j:WARN Please initialize the > log4j system properly. 
> java.io.EOFException > at java.io.DataInputStream.readByte(DataInputStream.java:250) > at org.apache.hadoop.io.WritableUtils.readVLong(WritableUtils.java:298) > at org.apache.hadoop.io.WritableUtils.readVInt(WritableUtils.java:319) > at org.apache.hadoop.io.Text.readString(Text.java:400) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.createBlockOutputStream(DFSClient.java:2901) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2826) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288) > > java.io.EOFException > at java.io.DataInputStream.readByte(DataInputStream.java:250) > at org.apache.hadoop.io.WritableUtils.readVLong(WritableUtils.java:298) > at org.apache.hadoop.io.WritableUtils.readVInt(WritableUtils.java:319) > at org.apache.hadoop.io.Text.readString(Text.java:400) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.createBlockOutputStream(DFSClient.java:2901) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient.java:2826) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream.access$2000(DFSClient.java:2102) > at > org.apache.hadoop.hdfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:2288) > > Exception in thread "main" java.io.IOException: Job failed! > at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1252) > at org.apache.nutch.segment.SegmentMerger.merge(SegmentMerger.java:638) > at org.apache.nutch.segment.SegmentMerger.main(SegmentMerger.java:683 > -- *Lewis*

