Author: dogacan
Date: Wed Jul 11 08:30:29 2007
New Revision: 555307

URL: http://svn.apache.org/viewvc?view=rev&rev=555307
Log:
NUTCH-510 - IndexMerger delete working dir. Contributed by Enis.

Modified: lucene/nutch/trunk/CHANGES.txt lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java Modified: lucene/nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?view=diff&rev=555307&r1=555306&r2=555307 ============================================================================== --- lucene/nutch/trunk/CHANGES.txt (original) +++ lucene/nutch/trunk/CHANGES.txt Wed Jul 11 08:30:29 2007 @@ -83,6 +83,8 @@ 27. NUTCH-505 - Outlink urls should be validated. (dogacan) +28. NUTCH-510 - IndexMerger delete working dir. (Enis Soztutar via dogacan) + Release 0.9 - 2007-04-02 1. Changed log4j confiquration to log to stdout on commandline Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java?view=diff&rev=555307&r1=555306&r2=555307 ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java Wed Jul 11 08:30:29 2007 @@ -24,6 +24,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.*; +import org.apache.hadoop.mapred.FileAlreadyExistsException; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.ToolBase; import org.apache.hadoop.conf.*; @@ -58,13 +59,9 @@ * Merge all input indexes to the single output index */ public void merge(Path[] indexes, Path outputIndex, Path localWorkingDir) throws IOException { - if (LOG.isInfoEnabled()) { - LOG.info("merging indexes to: " + outputIndex); - } - FileSystem localFs = FileSystem.getLocal(getConf()); - if (localWorkingDir == null) { - localWorkingDir = new Path("indexmerger-" + System.currentTimeMillis()); - } + LOG.info("merging indexes to: " + outputIndex); + + FileSystem localFs = FileSystem.getLocal(getConf()); if 
(localFs.exists(localWorkingDir)) { localFs.delete(localWorkingDir); } @@ -73,6 +70,10 @@ // Get local output target // FileSystem fs = FileSystem.get(getConf()); + if (fs.exists(outputIndex)) { + throw new FileAlreadyExistsException("Output directory " + outputIndex + " already exists!"); + } + Path tmpLocalOutput = new Path(localWorkingDir, "merge-output"); Path localOutput = fs.startLocalOutput(outputIndex, tmpLocalOutput); @@ -83,8 +84,6 @@ } // - - // // Merge indices // IndexWriter writer = new IndexWriter(localOutput.toString(), null, true); @@ -102,8 +101,7 @@ // Put target back // fs.completeLocalOutput(outputIndex, tmpLocalOutput); - FileSystem.getLocal(conf).delete(localWorkingDir); - if (LOG.isInfoEnabled()) { LOG.info("done merging"); } + LOG.info("done merging"); } /** @@ -125,9 +123,9 @@ // Parse args, read all index directories to be processed // FileSystem fs = FileSystem.get(conf); - List indexDirs = new ArrayList(); + List<Path> indexDirs = new ArrayList<Path>(); - Path workDir = null; + Path workDir = new Path("indexmerger-" + System.currentTimeMillis()); int i = 0; if ("-workingdir".equals(args[i])) { i++; @@ -152,6 +150,8 @@ } catch (Exception e) { LOG.fatal("IndexMerger: " + StringUtils.stringifyException(e)); return -1; + } finally { + FileSystem.getLocal(conf).delete(workDir); } } } ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ _______________________________________________ Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs