Author: dogacan
Date: Wed Jul 11 08:30:29 2007
New Revision: 555307

URL: http://svn.apache.org/viewvc?view=rev&rev=555307
Log:
NUTCH-510 - IndexMerger delete working dir. Contributed by Enis.

Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?view=diff&rev=555307&r1=555306&r2=555307
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Wed Jul 11 08:30:29 2007
@@ -83,6 +83,8 @@
 
 27. NUTCH-505 - Outlink urls should be validated. (dogacan)
 
+28. NUTCH-510 - IndexMerger delete working dir. (Enis Soztutar via dogacan)
+
 Release 0.9 - 2007-04-02
 
  1. Changed log4j confiquration to log to stdout on commandline

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java?view=diff&rev=555307&r1=555306&r2=555307
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java Wed Jul 11 08:30:29 2007
@@ -24,6 +24,7 @@
 import org.apache.commons.logging.LogFactory;
 
 import org.apache.hadoop.fs.*;
+import org.apache.hadoop.mapred.FileAlreadyExistsException;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.ToolBase;
 import org.apache.hadoop.conf.*;
@@ -58,13 +59,9 @@
    * Merge all input indexes to the single output index
    */
   public void merge(Path[] indexes, Path outputIndex, Path localWorkingDir) throws IOException {
-    if (LOG.isInfoEnabled()) {
-      LOG.info("merging indexes to: " + outputIndex);
-    }
-    FileSystem localFs = FileSystem.getLocal(getConf());
-    if (localWorkingDir == null) {
-      localWorkingDir = new Path("indexmerger-" + System.currentTimeMillis());
-    }
+    LOG.info("merging indexes to: " + outputIndex);
+
+    FileSystem localFs = FileSystem.getLocal(getConf());  
     if (localFs.exists(localWorkingDir)) {
       localFs.delete(localWorkingDir);
     }
@@ -73,6 +70,10 @@
     // Get local output target
     //
     FileSystem fs = FileSystem.get(getConf());
+    if (fs.exists(outputIndex)) {
+      throw new FileAlreadyExistsException("Output directory " + outputIndex + " already exists!");
+    }
+
     Path tmpLocalOutput = new Path(localWorkingDir, "merge-output");
     Path localOutput = fs.startLocalOutput(outputIndex, tmpLocalOutput);
 
@@ -83,8 +84,6 @@
     }
 
     //
-
-    //
     // Merge indices
     //
     IndexWriter writer = new IndexWriter(localOutput.toString(), null, true);
@@ -102,8 +101,7 @@
     // Put target back
     //
     fs.completeLocalOutput(outputIndex, tmpLocalOutput);
-    FileSystem.getLocal(conf).delete(localWorkingDir);
-    if (LOG.isInfoEnabled()) { LOG.info("done merging"); }
+    LOG.info("done merging");
   }
 
   /** 
@@ -125,9 +123,9 @@
     // Parse args, read all index directories to be processed
     //
     FileSystem fs = FileSystem.get(conf);
-    List indexDirs = new ArrayList();
+    List<Path> indexDirs = new ArrayList<Path>();
 
-    Path workDir = null;
+    Path workDir = new Path("indexmerger-" + System.currentTimeMillis());  
     int i = 0;
     if ("-workingdir".equals(args[i])) {
       i++;
@@ -152,6 +150,8 @@
     } catch (Exception e) {
       LOG.fatal("IndexMerger: " + StringUtils.stringifyException(e));
       return -1;
+    } finally {
+      FileSystem.getLocal(conf).delete(workDir);
     }
   }
 }



-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Nutch-cvs mailing list
Nutch-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nutch-cvs

Reply via email to