Author: hairong
Date: Fri Jul 30 20:52:08 2010
New Revision: 980953

URL: http://svn.apache.org/viewvc?rev=980953&view=rev
Log:
HADOOP-6890. Improve listFiles API introduced by HADOOP-6870. Contributed by 
Hairong Kuang.

Modified:
    hadoop/common/trunk/CHANGES.txt
    hadoop/common/trunk/src/java/org/apache/hadoop/fs/AbstractFileSystem.java
    hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileContext.java
    hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileSystem.java
    hadoop/common/trunk/src/test/core/org/apache/hadoop/fs/TestListFiles.java

Modified: hadoop/common/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/CHANGES.txt?rev=980953&r1=980952&r2=980953&view=diff
==============================================================================
--- hadoop/common/trunk/CHANGES.txt (original)
+++ hadoop/common/trunk/CHANGES.txt Fri Jul 30 20:52:08 2010
@@ -101,6 +101,7 @@ Trunk (unreleased changes)
     HADOOP-6656. Adds a thread in the UserGroupInformation to renew TGTs 
     periodically. (Owen O'Malley and ddas via ddas)
 
+    HADOOP-6890. Improve listFiles API introduced by HADOOP-6870. (hairong)
   OPTIMIZATIONS
 
   BUG FIXES

Modified: 
hadoop/common/trunk/src/java/org/apache/hadoop/fs/AbstractFileSystem.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/fs/AbstractFileSystem.java?rev=980953&r1=980952&r2=980953&view=diff
==============================================================================
--- hadoop/common/trunk/src/java/org/apache/hadoop/fs/AbstractFileSystem.java (original)
+++ hadoop/common/trunk/src/java/org/apache/hadoop/fs/AbstractFileSystem.java Fri Jul 30 20:52:08 2010
@@ -786,8 +786,8 @@ public abstract class AbstractFileSystem
 
   /**
    * The specification of this method matches that of
-   * {@link FileContext#listLocatedStatus(Path)} except that Path f must be for this
-   * file system.
+   * {@link FileContext#listLocatedStatus(Path)} except that Path f 
+   * must be for this file system.
    */
   protected Iterator<LocatedFileStatus> listLocatedStatus(final Path f)
       throws AccessControlException, FileNotFoundException,
@@ -795,15 +795,28 @@ public abstract class AbstractFileSystem
     return new Iterator<LocatedFileStatus>() {
       private Iterator<FileStatus> itor = listStatusIterator(f);
       
+      /**
+       *  {@inheritDoc}
+       *  @return {@inheritDoc} 
+       *  @throws RuntimeException if any IOException occurs during traversal;
+       *  the IOException is set as the cause of the RuntimeException
+       */
       @Override
       public boolean hasNext() {
         return itor.hasNext();
       }
       
+      /**
+       *  {@inheritDoc}
+       *  @return {@inheritDoc} 
+       *  @throws RuntimeException if any IOException occurs during traversal;
+       *  the IOException is set as the cause of the RuntimeException
+       *  @exception {@inheritDoc}
+       */
       @Override
       public LocatedFileStatus next() {
         if (!hasNext()) {
-          throw new NoSuchElementException();
+          throw new NoSuchElementException("No more entry in " + f);
         }
         FileStatus result = itor.next();
         try {

Modified: hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileContext.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileContext.java?rev=980953&r1=980952&r2=980953&view=diff
==============================================================================
--- hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileContext.java (original)
+++ hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileContext.java Fri Jul 30 20:52:08 2010
@@ -1286,103 +1286,21 @@ public final class FileContext {
       }
     }.resolve(this, absF);
   }
-
-  /**
-   * List the statuses and block locations of the files in the given path 
-   * if the path is a directory.
-   * If the given path is a file, return the file's status and block locations.
-   * if recursive is true, list all file statuses and block locations in
-   * the subtree rooted at the given path.
-   * Files across symbolic links are also returned.
-   * 
-   * @param f is the path
-   * @param recursive if the subdirectories need to be traversed recursively
-   *
-   * @return an iterator that traverses statuses of the files
-   *
-   * @throws AccessControlException If access is denied
-   * @throws FileNotFoundException If <code>f</code> does not exist
-   * @throws UnsupportedFileSystemException If file system for <code>f</code> is
-   *           not supported
-   * @throws IOException If an I/O error occurred
-   * 
-   * Exceptions applicable to file systems accessed over RPC:
-   * @throws RpcClientException If an exception occurred in the RPC client
-   * @throws RpcServerException If an exception occurred in the RPC server
-   * @throws UnexpectedServerException If server implementation throws 
-   *           undeclared exception to RPC server
-   */
-  public Iterator<LocatedFileStatus> listFiles(
-      final Path f, final boolean recursive) throws AccessControlException,
-      FileNotFoundException, UnsupportedFileSystemException, 
-      IOException {
-    return new Iterator<LocatedFileStatus>() {
-      private Stack<Path> dirs = new Stack<Path>();
-      private Stack<Path> symLinks = new Stack<Path>();
-      Iterator<LocatedFileStatus> itor = listLocatedStatus(f);
-      LocatedFileStatus curFile;
-
-      @Override
-      public boolean hasNext() {
-        try {
-          while (curFile == null) {
-            if (itor.hasNext()) {
-              handleFileStat(itor.next());
-            } else if (!dirs.isEmpty()) {
-              Path dirPath = dirs.pop(); 
-              itor = listLocatedStatus(dirPath);
-            } else if (!symLinks.isEmpty()) {
-              Path symLink = symLinks.pop();
-              FileStatus stat = getFileStatus(symLink);
-              if (stat.isFile() || (recursive && stat.isDirectory())) {
-                itor = listLocatedStatus(stat.getPath());
-              }
-            } else {
-              return false;
-            }
-          }
-          return true;
-        } catch (IOException ioe) {
-          throw (RuntimeException)new RuntimeException().initCause(ioe);
-        }
-      }
-
-      private void handleFileStat(LocatedFileStatus stat) throws IOException {
-        if (stat.isFile()) { // file
-          curFile = stat;
-        } else if (stat.isSymlink()) { // symbolic link
-          symLinks.push(stat.getSymlink());
-        } else if (recursive) { // directory
-          dirs.push(stat.getPath());
-        }
-      }
-
-      @Override
-      public LocatedFileStatus next() {
-        if (hasNext()) {
-          LocatedFileStatus result = curFile;
-          curFile = null;
-          return result;
-        } 
-        throw new java.util.NoSuchElementException("No more entry in " + f);
-      }
-
-      @Override
-      public void remove() {
-        throw new UnsupportedOperationException("Remove is not supported");
-
-      }
-    };
-  }
   
   /**
   * List the statuses of the files/directories in the given path if the path is
-   * a directory. Each returned status contains a file's block locations.
+   * a directory. 
+   * Return the file's status and block locations if the path is a file.
+   * 
+   * If a returned status is a file, it contains the file's block locations.
    * 
    * @param f is the path
    *
    * @return an iterator that traverses statuses of the files/directories 
    *         in the given path
+   * If any IO exception occurs (for example the input directory gets deleted
+   * while listing is being executed), next() or hasNext() of the returned
+   * iterator may throw a RuntimeException with the IO exception as the cause.
    *
    * @throws AccessControlException If access is denied
    * @throws FileNotFoundException If <code>f</code> does not exist
@@ -1679,6 +1597,123 @@ public final class FileContext {
     }
 
     /**
+     * List the statuses and block locations of the files in the given path.
+     * 
+     * If the path is a directory, 
+     *   if recursive is false, returns files in the directory;
+     *   if recursive is true, return files in the subtree rooted at the path.
+     *   The subtree is traversed in the depth-first order.
+     * If the path is a file, return the file's status and block locations.
+     * Files across symbolic links are also returned.
+     * 
+     * @param f is the path
+     * @param recursive if the subdirectories need to be traversed recursively
+     *
+     * @return an iterator that traverses statuses of the files
+     * If any IO exception occurs (for example a sub-directory gets deleted
+     * while listing is being executed), next() or hasNext() of the returned
+     * iterator may throw a RuntimeException with the IO exception as the cause.
+     *
+     * @throws AccessControlException If access is denied
+     * @throws FileNotFoundException If <code>f</code> does not exist
+     * @throws UnsupportedFileSystemException If file system for <code>f</code>
+     *         is not supported
+     * @throws IOException If an I/O error occurred
+     * 
+     * Exceptions applicable to file systems accessed over RPC:
+     * @throws RpcClientException If an exception occurred in the RPC client
+     * @throws RpcServerException If an exception occurred in the RPC server
+     * @throws UnexpectedServerException If server implementation throws 
+     *           undeclared exception to RPC server
+     */
+    public Iterator<LocatedFileStatus> listFiles(
+        final Path f, final boolean recursive) throws AccessControlException,
+        FileNotFoundException, UnsupportedFileSystemException, 
+        IOException {
+      return new Iterator<LocatedFileStatus>() {
+        private Stack<Iterator<LocatedFileStatus>> itors = 
+          new Stack<Iterator<LocatedFileStatus>>();
+        Iterator<LocatedFileStatus> curItor = listLocatedStatus(f);
+        LocatedFileStatus curFile;
+       
+        /**
+         *  {@inheritDoc}
+         *  @return {@inheritDoc} 
+         *  @throws RuntimeException if any IOException occurs during traversal;
+         *  the IOException is set as the cause of the RuntimeException
+         */
+        @Override
+        public boolean hasNext() {
+            while (curFile == null) {
+              if (curItor.hasNext()) {
+                handleFileStat(curItor.next());
+              } else if (!itors.empty()) {
+                curItor = itors.pop();
+              } else {
+                return false;
+              }
+            }
+            return true;
+        }
+
+        /**
+         * Process the input stat.
+         * If it is a file, return the file stat.
+         * If it is a directory, traverse the directory if recursive is true;
+         * ignore it if recursive is false.
+         * If it is a symlink, resolve the symlink first and then process it
+         * depending on if it is a file or directory.
+         * @param stat input status
+         * @throws RuntimeException if any io error occurs; the io exception
+         * is set as the cause of RuntimeException
+         */
+        private void handleFileStat(LocatedFileStatus stat) {
+          try {
+            if (stat.isFile()) { // file
+              curFile = stat;
+            } else if (stat.isSymlink()) { // symbolic link
+              // resolve symbolic link
+              FileStatus symstat = FileContext.this.getFileStatus(
+                  stat.getSymlink());
+              if (symstat.isFile() || (recursive && symstat.isDirectory())) {
+                itors.push(curItor);
+                curItor = listLocatedStatus(stat.getPath());
+              }
+            } else if (recursive) { // directory
+              itors.push(curItor);
+              curItor = listLocatedStatus(stat.getPath());
+            }
+          } catch (IOException ioe) {
+            throw (RuntimeException)new RuntimeException().initCause(ioe);
+          }
+        }
+
+        /**
+         *  {@inheritDoc}
+         *  @return {@inheritDoc} 
+         *  @throws RuntimeException if any IOException occurs during traversal;
+         *  the IOException is set as the cause of the RuntimeException
+         *  @exception {@inheritDoc}
+         */
+        @Override
+        public LocatedFileStatus next() {
+          if (hasNext()) {
+            LocatedFileStatus result = curFile;
+            curFile = null;
+            return result;
+          } 
+          throw new java.util.NoSuchElementException("No more entry in " + f);
+        }
+
+        @Override
+        public void remove() {
+          throw new UnsupportedOperationException("Remove is not supported");
+
+        }
+      };
+    }
+
+    /**
      * <p>Return all the files that match filePattern and are not checksum
      * files. Results are sorted by their names.
      * 

Modified: hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileSystem.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileSystem.java?rev=980953&r1=980952&r2=980953&view=diff
==============================================================================
--- hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileSystem.java (original)
+++ hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileSystem.java Fri Jul 30 20:52:08 2010
@@ -1320,16 +1320,22 @@ public abstract class FileSystem extends
   }
 
   /**
-   * List the statuses and block locations of the files in the given path 
-   * if the path is a directory.
-   * If the given path is a file, return the file's status and block locations.
-   * if recursive is true, list all file statuses and block locations in
-   * the subtree rooted at the given path.
+   * List the statuses and block locations of the files in the given path.
+   * 
+   * If the path is a directory, 
+   *   if recursive is false, returns files in the directory;
+   *   if recursive is true, return files in the subtree rooted at the path.
+   * If the path is a file, return the file's status and block locations.
+   * Files across symbolic links are also returned.
    * 
    * @param f is the path
    * @param recursive if the subdirectories need to be traversed recursively
    *
    * @return an iterator that traverses statuses of the files
+   * If any IO exception occurs (for example a sub-directory gets deleted
+   * while listing is being executed), next() or hasNext() of the returned
+   * iterator may throw a RuntimeException with the IO exception as the cause.
+   *
    * @throws FileNotFoundException when the path does not exist;
    *         IOException see specific implementation
    */
@@ -1344,6 +1350,12 @@ public abstract class FileSystem extends
         list(f);
       }
       
+      /**
+       *  {@inheritDoc}
+       *  @return {@inheritDoc} 
+       *  @throws RuntimeException if any IOException occurs during traversal;
+       *  the IOException is set as the cause of the RuntimeException
+       */
       @Override
       public boolean hasNext() {
         if (fileStats.isEmpty()) {
@@ -1382,6 +1394,13 @@ public abstract class FileSystem extends
         }        
       }
       
+      /**
+       *  {@inheritDoc}
+       *  @return {@inheritDoc} 
+       *  @throws RuntimeException if any IOException occurs during traversal;
+       *  the IOException is set as the cause of the RuntimeException
+       *  @exception {@inheritDoc}
+       */
       @Override
       public LocatedFileStatus next() {
         if (!hasNext()) {

Modified: 
hadoop/common/trunk/src/test/core/org/apache/hadoop/fs/TestListFiles.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/src/test/core/org/apache/hadoop/fs/TestListFiles.java?rev=980953&r1=980952&r2=980953&view=diff
==============================================================================
--- hadoop/common/trunk/src/test/core/org/apache/hadoop/fs/TestListFiles.java (original)
+++ hadoop/common/trunk/src/test/core/org/apache/hadoop/fs/TestListFiles.java Fri Jul 30 20:52:08 2010
@@ -106,15 +106,15 @@ public class TestListFiles {
   public void testDirectory() throws IOException {
     fs.mkdirs(DIR1);
 
+    // test empty directory
     Iterator<LocatedFileStatus> itor = fs.listFiles(
         DIR1, true);
     assertFalse(itor.hasNext());
     itor = fs.listFiles(DIR1, false);
     assertFalse(itor.hasNext());
     
-    writeFile(fs, FILE2, FILE_LEN);
-    
-    // test empty directory
+    // testing directory with 1 file
+    writeFile(fs, FILE2, FILE_LEN);    
     itor = fs.listFiles(DIR1, true);
     LocatedFileStatus stat = itor.next();
     assertFalse(itor.hasNext());
@@ -123,7 +123,6 @@ public class TestListFiles {
     assertEquals(fs.makeQualified(FILE2), stat.getPath());
     assertEquals(1, stat.getBlockLocations().length);
     
-    // testing directory with 1 file
     itor = fs.listFiles(DIR1, false);
     stat = itor.next();
     assertFalse(itor.hasNext());


Reply via email to