Author: hairong Date: Fri Jul 30 20:52:08 2010 New Revision: 980953 URL: http://svn.apache.org/viewvc?rev=980953&view=rev Log: HADOOP-6890. Improve listFiles API introduced by HADOOP-6870. Contributed by Hairong Kuang.
Modified: hadoop/common/trunk/CHANGES.txt hadoop/common/trunk/src/java/org/apache/hadoop/fs/AbstractFileSystem.java hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileContext.java hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileSystem.java hadoop/common/trunk/src/test/core/org/apache/hadoop/fs/TestListFiles.java Modified: hadoop/common/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/trunk/CHANGES.txt?rev=980953&r1=980952&r2=980953&view=diff ============================================================================== --- hadoop/common/trunk/CHANGES.txt (original) +++ hadoop/common/trunk/CHANGES.txt Fri Jul 30 20:52:08 2010 @@ -101,6 +101,7 @@ Trunk (unreleased changes) HADOOP-6656. Adds a thread in the UserGroupInformation to renew TGTs periodically. (Owen O'Malley and ddas via ddas) + HADOOP-6890. Improve listFiles API introduced by HADOOP-6870. (hairong) OPTIMIZATIONS BUG FIXES Modified: hadoop/common/trunk/src/java/org/apache/hadoop/fs/AbstractFileSystem.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/fs/AbstractFileSystem.java?rev=980953&r1=980952&r2=980953&view=diff ============================================================================== --- hadoop/common/trunk/src/java/org/apache/hadoop/fs/AbstractFileSystem.java (original) +++ hadoop/common/trunk/src/java/org/apache/hadoop/fs/AbstractFileSystem.java Fri Jul 30 20:52:08 2010 @@ -786,8 +786,8 @@ public abstract class AbstractFileSystem /** * The specification of this method matches that of - * {@link FileContext#listLocatedStatus(Path)} except that Path f must be for this - * file system. + * {@link FileContext#listLocatedStatus(Path)} except that Path f + * must be for this file system. 
*/ protected Iterator<LocatedFileStatus> listLocatedStatus(final Path f) throws AccessControlException, FileNotFoundException, @@ -795,15 +795,28 @@ public abstract class AbstractFileSystem return new Iterator<LocatedFileStatus>() { private Iterator<FileStatus> itor = listStatusIterator(f); + /** + * {@inheritDoc} + * @return {@inheritDoc} + * @throws RuntimeException if any IOException occurs during traversal; + * the IOException is set as the cause of the RuntimeException + */ @Override public boolean hasNext() { return itor.hasNext(); } + /** + * {@inheritDoc} + * @return {@inheritDoc} + * @throws RuntimeException if any IOException occurs during traversal; + * the IOException is set as the cause of the RuntimeException + * @exception {@inheritDoc} + */ @Override public LocatedFileStatus next() { if (!hasNext()) { - throw new NoSuchElementException(); + throw new NoSuchElementException("No more entry in " + f); } FileStatus result = itor.next(); try { Modified: hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileContext.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileContext.java?rev=980953&r1=980952&r2=980953&view=diff ============================================================================== --- hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileContext.java (original) +++ hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileContext.java Fri Jul 30 20:52:08 2010 @@ -1286,103 +1286,21 @@ public final class FileContext { } }.resolve(this, absF); } - - /** - * List the statuses and block locations of the files in the given path - * if the path is a directory. - * If the given path is a file, return the file's status and block locations. - * if recursive is true, list all file statuses and block locations in - * the subtree rooted at the given path. - * Files across symbolic links are also returned. 
- * - * @param f is the path - * @param recursive if the subdirectories need to be traversed recursively - * - * @return an iterator that traverses statuses of the files - * - * @throws AccessControlException If access is denied - * @throws FileNotFoundException If <code>f</code> does not exist - * @throws UnsupportedFileSystemException If file system for <code>f</code> is - * not supported - * @throws IOException If an I/O error occurred - * - * Exceptions applicable to file systems accessed over RPC: - * @throws RpcClientException If an exception occurred in the RPC client - * @throws RpcServerException If an exception occurred in the RPC server - * @throws UnexpectedServerException If server implementation throws - * undeclared exception to RPC server - */ - public Iterator<LocatedFileStatus> listFiles( - final Path f, final boolean recursive) throws AccessControlException, - FileNotFoundException, UnsupportedFileSystemException, - IOException { - return new Iterator<LocatedFileStatus>() { - private Stack<Path> dirs = new Stack<Path>(); - private Stack<Path> symLinks = new Stack<Path>(); - Iterator<LocatedFileStatus> itor = listLocatedStatus(f); - LocatedFileStatus curFile; - - @Override - public boolean hasNext() { - try { - while (curFile == null) { - if (itor.hasNext()) { - handleFileStat(itor.next()); - } else if (!dirs.isEmpty()) { - Path dirPath = dirs.pop(); - itor = listLocatedStatus(dirPath); - } else if (!symLinks.isEmpty()) { - Path symLink = symLinks.pop(); - FileStatus stat = getFileStatus(symLink); - if (stat.isFile() || (recursive && stat.isDirectory())) { - itor = listLocatedStatus(stat.getPath()); - } - } else { - return false; - } - } - return true; - } catch (IOException ioe) { - throw (RuntimeException)new RuntimeException().initCause(ioe); - } - } - - private void handleFileStat(LocatedFileStatus stat) throws IOException { - if (stat.isFile()) { // file - curFile = stat; - } else if (stat.isSymlink()) { // symbolic link - 
symLinks.push(stat.getSymlink()); - } else if (recursive) { // directory - dirs.push(stat.getPath()); - } - } - - @Override - public LocatedFileStatus next() { - if (hasNext()) { - LocatedFileStatus result = curFile; - curFile = null; - return result; - } - throw new java.util.NoSuchElementException("No more entry in " + f); - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Remove is not supported"); - - } - }; - } /** * List the statuses of the files/directories in the given path if the path is - * a directory. Each returned status contains a file's block locations. + * a directory. + * Return the file's status and block locations If the path is a file. + * + * If a returned status is a file, it contains the file's block locations. * * @param f is the path * * @return an iterator that traverses statuses of the files/directories * in the given path + * If any IO exception (for example the input directory gets deleted while + * listing is being executed), next() or hasNext() of the returned iterator + * may throw a RuntimeException with the io exception as the cause. * * @throws AccessControlException If access is denied * @throws FileNotFoundException If <code>f</code> does not exist @@ -1679,6 +1597,123 @@ public final class FileContext { } /** + * List the statuses and block locations of the files in the given path. + * + * If the path is a directory, + * if recursive is false, returns files in the directory; + * if recursive is true, return files in the subtree rooted at the path. + * The subtree is traversed in the depth-first order. + * If the path is a file, return the file's status and block locations. + * Files across symbolic links are also returned. 
+ * * @param f is the path + * @param recursive if the subdirectories need to be traversed recursively + * + * @return an iterator that traverses statuses of the files + * If any IO exception (for example a sub-directory gets deleted while + * listing is being executed), next() or hasNext() of the returned iterator + * may throw a RuntimeException with the IO exception as the cause. + * + * @throws AccessControlException If access is denied + * @throws FileNotFoundException If <code>f</code> does not exist + * @throws UnsupportedFileSystemException If file system for <code>f</code> + * is not supported + * @throws IOException If an I/O error occurred + * + * Exceptions applicable to file systems accessed over RPC: + * @throws RpcClientException If an exception occurred in the RPC client + * @throws RpcServerException If an exception occurred in the RPC server + * @throws UnexpectedServerException If server implementation throws + * undeclared exception to RPC server + */ + public Iterator<LocatedFileStatus> listFiles( + final Path f, final boolean recursive) throws AccessControlException, + FileNotFoundException, UnsupportedFileSystemException, + IOException { + return new Iterator<LocatedFileStatus>() { + private Stack<Iterator<LocatedFileStatus>> itors = + new Stack<Iterator<LocatedFileStatus>>(); + Iterator<LocatedFileStatus> curItor = listLocatedStatus(f); + LocatedFileStatus curFile; + + /** + * {@inheritDoc} + * @return {@inheritDoc} + * @throws RuntimeException if any IOException occurs during traversal; + * the IOException is set as the cause of the RuntimeException + */ + @Override + public boolean hasNext() { + while (curFile == null) { + if (curItor.hasNext()) { + handleFileStat(curItor.next()); + } else if (!itors.empty()) { + curItor = itors.pop(); + } else { + return false; + } + } + return true; + } + + /** + * Process the input stat. + * If it is a file, return the file stat. 
+ * If it is a directory, traverse the directory if recursive is true; + * ignore it if recursive is false. + * If it is a symlink, resolve the symlink first and then process it + * depending on if it is a file or directory. + * @param stat input status + * @throws RuntimeException if any io error occurs; the io exception + * is set as the cause of RuntimeException + */ + private void handleFileStat(LocatedFileStatus stat) { + try { + if (stat.isFile()) { // file + curFile = stat; + } else if (stat.isSymlink()) { // symbolic link + // resolve symbolic link + FileStatus symstat = FileContext.this.getFileStatus( + stat.getSymlink()); + if (symstat.isFile() || (recursive && symstat.isDirectory())) { + itors.push(curItor); + curItor = listLocatedStatus(stat.getPath()); + } + } else if (recursive) { // directory + itors.push(curItor); + curItor = listLocatedStatus(stat.getPath()); + } + } catch (IOException ioe) { + throw (RuntimeException)new RuntimeException().initCause(ioe); + } + } + + /** + * {@inheritDoc} + * @return {@inheritDoc} + * @throws RuntimeException if any IOException occurs during traversal; + * the IOException is set as the cause of the RuntimeException + * @exception {@inheritDoc} + */ + @Override + public LocatedFileStatus next() { + if (hasNext()) { + LocatedFileStatus result = curFile; + curFile = null; + return result; + } + throw new java.util.NoSuchElementException("No more entry in " + f); + } + + @Override + public void remove() { + throw new UnsupportedOperationException("Remove is not supported"); + + } + }; + } + + /** * <p>Return all the files that match filePattern and are not checksum * files. Results are sorted by their names. 
* Modified: hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileSystem.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileSystem.java?rev=980953&r1=980952&r2=980953&view=diff ============================================================================== --- hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileSystem.java (original) +++ hadoop/common/trunk/src/java/org/apache/hadoop/fs/FileSystem.java Fri Jul 30 20:52:08 2010 @@ -1320,16 +1320,22 @@ public abstract class FileSystem extends } /** - * List the statuses and block locations of the files in the given path - * if the path is a directory. - * If the given path is a file, return the file's status and block locations. - * if recursive is true, list all file statuses and block locations in - * the subtree rooted at the given path. + * List the statuses and block locations of the files in the given path. + * + * If the path is a directory, + * if recursive is false, returns files in the directory; + * if recursive is true, return files in the subtree rooted at the path. + * If the path is a file, return the file's status and block locations. + * Files across symbolic links are also returned. * * @param f is the path * @param recursive if the subdirectories need to be traversed recursively * * @return an iterator that traverses statuses of the files + * If any IO exception (for example a sub-directory gets deleted while + * listing is being executed), next() or hasNext() of the returned iterator + * may throw a RuntimeException with the IO exception as the cause. 
+ * * @throws FileNotFoundException when the path does not exist; * IOException see specific implementation */ @@ -1344,6 +1350,12 @@ public abstract class FileSystem extends list(f); } + /** + * {@inheritDoc} + * @return {@inheritDoc} + * @throws RuntimeException if any IOException occurs during traversal; + * the IOException is set as the cause of the RuntimeException + */ @Override public boolean hasNext() { if (fileStats.isEmpty()) { @@ -1382,6 +1394,13 @@ public abstract class FileSystem extends } } + /** + * {@inheritDoc} + * @return {@inheritDoc} + * @throws RuntimeException if any IOException occurs during traversal; + * the IOException is set as the cause of the RuntimeException + * @exception {@inheritDoc} + */ @Override public LocatedFileStatus next() { if (!hasNext()) { Modified: hadoop/common/trunk/src/test/core/org/apache/hadoop/fs/TestListFiles.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/src/test/core/org/apache/hadoop/fs/TestListFiles.java?rev=980953&r1=980952&r2=980953&view=diff ============================================================================== --- hadoop/common/trunk/src/test/core/org/apache/hadoop/fs/TestListFiles.java (original) +++ hadoop/common/trunk/src/test/core/org/apache/hadoop/fs/TestListFiles.java Fri Jul 30 20:52:08 2010 @@ -106,15 +106,15 @@ public class TestListFiles { public void testDirectory() throws IOException { fs.mkdirs(DIR1); + // test empty directory Iterator<LocatedFileStatus> itor = fs.listFiles( DIR1, true); assertFalse(itor.hasNext()); itor = fs.listFiles(DIR1, false); assertFalse(itor.hasNext()); - writeFile(fs, FILE2, FILE_LEN); - - // test empty directory + // testing directory with 1 file + writeFile(fs, FILE2, FILE_LEN); itor = fs.listFiles(DIR1, true); LocatedFileStatus stat = itor.next(); assertFalse(itor.hasNext()); @@ -123,7 +123,6 @@ public class TestListFiles { assertEquals(fs.makeQualified(FILE2), stat.getPath()); assertEquals(1, stat.getBlockLocations().length); - // 
testing directory with 1 file itor = fs.listFiles(DIR1, false); stat = itor.next(); assertFalse(itor.hasNext());