Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/21408#discussion_r190262119
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala
---
@@ -311,14 +314,27 @@ object InMemoryFileIndex extends Logging {
// The other constructor of LocatedFileStatus will call
FileStatus.getPermission(),
// which is very slow on some file system (RawLocalFileSystem,
which is launch a
// subprocess and parse the stdout).
- val locations = fs.getFileBlockLocations(f, 0, f.getLen)
- val lfs = new LocatedFileStatus(f.getLen, f.isDirectory,
f.getReplication, f.getBlockSize,
- f.getModificationTime, 0, null, null, null, null, f.getPath,
locations)
- if (f.isSymlink) {
- lfs.setSymlink(f.getSymlink)
+ try {
+ val locations = fs.getFileBlockLocations(f, 0, f.getLen)
+ val lfs = new LocatedFileStatus(f.getLen, f.isDirectory,
f.getReplication, f.getBlockSize,
+ f.getModificationTime, 0, null, null, null, null, f.getPath,
locations)
+ if (f.isSymlink) {
+ lfs.setSymlink(f.getSymlink)
+ }
+ Some(lfs)
+ } catch {
+ case _: FileNotFoundException =>
+ missingFiles += f.getPath.toString
+ None
}
- lfs
}
+
+ if (missingFiles.nonEmpty) {
+ logWarning(s"The paths [${missingFiles.mkString(", ")}] were not
found. " +
+ "Were they deleted very recently?")
--- End diff --
Maybe reword the warning message as:
```
InMemoryFileIndex: the following files were missing during file scan:
path1
path2
...
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]