Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/21408#discussion_r190196361
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala
---
@@ -311,14 +314,27 @@ object InMemoryFileIndex extends Logging {
// The other constructor of LocatedFileStatus will call
FileStatus.getPermission(),
// which is very slow on some file system (RawLocalFileSystem,
which is launch a
// subprocess and parse the stdout).
- val locations = fs.getFileBlockLocations(f, 0, f.getLen)
- val lfs = new LocatedFileStatus(f.getLen, f.isDirectory,
f.getReplication, f.getBlockSize,
- f.getModificationTime, 0, null, null, null, null, f.getPath,
locations)
- if (f.isSymlink) {
- lfs.setSymlink(f.getSymlink)
+ try {
+ val locations = fs.getFileBlockLocations(f, 0, f.getLen)
+ val lfs = new LocatedFileStatus(f.getLen, f.isDirectory,
f.getReplication, f.getBlockSize,
+ f.getModificationTime, 0, null, null, null, null, f.getPath,
locations)
+ if (f.isSymlink) {
+ lfs.setSymlink(f.getSymlink)
+ }
+ Some(lfs)
+ } catch {
+ case _: FileNotFoundException =>
+ missingFiles += f.getPath.toString
+ None
}
- lfs
}
+
+ if (missingFiles.nonEmpty) {
+ logWarning(s"The paths [${missingFiles.mkString(", ")}] were not
found. " +
+ "Were they deleted very recently?")
--- End diff --
The resulting warning message looks like this:
```
InMemoryFileIndex: The paths
[hdfs://hdp265-1.openstacklocal:8020/rel/00171151/input/hyukjin/part-43011-
fd2d682a-ade1-4b0d-9e52-ab5c5d895cc9-c000.csv, ... ] were not found. Were
they deleted very recently?
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]