Github user HeartSaVioR commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22952#discussion_r237342346
  
    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala ---
    @@ -100,6 +101,36 @@ class FileStreamSource(
     
       logInfo(s"maxFilesPerBatch = $maxFilesPerBatch, maxFileAgeMs = $maxFileAgeMs")
     
    +  ensureNoOverlapBetweenSourceAndArchivePath()
    +
    +  private def ensureNoOverlapBetweenSourceAndArchivePath(): Unit = {
    +    @tailrec
    +    def removeGlob(path: Path): Path = {
    +      if (path.getName.contains("*")) {
    +        removeGlob(path.getParent)
    +      } else {
    +        path
    +      }
    +    }
    +
    +    sourceOptions.sourceArchiveDir match {
    +      case None =>
    +      case Some(archiveDir) =>
    +        val sourceUri = removeGlob(qualifiedBasePath).toUri
    +        val archiveUri = new Path(archiveDir).toUri
    +
    +        val sourcePath = sourceUri.getPath
    +        val archivePath = archiveUri.getPath
    --- End diff --
    
    Nice find. Will address.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to