GitHub user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21267#discussion_r186650331

    --- Diff: python/pyspark/context.py ---
    @@ -211,9 +211,23 @@ def _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize,
             for path in self._conf.get("spark.submit.pyFiles", "").split(","):
                 if path != "":
                     (dirname, filename) = os.path.split(path)
    -                if filename[-4:].lower() in self.PACKAGE_EXTENSIONS:
    -                    self._python_includes.append(filename)
    -                    sys.path.insert(1, os.path.join(SparkFiles.getRootDirectory(), filename))
    +                try:
    +                    filepath = os.path.join(SparkFiles.getRootDirectory(), filename)
    +                    if not os.path.exists(filepath):
    +                        # In case of YARN with shell mode, 'spark.submit.pyFiles' files are
    +                        # not added via SparkContext.addFile. Here we check if the file exists,
    +                        # try to copy and then add it to the path. See SPARK-21945.
    +                        shutil.copyfile(path, filepath)
    +                    if filename[-4:].lower() in self.PACKAGE_EXTENSIONS:
    +                        self._python_includes.append(filename)
    +                        sys.path.insert(1, filepath)
    +                except Exception as e:
    +                    from pyspark import util
    +                    warnings.warn(
    --- End diff --

    Log was also tested manually:

    ```
    .../python/pyspark/context.py:230: RuntimeWarning: Python file [/home/spark/tmp.py] specified in 'spark.submit.pyFiles' failed to be added in the Python path, excluding this in the Python path.
     : ...
    ```
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org