GitHub user justinuang commented on a diff in the pull request: https://github.com/apache/spark/pull/8318#discussion_r37459103 --- Diff: python/pyspark/__init__.py --- @@ -36,6 +36,33 @@ Finer-grained cache persistence levels. """ +import os +import sys + +import xml.etree.ElementTree as ET + +if (os.environ.get("SPARK_HOME", "not found") == "not found"): + raise ImportError("Environment variable SPARK_HOME is undefined.") + +spark_home = os.environ['SPARK_HOME'] +pom_xml_file_path = spark_home + '/pom.xml' + +try: + tree = ET.parse(pom_xml_file_path) + root = tree.getroot() + version_tag = root[4].text + snapshot_version = version_tag[:5] +except: + raise ImportError("Could not read the spark version, because pom.xml file" + + " is not found in SPARK_HOME(%s) directory." % (spark_home)) + +from pyspark.pyspark_version import __version__ +if (snapshot_version != __version__): + raise ImportError("Incompatible version of Spark(%s) and PySpark(%s)." % + (snapshot_version, __version__)) + +sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j-0.8.1-src.zip")) --- End diff -- We don't need this anymore; presumably, if they pip-installed the package, py4j will already be installed in site-packages.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org