Repository: spark Updated Branches: refs/heads/branch-1.3 cb061603c -> 0dba382ee
[SPARK-5872] [SQL] create a sqlCtx in pyspark shell The sqlCtx will be HiveContext if hive is built in assembly jar, or SQLContext if not. It also skip the Hive tests in pyspark.sql.tests if no hive is available. Author: Davies Liu <[email protected]> Closes #4659 from davies/sqlctx and squashes the following commits: 0e6629a [Davies Liu] sqlCtx in pyspark (cherry picked from commit 4d4cc760fa9687ce563320094557ef9144488676) Signed-off-by: Michael Armbrust <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0dba382e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0dba382e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0dba382e Branch: refs/heads/branch-1.3 Commit: 0dba382ee65694969704384c4968e3a656b3c833 Parents: cb06160 Author: Davies Liu <[email protected]> Authored: Tue Feb 17 15:44:37 2015 -0800 Committer: Michael Armbrust <[email protected]> Committed: Tue Feb 17 15:44:45 2015 -0800 ---------------------------------------------------------------------- python/pyspark/shell.py | 13 ++++++++++++- python/pyspark/sql/tests.py | 12 ++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/0dba382e/python/pyspark/shell.py ---------------------------------------------------------------------- diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py index 4cf4b89..1a02fec 100644 --- a/python/pyspark/shell.py +++ b/python/pyspark/shell.py @@ -31,8 +31,12 @@ if sys.version_info[0] != 2: import atexit import os import platform + +import py4j + import pyspark from pyspark.context import SparkContext +from pyspark.sql import SQLContext, HiveContext from pyspark.storagelevel import StorageLevel # this is the deprecated equivalent of ADD_JARS @@ -46,6 +50,13 @@ if os.environ.get("SPARK_EXECUTOR_URI"): sc = SparkContext(appName="PySparkShell", pyFiles=add_files) atexit.register(lambda: sc.stop()) +try: + # Try to access HiveConf, it will raise exception if Hive is not added + sc._jvm.org.apache.hadoop.hive.conf.HiveConf() + sqlCtx = HiveContext(sc) +except py4j.protocol.Py4JError: + sqlCtx = SQLContext(sc) + print("""Welcome to ____ __ / __/__ ___ _____/ /__ @@ -57,7 +68,7 @@ print("Using Python version %s (%s, %s)" % ( platform.python_version(), platform.python_build()[0], platform.python_build()[1])) -print("SparkContext available as sc.") +print("SparkContext available as sc, %s available as sqlCtx." % sqlCtx.__class__.__name__) if add_files is not None: print("Warning: ADD_FILES environment variable is deprecated, use --py-files argument instead") http://git-wip-us.apache.org/repos/asf/spark/blob/0dba382e/python/pyspark/sql/tests.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index aa80bca..52f7e65 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -25,6 +25,8 @@ import pydoc import shutil import tempfile +import py4j + if sys.version_info[:2] <= (2, 6): try: import unittest2 as unittest @@ -329,9 +331,12 @@ class HiveContextSQLTests(ReusedPySparkTestCase): def setUpClass(cls): ReusedPySparkTestCase.setUpClass() cls.tempdir = tempfile.NamedTemporaryFile(delete=False) + try: + cls.sc._jvm.org.apache.hadoop.hive.conf.HiveConf() + except py4j.protocol.Py4JError: + cls.sqlCtx = None + return os.unlink(cls.tempdir.name) - print "type", type(cls.sc) - print "type", type(cls.sc._jsc) _scala_HiveContext =\ cls.sc._jvm.org.apache.spark.sql.hive.test.TestHiveContext(cls.sc._jsc.sc()) cls.sqlCtx = HiveContext(cls.sc, _scala_HiveContext) @@ -344,6 +349,9 @@ class HiveContextSQLTests(ReusedPySparkTestCase): shutil.rmtree(cls.tempdir.name, ignore_errors=True) def test_save_and_load_table(self): + if self.sqlCtx is None: + return # no hive available, skipped + df = self.df tmpPath = tempfile.mkdtemp() shutil.rmtree(tmpPath) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
