Repository: spark
Updated Branches:
  refs/heads/master 4d0706d61 -> 5f02d2e5b
[SPARK-17215][SQL] Method `SQLContext.parseDataType(dataTypeString: String)` could be removed.

## What changes were proposed in this pull request?

Method `SQLContext.parseDataType(dataTypeString: String)` can be removed; callers should use `SparkSession.parseDataType(dataTypeString: String)` instead. This requires updating PySpark.

## How was this patch tested?

Existing test cases.

Author: jiangxingbo <jiangxb1...@gmail.com>

Closes #14790 from jiangxb1987/parseDataType.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5f02d2e5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5f02d2e5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5f02d2e5

Branch: refs/heads/master
Commit: 5f02d2e5b4d37f554629cbd0e488e856fffd7b6b
Parents: 4d0706d
Author: jiangxingbo <jiangxb1...@gmail.com>
Authored: Wed Aug 24 23:36:04 2016 -0700
Committer: Reynold Xin <r...@databricks.com>
Committed: Wed Aug 24 23:36:04 2016 -0700

----------------------------------------------------------------------
 python/pyspark/sql/column.py                              |  7 +++----
 python/pyspark/sql/functions.py                           |  6 +++---
 python/pyspark/sql/readwriter.py                          |  4 +++-
 python/pyspark/sql/streaming.py                           |  4 +++-
 python/pyspark/sql/tests.py                               |  2 +-
 python/pyspark/sql/types.py                               |  6 +++---
 .../src/main/scala/org/apache/spark/sql/SQLContext.scala  | 10 ----------
 7 files changed, 16 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
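For reference, the PySpark call path this change standardizes on is sketched below. This is a minimal illustrative sketch, not code from the commit: the local master, app name, and example schema are arbitrary assumptions; the only parts taken from the diffs are `SparkSession.builder.getOrCreate()` and the JVM-side `parseDataType` call reached through `spark._jsparkSession`.

    # Minimal sketch (assumptions: local master, arbitrary app name and schema).
    # Shows the SparkSession-based path that replaces SQLContext.parseDataType:
    # a Python DataType is serialized to JSON and parsed into a JVM DataType.
    from pyspark.sql import SparkSession
    from pyspark.sql.types import StringType, StructField, StructType

    spark = SparkSession.builder.master("local[1]").appName("parse-datatype-sketch").getOrCreate()
    schema = StructType([StructField("name", StringType(), True)])

    # Same internal hop the updated PySpark modules below use:
    jdt = spark._jsparkSession.parseDataType(schema.json())
    print(jdt.simpleString())  # struct<name:string>

    spark.stop()
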
http://git-wip-us.apache.org/repos/asf/spark/blob/5f02d2e5/python/pyspark/sql/column.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 4b99f30..8d5adc8 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -328,10 +328,9 @@ class Column(object):
         if isinstance(dataType, basestring):
             jc = self._jc.cast(dataType)
         elif isinstance(dataType, DataType):
-            from pyspark.sql import SQLContext
-            sc = SparkContext.getOrCreate()
-            ctx = SQLContext.getOrCreate(sc)
-            jdt = ctx._ssql_ctx.parseDataType(dataType.json())
+            from pyspark.sql import SparkSession
+            spark = SparkSession.builder.getOrCreate()
+            jdt = spark._jsparkSession.parseDataType(dataType.json())
             jc = self._jc.cast(jdt)
         else:
             raise TypeError("unexpected type: %s" % type(dataType))

http://git-wip-us.apache.org/repos/asf/spark/blob/5f02d2e5/python/pyspark/sql/functions.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 4ea83e2..89b3c07 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1760,11 +1760,11 @@ class UserDefinedFunction(object):
         self._judf = self._create_judf(name)
 
     def _create_judf(self, name):
-        from pyspark.sql import SQLContext
+        from pyspark.sql import SparkSession
         sc = SparkContext.getOrCreate()
         wrapped_func = _wrap_function(sc, self.func, self.returnType)
-        ctx = SQLContext.getOrCreate(sc)
-        jdt = ctx._ssql_ctx.parseDataType(self.returnType.json())
+        spark = SparkSession.builder.getOrCreate()
+        jdt = spark._jsparkSession.parseDataType(self.returnType.json())
         if name is None:
             f = self.func
             name = f.__name__ if hasattr(f, '__name__') else f.__class__.__name__

http://git-wip-us.apache.org/repos/asf/spark/blob/5f02d2e5/python/pyspark/sql/readwriter.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 3da6f49..3d79e0c 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -98,9 +98,11 @@ class DataFrameReader(OptionUtils):
 
         :param schema: a :class:`pyspark.sql.types.StructType` object
         """
+        from pyspark.sql import SparkSession
         if not isinstance(schema, StructType):
             raise TypeError("schema should be StructType")
-        jschema = self._spark._ssql_ctx.parseDataType(schema.json())
+        spark = SparkSession.builder.getOrCreate()
+        jschema = spark._jsparkSession.parseDataType(schema.json())
         self._jreader = self._jreader.schema(jschema)
         return self

http://git-wip-us.apache.org/repos/asf/spark/blob/5f02d2e5/python/pyspark/sql/streaming.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py
index 3761d2b..a0ba582 100644
--- a/python/pyspark/sql/streaming.py
+++ b/python/pyspark/sql/streaming.py
@@ -273,9 +273,11 @@ class DataStreamReader(OptionUtils):
 
         >>> s = spark.readStream.schema(sdf_schema)
         """
+        from pyspark.sql import SparkSession
         if not isinstance(schema, StructType):
             raise TypeError("schema should be StructType")
-        jschema = self._spark._ssql_ctx.parseDataType(schema.json())
+        spark = SparkSession.builder.getOrCreate()
+        jschema = spark._jsparkSession.parseDataType(schema.json())
         self._jreader = self._jreader.schema(jschema)
         return self

http://git-wip-us.apache.org/repos/asf/spark/blob/5f02d2e5/python/pyspark/sql/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index fc41701..fd8e9ce 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -574,7 +574,7 @@ class SQLTests(ReusedPySparkTestCase):
         def check_datatype(datatype):
             pickled = pickle.loads(pickle.dumps(datatype))
             assert datatype == pickled
-            scala_datatype = self.spark._wrapped._ssql_ctx.parseDataType(datatype.json())
+            scala_datatype = self.spark._jsparkSession.parseDataType(datatype.json())
             python_datatype = _parse_datatype_json_string(scala_datatype.json())
             assert datatype == python_datatype

http://git-wip-us.apache.org/repos/asf/spark/blob/5f02d2e5/python/pyspark/sql/types.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 11b1e60..4a02312 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -851,7 +851,7 @@ def _parse_datatype_json_string(json_string):
     >>> def check_datatype(datatype):
     ...     pickled = pickle.loads(pickle.dumps(datatype))
     ...     assert datatype == pickled
-    ...     scala_datatype = sqlContext._ssql_ctx.parseDataType(datatype.json())
+    ...     scala_datatype = spark._jsparkSession.parseDataType(datatype.json())
     ...     python_datatype = _parse_datatype_json_string(scala_datatype.json())
     ...     assert datatype == python_datatype
     >>> for cls in _all_atomic_types.values():
@@ -1551,11 +1551,11 @@ register_input_converter(DateConverter())
 def _test():
     import doctest
     from pyspark.context import SparkContext
-    from pyspark.sql import SQLContext
+    from pyspark.sql import SparkSession
     globs = globals()
     sc = SparkContext('local[4]', 'PythonTest')
     globs['sc'] = sc
-    globs['sqlContext'] = SQLContext(sc)
+    globs['spark'] = SparkSession.builder.getOrCreate()
     (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS)
     globs['sc'].stop()
     if failure_count:

http://git-wip-us.apache.org/repos/asf/spark/blob/5f02d2e5/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index e7627ac..fbf2219 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -743,16 +743,6 @@ class SQLContext private[sql](val sparkSession: SparkSession)
     sparkSession.catalog.listTables(databaseName).collect().map(_.name)
   }
 
-  /**
-   * Parses the data type in our internal string representation. The data type string should
-   * have the same format as the one generated by `toString` in scala.
-   * It is only used by PySpark.
-   */
-  // TODO: Remove this function (would require updating PySpark).
-  private[sql] def parseDataType(dataTypeString: String): DataType = {
-    DataType.fromJson(dataTypeString)
-  }
-
   ////////////////////////////////////////////////////////////////////////////
   ////////////////////////////////////////////////////////////////////////////
   // Deprecated methods

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org