Repository: spark
Updated Branches:
  refs/heads/branch-2.0 03dfe7830 -> 0ecc105d2
[SPARK-15250][SQL] Remove deprecated json API in DataFrameReader

## What changes were proposed in this pull request?

This PR removes the old `json(path: String)` API which is covered by the new `json(paths: String*)`.

## How was this patch tested?

Jenkins tests (existing tests should cover this)

Author: hyukjinkwon <gurwls...@gmail.com>
Author: Hyukjin Kwon <gurwls...@gmail.com>

Closes #13040 from HyukjinKwon/SPARK-15250.

(cherry picked from commit 3ff012051f5bb433abb868e590e59dea18867cd8)
Signed-off-by: Reynold Xin <r...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0ecc105d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0ecc105d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0ecc105d

Branch: refs/heads/branch-2.0
Commit: 0ecc105d29bcf2a127d4687462514d9aef834449
Parents: 03dfe78
Author: hyukjinkwon <gurwls...@gmail.com>
Authored: Tue May 10 22:21:17 2016 -0700
Committer: Reynold Xin <r...@databricks.com>
Committed: Tue May 10 22:21:24 2016 -0700

----------------------------------------------------------------------
 project/MimaExcludes.scala                      |  3 ++
 python/pyspark/sql/readwriter.py                |  4 +--
 .../org/apache/spark/sql/DataFrameReader.scala  | 33 +-------------------
 3 files changed, 6 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0ecc105d/project/MimaExcludes.scala
----------------------------------------------------------------------
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index b0d862d..69161e0 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -349,6 +349,9 @@ object MimaExcludes {
       // [SPARK-13686][MLLIB][STREAMING] Add a constructor parameter `reqParam` to (Streaming)LinearRegressionWithSGD
       ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.regression.LinearRegressionWithSGD.this")
     ) ++ Seq(
+      // SPARK-15250 Remove deprecated json API in DataFrameReader
+      ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.DataFrameReader.json")
+    ) ++ Seq(
       // SPARK-13920: MIMA checks should apply to @Experimental and @DeveloperAPI APIs
       ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.Aggregator.combineCombinersByKey"),
       ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.Aggregator.combineValuesByKey"),

http://git-wip-us.apache.org/repos/asf/spark/blob/0ecc105d/python/pyspark/sql/readwriter.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 20250b4..7e79df3 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -241,8 +241,8 @@ class DataFrameReader(object):
         if columnNameOfCorruptRecord is not None:
             self.option("columnNameOfCorruptRecord", columnNameOfCorruptRecord)
         if isinstance(path, basestring):
-            return self._df(self._jreader.json(path))
-        elif type(path) == list:
+            path = [path]
+        if type(path) == list:
             return self._df(self._jreader.json(self._sqlContext._sc._jvm.PythonUtils.toSeq(path)))
         elif isinstance(path, RDD):
             def func(iterator):

http://git-wip-us.apache.org/repos/asf/spark/blob/0ecc105d/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index 15d09e3..e1a64df 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -285,38 +285,6 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
    *
    * You can set the following JSON-specific options to deal with non-standard JSON files:
    * <li>`primitivesAsString` (default `false`): infers all primitive values as a string type</li>
-   * <li>`allowComments` (default `false`): ignores Java/C++ style comment in JSON records</li>
-   * <li>`allowUnquotedFieldNames` (default `false`): allows unquoted JSON field names</li>
-   * <li>`allowSingleQuotes` (default `true`): allows single quotes in addition to double quotes
-   * </li>
-   * <li>`allowNumericLeadingZeros` (default `false`): allows leading zeros in numbers
-   * (e.g. 00012)</li>
-   * <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
-   * during parsing.</li>
-   * <ul>
-   * <li>`PERMISSIVE` : sets other fields to `null` when it meets a corrupted record, and puts the
-   * malformed string into a new field configured by `columnNameOfCorruptRecord`. When
-   * a schema is set by user, it sets `null` for extra fields.</li>
-   * <li>`DROPMALFORMED` : ignores the whole corrupted records.</li>
-   * <li>`FAILFAST` : throws an exception when it meets corrupted records.</li>
-   * </ul>
-   * <li>`columnNameOfCorruptRecord` (default `_corrupt_record`): allows renaming the new field
-   * having malformed string created by `PERMISSIVE` mode. This overrides
-   * `spark.sql.columnNameOfCorruptRecord`.</li>
-   *
-   * @since 1.4.0
-   */
-  // TODO: Remove this one in Spark 2.0.
-  def json(path: String): DataFrame = format("json").load(path)
-
-  /**
-   * Loads a JSON file (one object per line) and returns the result as a [[DataFrame]].
-   *
-   * This function goes through the input once to determine the input schema. If you know the
-   * schema in advance, use the version that specifies the schema to avoid the extra scan.
-   *
-   * You can set the following JSON-specific options to deal with non-standard JSON files:
-   * <li>`primitivesAsString` (default `false`): infers all primitive values as a string type</li>
    * <li>`prefersDecimal` (default `false`): infers all floating-point values as a decimal
    * type. If the values do not fit in decimal, then it infers them as doubles.</li>
    * <li>`allowComments` (default `false`): ignores Java/C++ style comment in JSON records</li>
@@ -342,6 +310,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
    *
    * @since 1.6.0
    */
+  @scala.annotation.varargs
   def json(paths: String*): DataFrame = format("json").load(paths : _*)
 
   /**
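
For reference, a minimal sketch (not part of this commit; the file paths and local SparkSession setup are hypothetical) of how callers interact with the retained varargs API. A call with a single path still resolves to `json(paths: String*)`, so source compatibility is preserved while the MiMa exclusion above covers the binary-signature change, and the added `@scala.annotation.varargs` keeps the method usable as a varargs method from Java.

```scala
import org.apache.spark.sql.{DataFrame, SparkSession}

object JsonReaderSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical local session; any SparkSession behaves the same way here.
    val spark = SparkSession.builder()
      .appName("json-reader-sketch")
      .master("local[*]")
      .getOrCreate()

    // Single path: resolves to json(paths: String*), so code written against the
    // removed json(path: String) overload still compiles unchanged.
    val single: DataFrame = spark.read.json("examples/src/main/resources/people.json")

    // Multiple paths in one call, which the removed single-path overload could not express.
    val several: DataFrame = spark.read.json("data/part-0.json", "data/part-1.json")

    single.printSchema()
    several.printSchema()
    spark.stop()
  }
}
```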