Repository: spark
Updated Branches:
  refs/heads/master d325ffbf3 -> ca545f794


[SPARK-25821][SQL] Remove SQLContext methods deprecated in 1.4

## What changes were proposed in this pull request?

Remove the `SQLContext` methods deprecated since 1.4 (`parquetFile`, `jsonFile`, `jsonRDD`, `load`, `jdbc`, plus the `applySchema` variants deprecated in 1.3), along with the corresponding deprecated SparkR wrappers `jsonFile`, `parquetFile`, and `jsonRDD`.
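
For code that still calls the removed `SQLContext` methods, the replacements are the corresponding `DataFrameReader` calls on `SparkSession`. Below is a minimal migration sketch, not taken from this patch; the session name, paths, and JDBC URL are placeholder assumptions:

```scala
import java.util.Properties

import org.apache.spark.sql.SparkSession

// Minimal migration sketch; `spark`, the paths, and the JDBC URL are placeholders.
val spark = SparkSession.builder().appName("sqlcontext-migration").getOrCreate()

// sqlContext.parquetFile("data.parquet")  ->  spark.read.parquet(...)
val parquetDF = spark.read.parquet("data.parquet")

// sqlContext.jsonFile("data.json")        ->  spark.read.json(...)
val jsonDF = spark.read.json("data.json")

// sqlContext.load("data", "csv")          ->  spark.read.format(...).load(...)
val loadedDF = spark.read.format("csv").load("data")

// sqlContext.jdbc(url, "people")          ->  spark.read.jdbc(...)
val jdbcDF = spark.read.jdbc("jdbc:postgresql://host/db", "people", new Properties())
```

The removed SparkR wrappers map the same way: `read.parquet` and `read.json` replace `parquetFile`, `jsonFile`, and `jsonRDD`.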

## How was this patch tested?

Existing tests.

Closes #22815 from srowen/SPARK-25821.

Authored-by: Sean Owen <sean.o...@databricks.com>
Signed-off-by: Sean Owen <sean.o...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca545f79
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca545f79
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca545f79

Branch: refs/heads/master
Commit: ca545f79410a464ef24e3986fac225f53bb2ef02
Parents: d325ffb
Author: Sean Owen <sean.o...@databricks.com>
Authored: Fri Oct 26 16:49:48 2018 -0500
Committer: Sean Owen <sean.o...@databricks.com>
Committed: Fri Oct 26 16:49:48 2018 -0500

----------------------------------------------------------------------
 R/pkg/NAMESPACE                                 |   2 -
 R/pkg/R/SQLContext.R                            |  61 +---
 R/pkg/tests/fulltests/test_sparkSQL.R           |  25 +-
 docs/sparkr.md                                  |   6 +-
 .../scala/org/apache/spark/sql/SQLContext.scala | 283 -------------------
 5 files changed, 8 insertions(+), 369 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ca545f79/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 36d7a9b..5a5dc20 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -420,13 +420,11 @@ export("as.DataFrame",
        "currentDatabase",
        "dropTempTable",
        "dropTempView",
-       "jsonFile",
        "listColumns",
        "listDatabases",
        "listFunctions",
        "listTables",
        "loadDF",
-       "parquetFile",
        "read.df",
        "read.jdbc",
        "read.json",

http://git-wip-us.apache.org/repos/asf/spark/blob/ca545f79/R/pkg/R/SQLContext.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index c819a7d..3f89ee9 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -343,7 +343,6 @@ setMethod("toDF", signature(x = "RDD"),
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' df <- read.json(path, multiLine = TRUE)
-#' df <- jsonFile(path)
 #' }
 #' @name read.json
 #' @method read.json default
@@ -363,51 +362,6 @@ read.json <- function(x, ...) {
   dispatchFunc("read.json(path)", x, ...)
 }
 
-#' @rdname read.json
-#' @name jsonFile
-#' @method jsonFile default
-#' @note jsonFile since 1.4.0
-jsonFile.default <- function(path) {
-  .Deprecated("read.json")
-  read.json(path)
-}
-
-jsonFile <- function(x, ...) {
-  dispatchFunc("jsonFile(path)", x, ...)
-}
-
-#' JSON RDD
-#'
-#' Loads an RDD storing one JSON object per string as a SparkDataFrame.
-#'
-#' @param sqlContext SQLContext to use
-#' @param rdd An RDD of JSON string
-#' @param schema A StructType object to use as schema
-#' @param samplingRatio The ratio of simpling used to infer the schema
-#' @return A SparkDataFrame
-#' @noRd
-#' @examples
-#'\dontrun{
-#' sparkR.session()
-#' rdd <- texFile(sc, "path/to/json")
-#' df <- jsonRDD(sqlContext, rdd)
-#'}
-
-# TODO: remove - this method is no longer exported
-# TODO: support schema
-jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) {
-  .Deprecated("read.json")
-  rdd <- serializeToString(rdd)
-  if (is.null(schema)) {
-    read <- callJMethod(sqlContext, "read")
-    # samplingRatio is deprecated
-    sdf <- callJMethod(read, "json", callJMethod(getJRDD(rdd), "rdd"))
-    dataFrame(sdf)
-  } else {
-    stop("not implemented")
-  }
-}
-
 #' Create a SparkDataFrame from an ORC file.
 #'
 #' Loads an ORC file, returning the result as a SparkDataFrame.
@@ -434,6 +388,7 @@ read.orc <- function(path, ...) {
 #' Loads a Parquet file, returning the result as a SparkDataFrame.
 #'
 #' @param path path of file to read. A vector of multiple paths is allowed.
+#' @param ... additional external data source specific named properties.
 #' @return SparkDataFrame
 #' @rdname read.parquet
 #' @name read.parquet
@@ -454,20 +409,6 @@ read.parquet <- function(x, ...) {
   dispatchFunc("read.parquet(...)", x, ...)
 }
 
-#' @param ... argument(s) passed to the method.
-#' @rdname read.parquet
-#' @name parquetFile
-#' @method parquetFile default
-#' @note parquetFile since 1.4.0
-parquetFile.default <- function(...) {
-  .Deprecated("read.parquet")
-  read.parquet(unlist(list(...)))
-}
-
-parquetFile <- function(x, ...) {
-  dispatchFunc("parquetFile(...)", x, ...)
-}
-
 #' Create a SparkDataFrame from a text file.
 #'
 #' Loads text files and returns a SparkDataFrame whose schema starts with

http://git-wip-us.apache.org/repos/asf/spark/blob/ca545f79/R/pkg/tests/fulltests/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 509f689..68bf5ea 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -628,14 +628,10 @@ test_that("read/write json files", {
     jsonPath3 <- tempfile(pattern = "jsonPath3", fileext = ".json")
     write.json(df, jsonPath3)
 
-    # Test read.json()/jsonFile() works with multiple input paths
+    # Test read.json() works with multiple input paths
     jsonDF1 <- read.json(c(jsonPath2, jsonPath3))
     expect_is(jsonDF1, "SparkDataFrame")
     expect_equal(count(jsonDF1), 6)
-    # Suppress warnings because jsonFile is deprecated
-    jsonDF2 <- suppressWarnings(jsonFile(c(jsonPath2, jsonPath3)))
-    expect_is(jsonDF2, "SparkDataFrame")
-    expect_equal(count(jsonDF2), 6)
 
     unlink(jsonPath2)
     unlink(jsonPath3)
@@ -655,20 +651,6 @@ test_that("read/write json files - compression option", {
   unlink(jsonPath)
 })
 
-test_that("jsonRDD() on a RDD with json string", {
-  sqlContext <- suppressWarnings(sparkRSQL.init(sc))
-  rdd <- parallelize(sc, mockLines)
-  expect_equal(countRDD(rdd), 3)
-  df <- suppressWarnings(jsonRDD(sqlContext, rdd))
-  expect_is(df, "SparkDataFrame")
-  expect_equal(count(df), 3)
-
-  rdd2 <- flatMap(rdd, function(x) c(x, x))
-  df <- suppressWarnings(jsonRDD(sqlContext, rdd2))
-  expect_is(df, "SparkDataFrame")
-  expect_equal(count(df), 6)
-})
-
 test_that("test tableNames and tables", {
   count <- count(listTables())
 
@@ -2658,7 +2640,7 @@ test_that("read/write Parquet files", {
     expect_is(df2, "SparkDataFrame")
     expect_equal(count(df2), 3)
 
-    # Test write.parquet/saveAsParquetFile and read.parquet/parquetFile
+    # Test write.parquet/saveAsParquetFile and read.parquet
     parquetPath2 <- tempfile(pattern = "parquetPath2", fileext = ".parquet")
     write.parquet(df, parquetPath2)
     parquetPath3 <- tempfile(pattern = "parquetPath3", fileext = ".parquet")
@@ -2666,9 +2648,6 @@ test_that("read/write Parquet files", {
     parquetDF <- read.parquet(c(parquetPath2, parquetPath3))
     expect_is(parquetDF, "SparkDataFrame")
     expect_equal(count(parquetDF), count(df) * 2)
-    parquetDF2 <- suppressWarnings(parquetFile(parquetPath2, parquetPath3))
-    expect_is(parquetDF2, "SparkDataFrame")
-    expect_equal(count(parquetDF2), count(df) * 2)
 
     # Test if varargs works with variables
     saveMode <- "overwrite"

http://git-wip-us.apache.org/repos/asf/spark/blob/ca545f79/docs/sparkr.md
----------------------------------------------------------------------
diff --git a/docs/sparkr.md b/docs/sparkr.md
index 79f8ab8..5882ed7 100644
--- a/docs/sparkr.md
+++ b/docs/sparkr.md
@@ -709,8 +709,12 @@ You can inspect the search path in R with [`search()`](https://stat.ethz.ch/R-ma
 
 ## Upgrading to SparkR 2.3.1 and above
 
- - In SparkR 2.3.0 and earlier, the `start` parameter of `substr` method was wrongly subtracted by one and considered as 0-based. This can lead to inconsistent substring results and also does not match with the behaviour with `substr` in R. In version 2.3.1 and later, it has been fixed so the `start` parameter of `substr` method is now 1-base. As an example, `substr(lit('abcdef'), 2, 4))` would result to `abc` in SparkR 2.3.0, and the result would be `bcd` in SparkR 2.3.1.
+ - In SparkR 2.3.0 and earlier, the `start` parameter of `substr` method was wrongly subtracted by one and considered as 0-based. This can lead to inconsistent substring results and also does not match with the behaviour with `substr` in R. In version 2.3.1 and later, it has been fixed so the `start` parameter of `substr` method is now 1-based. As an example, `substr(lit('abcdef'), 2, 4))` would result to `abc` in SparkR 2.3.0, and the result would be `bcd` in SparkR 2.3.1.
 
 ## Upgrading to SparkR 2.4.0
 
  - Previously, we don't check the validity of the size of the last layer in `spark.mlp`. For example, if the training data only has two labels, a `layers` param like `c(1, 3)` doesn't cause an error previously, now it does.
+
+## Upgrading to SparkR 3.0.0
+
+ - The deprecated methods `parquetFile`, `jsonRDD` and `jsonFile` in `SQLContext` have been removed. Use `read.parquet` and `read.json`.

http://git-wip-us.apache.org/repos/asf/spark/blob/ca545f79/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index dfb12f2..1b7e969 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -755,289 +755,6 @@ class SQLContext private[sql](val sparkSession: SparkSession)
     sessionState.catalog.listTables(databaseName).map(_.table).toArray
   }
 
-  ////////////////////////////////////////////////////////////////////////////
-  ////////////////////////////////////////////////////////////////////////////
-  // Deprecated methods
-  ////////////////////////////////////////////////////////////////////////////
-  ////////////////////////////////////////////////////////////////////////////
-
-  /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
-   */
-  @deprecated("Use createDataFrame instead.", "1.3.0")
-  def applySchema(rowRDD: RDD[Row], schema: StructType): DataFrame = {
-    createDataFrame(rowRDD, schema)
-  }
-
-  /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
-   */
-  @deprecated("Use createDataFrame instead.", "1.3.0")
-  def applySchema(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = {
-    createDataFrame(rowRDD, schema)
-  }
-
-  /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
-   */
-  @deprecated("Use createDataFrame instead.", "1.3.0")
-  def applySchema(rdd: RDD[_], beanClass: Class[_]): DataFrame = {
-    createDataFrame(rdd, beanClass)
-  }
-
-  /**
-   * @deprecated As of 1.3.0, replaced by `createDataFrame()`.
-   */
-  @deprecated("Use createDataFrame instead.", "1.3.0")
-  def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = {
-    createDataFrame(rdd, beanClass)
-  }
-
-  /**
-   * Loads a Parquet file, returning the result as a `DataFrame`. This function returns an empty
-   * `DataFrame` if no paths are passed in.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().parquet()`.
-   */
-  @deprecated("Use read.parquet() instead.", "1.4.0")
-  @scala.annotation.varargs
-  def parquetFile(paths: String*): DataFrame = {
-    if (paths.isEmpty) {
-      emptyDataFrame
-    } else {
-      read.parquet(paths : _*)
-    }
-  }
-
-  /**
-   * Loads a JSON file (one object per line), returning the result as a `DataFrame`.
-   * It goes through the entire dataset once to determine the schema.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonFile(path: String): DataFrame = {
-    read.json(path)
-  }
-
-  /**
-   * Loads a JSON file (one object per line) and applies the given schema,
-   * returning the result as a `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonFile(path: String, schema: StructType): DataFrame = {
-    read.schema(schema).json(path)
-  }
-
-  /**
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonFile(path: String, samplingRatio: Double): DataFrame = {
-    read.option("samplingRatio", samplingRatio.toString).json(path)
-  }
-
-  /**
-   * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
-   * `DataFrame`.
-   * It goes through the entire dataset once to determine the schema.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: RDD[String]): DataFrame = read.json(json)
-
-  /**
-   * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
-   * `DataFrame`.
-   * It goes through the entire dataset once to determine the schema.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: JavaRDD[String]): DataFrame = read.json(json)
-
-  /**
-   * Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema,
-   * returning the result as a `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: RDD[String], schema: StructType): DataFrame = {
-    read.schema(schema).json(json)
-  }
-
-  /**
-   * Loads an JavaRDD[String] storing JSON objects (one object per record) and applies the given
-   * schema, returning the result as a `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = {
-    read.schema(schema).json(json)
-  }
-
-  /**
-   * Loads an RDD[String] storing JSON objects (one object per record) inferring the
-   * schema, returning the result as a `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = {
-    read.option("samplingRatio", samplingRatio.toString).json(json)
-  }
-
-  /**
-   * Loads a JavaRDD[String] storing JSON objects (one object per record) inferring the
-   * schema, returning the result as a `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().json()`.
-   */
-  @deprecated("Use read.json() instead.", "1.4.0")
-  def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = {
-    read.option("samplingRatio", samplingRatio.toString).json(json)
-  }
-
-  /**
-   * Returns the dataset stored at path as a DataFrame,
-   * using the default data source configured by spark.sql.sources.default.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by `read().load(path)`.
-   */
-  @deprecated("Use read.load(path) instead.", "1.4.0")
-  def load(path: String): DataFrame = {
-    read.load(path)
-  }
-
-  /**
-   * Returns the dataset stored at path as a DataFrame, using the given data source.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by `read().format(source).load(path)`.
-   */
-  @deprecated("Use read.format(source).load(path) instead.", "1.4.0")
-  def load(path: String, source: String): DataFrame = {
-    read.format(source).load(path)
-  }
-
-  /**
-   * (Java-specific) Returns the dataset specified by the given data source and
-   * a set of options as a DataFrame.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
-   */
-  @deprecated("Use read.format(source).options(options).load() instead.", 
"1.4.0")
-  def load(source: String, options: java.util.Map[String, String]): DataFrame 
= {
-    read.options(options).format(source).load()
-  }
-
-  /**
-   * (Scala-specific) Returns the dataset specified by the given data source and
-   * a set of options as a DataFrame.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
-   */
-  @deprecated("Use read.format(source).options(options).load() instead.", 
"1.4.0")
-  def load(source: String, options: Map[String, String]): DataFrame = {
-    read.options(options).format(source).load()
-  }
-
-  /**
-   * (Java-specific) Returns the dataset specified by the given data source and
-   * a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by
-   *            `read().format(source).schema(schema).options(options).load()`.
-   */
-  @deprecated("Use read.format(source).schema(schema).options(options).load() 
instead.", "1.4.0")
-  def load(
-      source: String,
-      schema: StructType,
-      options: java.util.Map[String, String]): DataFrame = {
-    read.format(source).schema(schema).options(options).load()
-  }
-
-  /**
-   * (Scala-specific) Returns the dataset specified by the given data source and
-   * a set of options as a DataFrame, using the given schema as the schema of the DataFrame.
-   *
-   * @group genericdata
-   * @deprecated As of 1.4.0, replaced by
-   *            `read().format(source).schema(schema).options(options).load()`.
-   */
-  @deprecated("Use read.format(source).schema(schema).options(options).load() 
instead.", "1.4.0")
-  def load(source: String, schema: StructType, options: Map[String, String]): 
DataFrame = {
-    read.format(source).schema(schema).options(options).load()
-  }
-
-  /**
-   * Construct a `DataFrame` representing the database table accessible via JDBC URL
-   * url named table.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
-   */
-  @deprecated("Use read.jdbc() instead.", "1.4.0")
-  def jdbc(url: String, table: String): DataFrame = {
-    read.jdbc(url, table, new Properties)
-  }
-
-  /**
-   * Construct a `DataFrame` representing the database table accessible via JDBC URL
-   * url named table.  Partitions of the table will be retrieved in parallel based on the parameters
-   * passed to this function.
-   *
-   * @param columnName the name of a column of integral type that will be used for partitioning.
-   * @param lowerBound the minimum value of `columnName` used to decide partition stride
-   * @param upperBound the maximum value of `columnName` used to decide partition stride
-   * @param numPartitions the number of partitions.  the range `minValue`-`maxValue` will be split
-   *                      evenly into this many partitions
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
-   */
-  @deprecated("Use read.jdbc() instead.", "1.4.0")
-  def jdbc(
-      url: String,
-      table: String,
-      columnName: String,
-      lowerBound: Long,
-      upperBound: Long,
-      numPartitions: Int): DataFrame = {
-    read.jdbc(url, table, columnName, lowerBound, upperBound, numPartitions, new Properties)
-  }
-
-  /**
-   * Construct a `DataFrame` representing the database table accessible via JDBC URL
-   * url named table. The theParts parameter gives a list expressions
-   * suitable for inclusion in WHERE clauses; each one defines one partition
-   * of the `DataFrame`.
-   *
-   * @group specificdata
-   * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
-   */
-  @deprecated("Use read.jdbc() instead.", "1.4.0")
-  def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = {
-    read.jdbc(url, table, theParts, new Properties)
-  }
 }
 
 /**

