Repository: spark Updated Branches: refs/heads/master 8fb2a02e2 -> d1f6c64c4
[SPARK-19828][R] Support array type in from_json in R ## What changes were proposed in this pull request? Since we could not directly define the array type in R, this PR proposes to support array types in R as string types that are used in `structField` as below: ```R jsonArr <- "[{\"name\":\"Bob\"}, {\"name\":\"Alice\"}]" df <- as.DataFrame(list(list("people" = jsonArr))) collect(select(df, alias(from_json(df$people, "array<struct<name:string>>"), "arrcol"))) ``` prints ```R arrcol 1 Bob, Alice ``` ## How was this patch tested? Unit tests in `test_sparkSQL.R`. Author: hyukjinkwon <gurwls...@gmail.com> Closes #17178 from HyukjinKwon/SPARK-19828. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d1f6c64c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d1f6c64c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d1f6c64c Branch: refs/heads/master Commit: d1f6c64c4b763c05d6d79ae5497f298dc3835f3e Parents: 8fb2a02 Author: hyukjinkwon <gurwls...@gmail.com> Authored: Tue Mar 14 19:51:25 2017 -0700 Committer: Felix Cheung <felixche...@apache.org> Committed: Tue Mar 14 19:51:25 2017 -0700 ---------------------------------------------------------------------- R/pkg/R/functions.R | 12 ++++++++++-- R/pkg/inst/tests/testthat/test_sparkSQL.R | 12 ++++++++++++ .../scala/org/apache/spark/sql/api/r/SQLUtils.scala | 2 +- 3 files changed, 23 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/d1f6c64c/R/pkg/R/functions.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index edf2bcf..9867f2d 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -2437,6 +2437,7 @@ setMethod("date_format", signature(y = "Column", x = "character"), #' #' @param x Column containing the JSON string. 
#' @param schema a structType object to use as the schema when parsing the JSON string. +#' @param asJsonArray indicating if the input string is a JSON array of objects or a single object. #' @param ... additional named properties to control how the json is parsed, accepts the same #' options as the JSON data source. #' @@ -2452,11 +2453,18 @@ setMethod("date_format", signature(y = "Column", x = "character"), #'} #' @note from_json since 2.2.0 setMethod("from_json", signature(x = "Column", schema = "structType"), - function(x, schema, ...) { + function(x, schema, asJsonArray = FALSE, ...) { + if (asJsonArray) { + jschema <- callJStatic("org.apache.spark.sql.types.DataTypes", + "createArrayType", + schema$jobj) + } else { + jschema <- schema$jobj + } options <- varargsToStrEnv(...) jc <- callJStatic("org.apache.spark.sql.functions", "from_json", - x@jc, schema$jobj, options) + x@jc, jschema, options) column(jc) }) http://git-wip-us.apache.org/repos/asf/spark/blob/d1f6c64c/R/pkg/inst/tests/testthat/test_sparkSQL.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 9735fe3..f7081cb 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1364,6 +1364,18 @@ test_that("column functions", { # check for unparseable df <- as.DataFrame(list(list("a" = ""))) expect_equal(collect(select(df, from_json(df$a, schema)))[[1]][[1]], NA) + + # check if array type in string is correctly supported. 
+ jsonArr <- "[{\"name\":\"Bob\"}, {\"name\":\"Alice\"}]" + df <- as.DataFrame(list(list("people" = jsonArr))) + schema <- structType(structField("name", "string")) + arr <- collect(select(df, alias(from_json(df$people, schema, asJsonArray = TRUE), "arrcol"))) + expect_equal(ncol(arr), 1) + expect_equal(nrow(arr), 1) + expect_is(arr[[1]][[1]], "list") + expect_equal(length(arr$arrcol[[1]]), 2) + expect_equal(arr$arrcol[[1]][[1]]$name, "Bob") + expect_equal(arr$arrcol[[1]][[2]]$name, "Alice") }) test_that("column binary mathfunctions", { http://git-wip-us.apache.org/repos/asf/spark/blob/d1f6c64c/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index a4c5bf7..c773286 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -81,7 +81,7 @@ private[sql] object SQLUtils extends Logging { new JavaSparkContext(spark.sparkContext) } - def createStructType(fields : Seq[StructField]): StructType = { + def createStructType(fields: Seq[StructField]): StructType = { StructType(fields) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org