This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new aa51da42908 [SPARK-39723][R] Implement functionExists/getFunc in SparkR for 3L namespace aa51da42908 is described below commit aa51da4290814bf3ccdc52000b8d90d6db575d3f Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Tue Jul 12 11:05:25 2022 +0800 [SPARK-39723][R] Implement functionExists/getFunc in SparkR for 3L namespace ### What changes were proposed in this pull request? 1, implement functionExists/getFunc in SparkR 2, update doc of ListFunctions ### Why are the changes needed? for 3L namespace ### Does this PR introduce _any_ user-facing change? yes, new API functionExists ### How was this patch tested? added UT Closes #37135 from zhengruifeng/r_3L_func. Lead-authored-by: Ruifeng Zheng <ruife...@apache.org> Co-authored-by: Ruifeng Zheng <ruife...@foxmail.com> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- R/pkg/NAMESPACE | 2 + R/pkg/R/catalog.R | 75 ++++++++++++++++++++++++++++++++++- R/pkg/pkgdown/_pkgdown_template.yml | 2 + R/pkg/tests/fulltests/test_sparkSQL.R | 34 +++++++++++++++- 4 files changed, 111 insertions(+), 2 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 3937791421a..e078ba0c2cd 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -479,7 +479,9 @@ export("as.DataFrame", "databaseExists", "dropTempTable", "dropTempView", + "functionExists", "getDatabase", + "getFunc", "getTable", "listCatalogs", "listColumns", diff --git a/R/pkg/R/catalog.R b/R/pkg/R/catalog.R index 680415ea6cd..942af4de3c0 100644 --- a/R/pkg/R/catalog.R +++ b/R/pkg/R/catalog.R @@ -583,13 +583,14 @@ listColumns <- function(tableName, databaseName = NULL) { #' This includes all temporary functions. #' #' @param databaseName (optional) name of the database +#' The database name can be qualified with catalog name since 3.4.0. #' @return a SparkDataFrame of the list of function descriptions. 
#' @rdname listFunctions #' @name listFunctions #' @examples #' \dontrun{ #' sparkR.session() -#' listFunctions() +#' listFunctions("spark_catalog.default") #' } #' @note since 2.2.0 listFunctions <- function(databaseName = NULL) { @@ -606,6 +607,78 @@ listFunctions <- function(databaseName = NULL) { dataFrame(callJMethod(jdst, "toDF")) } +#' Checks if the function with the specified name exists. +#' +#' Checks if the function with the specified name exists. +#' +#' @param functionName name of the function, allowed to be qualified with catalog name +#' @rdname functionExists +#' @name functionExists +#' @examples +#' \dontrun{ +#' sparkR.session() +#' functionExists("spark_catalog.default.myFunc") +#' } +#' @note since 3.4.0 +functionExists <- function(functionName) { + sparkSession <- getSparkSession() + if (class(functionName) != "character") { + stop("functionName must be a string.") + } + catalog <- callJMethod(sparkSession, "catalog") + callJMethod(catalog, "functionExists", functionName) +} + +#' Get the function with the specified name +#' +#' Get the function with the specified name +#' +#' @param functionName name of the function, allowed to be qualified with catalog name +#' @return A named list. +#' @rdname getFunc +#' @name getFunc +#' @examples +#' \dontrun{ +#' sparkR.session() +#' func <- getFunc("spark_catalog.default.myFunc") +#' } +#' @note since 3.4.0. Use different name with the scala/python side, to avoid the +#' signature conflict with built-in "getFunction". 
+getFunc <- function(functionName) { + sparkSession <- getSparkSession() + if (class(functionName) != "character") { + stop("functionName must be a string.") + } + catalog <- callJMethod(sparkSession, "catalog") + jfunc <- handledCallJMethod(catalog, "getFunction", functionName) + + ret <- list(name = callJMethod(jfunc, "name")) + jcata <- callJMethod(jfunc, "catalog") + if (is.null(jcata)) { + ret$catalog <- NA + } else { + ret$catalog <- jcata + } + + jns <- callJMethod(jfunc, "namespace") + if (is.null(jns)) { + ret$namespace <- NA + } else { + ret$namespace <- jns + } + + jdesc <- callJMethod(jfunc, "description") + if (is.null(jdesc)) { + ret$description <- NA + } else { + ret$description <- jdesc + } + + ret$className <- callJMethod(jfunc, "className") + ret$isTemporary <- callJMethod(jfunc, "isTemporary") + ret +} + #' Recovers all the partitions in the directory of a table and update the catalog #' #' Recovers all the partitions in the directory of a table and update the catalog. 
The name should be qualified with catalog name since 3.4.0. diff --git a/R/pkg/pkgdown/_pkgdown_template.yml b/R/pkg/pkgdown/_pkgdown_template.yml index df93f200ab2..1da1d62ee9c 100644 --- a/R/pkg/pkgdown/_pkgdown_template.yml +++ b/R/pkg/pkgdown/_pkgdown_template.yml @@ -266,7 +266,9 @@ reference: - databaseExists - dropTempTable - dropTempView + - functionExists - getDatabase + - getFunc - getTable - listCatalogs - listColumns diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 85eca6b510b..fc54d89a1a4 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -4074,7 +4074,7 @@ test_that("catalog APIs, currentDatabase, setCurrentDatabase, listDatabases, get expect_equal(db$catalog, "spark_catalog") }) -test_that("catalog APIs, listTables, listColumns, listFunctions, getTable", { +test_that("catalog APIs, listTables, getTable, listColumns, listFunctions, functionExists", { tb <- listTables() count <- count(tables()) expect_equal(nrow(listTables("default")), count) @@ -4111,6 +4111,38 @@ test_that("catalog APIs, listTables, listColumns, listFunctions, getTable", { paste("Error in listFunctions : no such database - Database", "'zxwtyswklpf_db' not found")) + expect_true(functionExists("abs")) + expect_false(functionExists("aabbss")) + + func0 <- getFunc("abs") + expect_equal(func0$name, "abs") + expect_equal(func0$className, "org.apache.spark.sql.catalyst.expressions.Abs") + expect_true(func0$isTemporary) + + sql("CREATE FUNCTION func1 AS 'org.apache.spark.sql.catalyst.expressions.Add'") + + func1 <- getFunc("spark_catalog.default.func1") + expect_equal(func1$name, "func1") + expect_equal(func1$catalog, "spark_catalog") + expect_equal(length(func1$namespace), 1) + expect_equal(func1$namespace[[1]], "default") + expect_equal(func1$className, "org.apache.spark.sql.catalyst.expressions.Add") + expect_false(func1$isTemporary) + + expect_true(functionExists("func1")) + expect_true(functionExists("default.func1")) + 
expect_true(functionExists("spark_catalog.default.func1")) + + expect_false(functionExists("func2")) + expect_false(functionExists("default.func2")) + expect_false(functionExists("spark_catalog.default.func2")) + + sql("DROP FUNCTION func1") + + expect_false(functionExists("func1")) + expect_false(functionExists("default.func1")) + expect_false(functionExists("spark_catalog.default.func1")) + # recoverPartitions does not work with temporary view expect_error(recoverPartitions("cars"), paste("Error in recoverPartitions : analysis error - cars is a temp view.", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org