spark git commit: [SPARK-20585][SPARKR] R generic hint support

felixcheung Thu, 04 May 2017 01:41:59 -0700

Repository: spark
Updated Branches:
  refs/heads/master b8302ccd0 -> 9c36aa279



[SPARK-20585][SPARKR] R generic hint support

## What changes were proposed in this pull request?

Adds support for generic hints on `SparkDataFrame`

## How was this patch tested?

Unit tests, `check-cran.sh`

Author: zero323 <zero...@users.noreply.github.com>

Closes #17851 from zero323/SPARK-20585.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9c36aa27
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9c36aa27
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9c36aa27

Branch: refs/heads/master
Commit: 9c36aa27919fb7625e388f5c3c90af62ef902b24
Parents: b8302cc
Author: zero323 <zero...@users.noreply.github.com>
Authored: Thu May 4 01:41:36 2017 -0700
Committer: Felix Cheung <felixche...@apache.org>
Committed: Thu May 4 01:41:36 2017 -0700

----------------------------------------------------------------------
 R/pkg/NAMESPACE                           |  1 +
 R/pkg/R/DataFrame.R                       | 30 ++++++++++++++++++++++++++
 R/pkg/R/generics.R                        |  4 ++++
 R/pkg/inst/tests/testthat/test_sparkSQL.R | 12 +++++++++++
 4 files changed, 47 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/9c36aa27/R/pkg/NAMESPACE
----------------------------------------------------------------------
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 7ecd168..daa168c 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -123,6 +123,7 @@ exportMethods("arrange",
               "group_by",
               "groupBy",
               "head",
+              "hint",
               "insertInto",
               "intersect",
               "isLocal",

http://git-wip-us.apache.org/repos/asf/spark/blob/9c36aa27/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 7e57ba6..1c88692 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -3715,3 +3715,33 @@ setMethod("rollup",
             sgd <- callJMethod(x@sdf, "rollup", jcol)
             groupedData(sgd)
           })
+
+#' hint
+#'
+#' Specifies an execution plan hint and returns a new SparkDataFrame.
+#'
+#' @param x a SparkDataFrame.
+#' @param name the name of the hint, e.g. \code{"broadcast"}.
+#' @param ... optional parameters for the hint; each must be a character vector.
+#' @return A SparkDataFrame.
+#' @family SparkDataFrame functions
+#' @aliases hint,SparkDataFrame,character-method
+#' @rdname hint
+#' @name hint
+#' @export
+#' @examples
+#' \dontrun{
+#' df <- createDataFrame(mtcars)
+#' avg_mpg <- mean(groupBy(createDataFrame(mtcars), "cyl"), "mpg")
+#'
+#' head(join(df, hint(avg_mpg, "broadcast"), df$cyl == avg_mpg$cyl))
+#' }
+#' @note hint since 2.2.0
+setMethod("hint",
+          signature(x = "SparkDataFrame", name = "character"),
+          function(x, name, ...) {
+            parameters <- list(...)
+            stopifnot(all(vapply(parameters, is.character, logical(1))))
+            jdf <- callJMethod(x@sdf, "hint", name, parameters)
+            dataFrame(jdf)
+          })

http://git-wip-us.apache.org/repos/asf/spark/blob/9c36aa27/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index e02d464..56ef1be 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -576,6 +576,10 @@ setGeneric("group_by", function(x, ...) { standardGeneric("group_by") })
 #' @export
 setGeneric("groupBy", function(x, ...) { standardGeneric("groupBy") })
 
+#' @rdname hint
+#' @export
+setGeneric("hint", function(x, name, ...) { standardGeneric("hint") })  # dispatches on x, name
+
 #' @rdname insertInto
 #' @export
 setGeneric("insertInto", function(x, tableName, ...) { standardGeneric("insertInto") })

http://git-wip-us.apache.org/repos/asf/spark/blob/9c36aa27/R/pkg/inst/tests/testthat/test_sparkSQL.R
----------------------------------------------------------------------
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index a7bb326..82007a5 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -2182,6 +2182,18 @@ test_that("join(), crossJoin() and merge() on a DataFrame", {
 
   unlink(jsonPath2)
   unlink(jsonPath3)
+
+  # Join with broadcast hint
+  df1 <- sql("SELECT * FROM range(10e10)")
+  df2 <- sql("SELECT * FROM range(10e10)")
+
+  execution_plan <- capture.output(explain(join(df1, df2, df1$id == df2$id)))
+  expect_false(any(grepl("BroadcastHashJoin", execution_plan)))
+
+  execution_plan_hint <- capture.output(
+    explain(join(df1, hint(df2, "broadcast"), df1$id == df2$id))
+  )
+  expect_true(any(grepl("BroadcastHashJoin", execution_plan_hint)))
 })
 
 test_that("toJSON() on DataFrame", {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-20585][SPARKR] R generic hint support

Reply via email to