spark git commit: [SPARK-20889][SPARKR] Grouped documentation for AGGREGATE column methods

felixcheung Mon, 19 Jun 2017 19:42:13 -0700

Repository: spark
Updated Branches:
  refs/heads/master 9b57cd8d5 -> 8965fe764



[SPARK-20889][SPARKR] Grouped documentation for AGGREGATE column methods

## What changes were proposed in this pull request?
Grouped documentation for the aggregate functions for Column.

Author: actuaryzhang <actuaryzhan...@gmail.com>

Closes #18025 from actuaryzhang/sparkRDoc4.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8965fe76
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8965fe76
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8965fe76

Branch: refs/heads/master
Commit: 8965fe764a4218d944938aa4828072f1ad9dbda7
Parents: 9b57cd8
Author: actuaryzhang <actuaryzhan...@gmail.com>
Authored: Mon Jun 19 19:41:24 2017 -0700
Committer: Felix Cheung <felixche...@apache.org>
Committed: Mon Jun 19 19:41:24 2017 -0700

----------------------------------------------------------------------
 R/pkg/R/functions.R | 427 +++++++++++++++++++----------------------------
 R/pkg/R/generics.R  |  56 ++++---
 R/pkg/R/stats.R     |  22 +--
 3 files changed, 219 insertions(+), 286 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/8965fe76/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 7128c3b..01ca8b8 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -18,6 +18,22 @@
 #' @include generics.R column.R
 NULL
 
+#' Aggregate functions for Column operations
+#'
+#' Aggregate functions defined for \code{Column}.
+#'
+#' @param x Column to compute on.
+#' @param y,na.rm,use currently not used.
+#' @param ... additional argument(s). For example, it could be used to pass 
additional Columns.
+#' @name column_aggregate_functions
+#' @rdname column_aggregate_functions
+#' @family aggregate functions
+#' @examples
+#' \dontrun{
+#' # Dataframe used throughout this doc
+#' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))}
+NULL
+
 #' lit
 #'
 #' A new \linkS4class{Column} is created to represent the literal value.
@@ -85,17 +101,20 @@ setMethod("acos",
             column(jc)
           })
 
-#' Returns the approximate number of distinct items in a group
+#' @details
+#' \code{approxCountDistinct}: Returns the approximate number of distinct 
items in a group.
 #'
-#' Returns the approximate number of distinct items in a group. This is a 
column
-#' aggregate function.
-#'
-#' @rdname approxCountDistinct
-#' @name approxCountDistinct
-#' @return the approximate number of distinct items in a group.
+#' @rdname column_aggregate_functions
 #' @export
-#' @aliases approxCountDistinct,Column-method
-#' @examples \dontrun{approxCountDistinct(df$c)}
+#' @aliases approxCountDistinct approxCountDistinct,Column-method
+#' @examples
+#'
+#' \dontrun{
+#' head(select(df, approxCountDistinct(df$gear)))
+#' head(select(df, approxCountDistinct(df$gear, 0.02)))
+#' head(select(df, countDistinct(df$gear, df$cyl)))
+#' head(select(df, n_distinct(df$gear)))
+#' head(distinct(select(df, "gear")))}
 #' @note approxCountDistinct(Column) since 1.4.0
 setMethod("approxCountDistinct",
           signature(x = "Column"),
@@ -342,10 +361,13 @@ setMethod("column",
 #'
 #' @rdname corr
 #' @name corr
-#' @family math functions
+#' @family aggregate functions
 #' @export
 #' @aliases corr,Column-method
-#' @examples \dontrun{corr(df$c, df$d)}
+#' @examples
+#' \dontrun{
+#' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
+#' head(select(df, corr(df$mpg, df$hp)))}
 #' @note corr since 1.6.0
 setMethod("corr", signature(x = "Column"),
           function(x, col2) {
@@ -356,20 +378,22 @@ setMethod("corr", signature(x = "Column"),
 
 #' cov
 #'
-#' Compute the sample covariance between two expressions.
+#' Compute the covariance between two expressions.
+#'
+#' @details
+#' \code{cov}: Compute the sample covariance between two expressions.
 #'
 #' @rdname cov
 #' @name cov
-#' @family math functions
+#' @family aggregate functions
 #' @export
 #' @aliases cov,characterOrColumn-method
 #' @examples
 #' \dontrun{
-#' cov(df$c, df$d)
-#' cov("c", "d")
-#' covar_samp(df$c, df$d)
-#' covar_samp("c", "d")
-#' }
+#' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
+#' head(select(df, cov(df$mpg, df$hp), cov("mpg", "hp"),
+#'                 covar_samp(df$mpg, df$hp), covar_samp("mpg", "hp"),
+#'                 covar_pop(df$mpg, df$hp), covar_pop("mpg", "hp")))}
 #' @note cov since 1.6.0
 setMethod("cov", signature(x = "characterOrColumn"),
           function(x, col2) {
@@ -377,6 +401,9 @@ setMethod("cov", signature(x = "characterOrColumn"),
             covar_samp(x, col2)
           })
 
+#' @details
+#' \code{covar_samp}: Alias for \code{cov}.
+#'
 #' @rdname cov
 #'
 #' @param col1 the first Column.
@@ -395,23 +422,13 @@ setMethod("covar_samp", signature(col1 = 
"characterOrColumn", col2 = "characterO
             column(jc)
           })
 
-#' covar_pop
+#' @details
+#' \code{covar_pop}: Computes the population covariance between two 
expressions.
 #'
-#' Compute the population covariance between two expressions.
-#'
-#' @param col1 First column to compute cov_pop.
-#' @param col2 Second column to compute cov_pop.
-#'
-#' @rdname covar_pop
+#' @rdname cov
 #' @name covar_pop
-#' @family math functions
 #' @export
 #' @aliases covar_pop,characterOrColumn,characterOrColumn-method
-#' @examples
-#' \dontrun{
-#' covar_pop(df$c, df$d)
-#' covar_pop("c", "d")
-#' }
 #' @note covar_pop since 2.0.0
 setMethod("covar_pop", signature(col1 = "characterOrColumn", col2 = 
"characterOrColumn"),
           function(col1, col2) {
@@ -823,18 +840,16 @@ setMethod("isnan",
             column(jc)
           })
 
-#' kurtosis
-#'
-#' Aggregate function: returns the kurtosis of the values in a group.
+#' @details
+#' \code{kurtosis}: Returns the kurtosis of the values in a group.
 #'
-#' @param x Column to compute on.
-#'
-#' @rdname kurtosis
-#' @name kurtosis
-#' @aliases kurtosis,Column-method
-#' @family aggregate functions
+#' @rdname column_aggregate_functions
+#' @aliases kurtosis kurtosis,Column-method
 #' @export
-#' @examples \dontrun{kurtosis(df$c)}
+#' @examples
+#'
+#' \dontrun{
+#' head(select(df, mean(df$mpg), sd(df$mpg), skewness(df$mpg), 
kurtosis(df$mpg)))}
 #' @note kurtosis since 1.6.0
 setMethod("kurtosis",
           signature(x = "Column"),
@@ -1040,18 +1055,11 @@ setMethod("ltrim",
             column(jc)
           })
 
-#' max
-#'
-#' Aggregate function: returns the maximum value of the expression in a group.
-#'
-#' @param x Column to compute on.
+#' @details
+#' \code{max}: Returns the maximum value of the expression in a group.
 #'
-#' @rdname max
-#' @name max
-#' @family aggregate functions
-#' @aliases max,Column-method
-#' @export
-#' @examples \dontrun{max(df$c)}
+#' @rdname column_aggregate_functions
+#' @aliases max max,Column-method
 #' @note max since 1.5.0
 setMethod("max",
           signature(x = "Column"),
@@ -1081,19 +1089,24 @@ setMethod("md5",
             column(jc)
           })
 
-#' mean
+#' @details
+#' \code{mean}: Returns the average of the values in a group. Alias for 
\code{avg}.
 #'
-#' Aggregate function: returns the average of the values in a group.
-#' Alias for avg.
+#' @rdname column_aggregate_functions
+#' @aliases mean mean,Column-method
+#' @export
+#' @examples
 #'
-#' @param x Column to compute on.
+#' \dontrun{
+#' head(select(df, avg(df$mpg), mean(df$mpg), sum(df$mpg), min(df$wt), 
max(df$qsec)))
 #'
-#' @rdname mean
-#' @name mean
-#' @family aggregate functions
-#' @aliases mean,Column-method
-#' @export
-#' @examples \dontrun{mean(df$c)}
+#' # metrics by num of cylinders
+#' tmp <- agg(groupBy(df, "cyl"), avg(df$mpg), avg(df$hp), avg(df$wt), 
avg(df$qsec))
+#' head(orderBy(tmp, "cyl"))
+#'
+#' # car with the max mpg
+#' mpg_max <- as.numeric(collect(agg(df, max(df$mpg))))
+#' head(where(df, df$mpg == mpg_max))}
 #' @note mean since 1.5.0
 setMethod("mean",
           signature(x = "Column"),
@@ -1102,18 +1115,12 @@ setMethod("mean",
             column(jc)
           })
 
-#' min
-#'
-#' Aggregate function: returns the minimum value of the expression in a group.
-#'
-#' @param x Column to compute on.
+#' @details
+#' \code{min}: Returns the minimum value of the expression in a group.
 #'
-#' @rdname min
-#' @name min
-#' @aliases min,Column-method
-#' @family aggregate functions
+#' @rdname column_aggregate_functions
+#' @aliases min min,Column-method
 #' @export
-#' @examples \dontrun{min(df$c)}
 #' @note min since 1.5.0
 setMethod("min",
           signature(x = "Column"),
@@ -1338,24 +1345,17 @@ setMethod("rtrim",
             column(jc)
           })
 
-#' sd
-#'
-#' Aggregate function: alias for \link{stddev_samp}
+
+#' @details
+#' \code{sd}: Alias for \code{stddev_samp}.
 #'
-#' @param x Column to compute on.
-#' @param na.rm currently not used.
-#' @rdname sd
-#' @name sd
-#' @family aggregate functions
-#' @aliases sd,Column-method
-#' @seealso \link{stddev_pop}, \link{stddev_samp}
+#' @rdname column_aggregate_functions
+#' @aliases sd sd,Column-method
 #' @export
 #' @examples
-#'\dontrun{
-#'stddev(df$c)
-#'select(df, stddev(df$age))
-#'agg(df, sd(df$age))
-#'}
+#'
+#' \dontrun{
+#' head(select(df, sd(df$mpg), stddev(df$mpg), stddev_pop(df$wt), 
stddev_samp(df$qsec)))}
 #' @note sd since 1.6.0
 setMethod("sd",
           signature(x = "Column"),
@@ -1465,18 +1465,12 @@ setMethod("sinh",
             column(jc)
           })
 
-#' skewness
-#'
-#' Aggregate function: returns the skewness of the values in a group.
-#'
-#' @param x Column to compute on.
+#' @details
+#' \code{skewness}: Returns the skewness of the values in a group.
 #'
-#' @rdname skewness
-#' @name skewness
-#' @family aggregate functions
-#' @aliases skewness,Column-method
+#' @rdname column_aggregate_functions
+#' @aliases skewness skewness,Column-method
 #' @export
-#' @examples \dontrun{skewness(df$c)}
 #' @note skewness since 1.6.0
 setMethod("skewness",
           signature(x = "Column"),
@@ -1527,9 +1521,11 @@ setMethod("spark_partition_id",
             column(jc)
           })
 
-#' @rdname sd
-#' @aliases stddev,Column-method
-#' @name stddev
+#' @details
+#' \code{stddev}: Alias for \code{stddev_samp}.
+#'
+#' @rdname column_aggregate_functions
+#' @aliases stddev stddev,Column-method
 #' @note stddev since 1.6.0
 setMethod("stddev",
           signature(x = "Column"),
@@ -1538,19 +1534,12 @@ setMethod("stddev",
             column(jc)
           })
 
-#' stddev_pop
-#'
-#' Aggregate function: returns the population standard deviation of the 
expression in a group.
-#'
-#' @param x Column to compute on.
+#' @details
+#' \code{stddev_pop}: Returns the population standard deviation of the 
expression in a group.
 #'
-#' @rdname stddev_pop
-#' @name stddev_pop
-#' @family aggregate functions
-#' @aliases stddev_pop,Column-method
-#' @seealso \link{sd}, \link{stddev_samp}
+#' @rdname column_aggregate_functions
+#' @aliases stddev_pop stddev_pop,Column-method
 #' @export
-#' @examples \dontrun{stddev_pop(df$c)}
 #' @note stddev_pop since 1.6.0
 setMethod("stddev_pop",
           signature(x = "Column"),
@@ -1559,19 +1548,12 @@ setMethod("stddev_pop",
             column(jc)
           })
 
-#' stddev_samp
-#'
-#' Aggregate function: returns the unbiased sample standard deviation of the 
expression in a group.
-#'
-#' @param x Column to compute on.
+#' @details
+#' \code{stddev_samp}: Returns the unbiased sample standard deviation of the 
expression in a group.
 #'
-#' @rdname stddev_samp
-#' @name stddev_samp
-#' @family aggregate functions
-#' @aliases stddev_samp,Column-method
-#' @seealso \link{stddev_pop}, \link{sd}
+#' @rdname column_aggregate_functions
+#' @aliases stddev_samp stddev_samp,Column-method
 #' @export
-#' @examples \dontrun{stddev_samp(df$c)}
 #' @note stddev_samp since 1.6.0
 setMethod("stddev_samp",
           signature(x = "Column"),
@@ -1630,18 +1612,12 @@ setMethod("sqrt",
             column(jc)
           })
 
-#' sum
-#'
-#' Aggregate function: returns the sum of all values in the expression.
-#'
-#' @param x Column to compute on.
+#' @details
+#' \code{sum}: Returns the sum of all values in the expression.
 #'
-#' @rdname sum
-#' @name sum
-#' @family aggregate functions
-#' @aliases sum,Column-method
+#' @rdname column_aggregate_functions
+#' @aliases sum sum,Column-method
 #' @export
-#' @examples \dontrun{sum(df$c)}
 #' @note sum since 1.5.0
 setMethod("sum",
           signature(x = "Column"),
@@ -1650,18 +1626,17 @@ setMethod("sum",
             column(jc)
           })
 
-#' sumDistinct
-#'
-#' Aggregate function: returns the sum of distinct values in the expression.
+#' @details
+#' \code{sumDistinct}: Returns the sum of distinct values in the expression.
 #'
-#' @param x Column to compute on.
-#'
-#' @rdname sumDistinct
-#' @name sumDistinct
-#' @family aggregate functions
-#' @aliases sumDistinct,Column-method
+#' @rdname column_aggregate_functions
+#' @aliases sumDistinct sumDistinct,Column-method
 #' @export
-#' @examples \dontrun{sumDistinct(df$c)}
+#' @examples
+#'
+#' \dontrun{
+#' head(select(df, sumDistinct(df$gear)))
+#' head(distinct(select(df, "gear")))}
 #' @note sumDistinct since 1.4.0
 setMethod("sumDistinct",
           signature(x = "Column"),
@@ -1952,24 +1927,16 @@ setMethod("upper",
             column(jc)
           })
 
-#' var
-#'
-#' Aggregate function: alias for \link{var_samp}.
+#' @details
+#' \code{var}: Alias for \code{var_samp}.
 #'
-#' @param x a Column to compute on.
-#' @param y,na.rm,use currently not used.
-#' @rdname var
-#' @name var
-#' @family aggregate functions
-#' @aliases var,Column-method
-#' @seealso \link{var_pop}, \link{var_samp}
+#' @rdname column_aggregate_functions
+#' @aliases var var,Column-method
 #' @export
 #' @examples
+#'
 #'\dontrun{
-#'variance(df$c)
-#'select(df, var_pop(df$age))
-#'agg(df, var(df$age))
-#'}
+#'head(agg(df, var(df$mpg), variance(df$mpg), var_pop(df$mpg), 
var_samp(df$mpg)))}
 #' @note var since 1.6.0
 setMethod("var",
           signature(x = "Column"),
@@ -1978,9 +1945,9 @@ setMethod("var",
             var_samp(x)
           })
 
-#' @rdname var
-#' @aliases variance,Column-method
-#' @name variance
+#' @rdname column_aggregate_functions
+#' @aliases variance variance,Column-method
+#' @export
 #' @note variance since 1.6.0
 setMethod("variance",
           signature(x = "Column"),
@@ -1989,19 +1956,12 @@ setMethod("variance",
             column(jc)
           })
 
-#' var_pop
+#' @details
+#' \code{var_pop}: Returns the population variance of the values in a group.
 #'
-#' Aggregate function: returns the population variance of the values in a 
group.
-#'
-#' @param x Column to compute on.
-#'
-#' @rdname var_pop
-#' @name var_pop
-#' @family aggregate functions
-#' @aliases var_pop,Column-method
-#' @seealso \link{var}, \link{var_samp}
+#' @rdname column_aggregate_functions
+#' @aliases var_pop var_pop,Column-method
 #' @export
-#' @examples \dontrun{var_pop(df$c)}
 #' @note var_pop since 1.5.0
 setMethod("var_pop",
           signature(x = "Column"),
@@ -2010,19 +1970,12 @@ setMethod("var_pop",
             column(jc)
           })
 
-#' var_samp
+#' @details
+#' \code{var_samp}: Returns the unbiased variance of the values in a group.
 #'
-#' Aggregate function: returns the unbiased variance of the values in a group.
-#'
-#' @param x Column to compute on.
-#'
-#' @rdname var_samp
-#' @name var_samp
-#' @aliases var_samp,Column-method
-#' @family aggregate functions
-#' @seealso \link{var_pop}, \link{var}
+#' @rdname column_aggregate_functions
+#' @aliases var_samp var_samp,Column-method
 #' @export
-#' @examples \dontrun{var_samp(df$c)}
 #' @note var_samp since 1.6.0
 setMethod("var_samp",
           signature(x = "Column"),
@@ -2235,17 +2188,11 @@ setMethod("pmod", signature(y = "Column"),
             column(jc)
           })
 
-
-#' @rdname approxCountDistinct
-#' @name approxCountDistinct
-#'
-#' @param x Column to compute on.
 #' @param rsd maximum estimation error allowed (default = 0.05)
-#' @param ... further arguments to be passed to or from other methods.
 #'
+#' @rdname column_aggregate_functions
 #' @aliases approxCountDistinct,Column-method
 #' @export
-#' @examples \dontrun{approxCountDistinct(df$c, 0.02)}
 #' @note approxCountDistinct(Column, numeric) since 1.4.0
 setMethod("approxCountDistinct",
           signature(x = "Column"),
@@ -2254,18 +2201,12 @@ setMethod("approxCountDistinct",
             column(jc)
           })
 
-#' Count Distinct Values
+#' @details
+#' \code{countDistinct}: Returns the number of distinct items in a group.
 #'
-#' @param x Column to compute on
-#' @param ... other columns
-#'
-#' @family aggregate functions
-#' @rdname countDistinct
-#' @name countDistinct
-#' @aliases countDistinct,Column-method
-#' @return the number of distinct items in a group.
+#' @rdname column_aggregate_functions
+#' @aliases countDistinct countDistinct,Column-method
 #' @export
-#' @examples \dontrun{countDistinct(df$c)}
 #' @note countDistinct since 1.4.0
 setMethod("countDistinct",
           signature(x = "Column"),
@@ -2384,15 +2325,12 @@ setMethod("sign", signature(x = "Column"),
             signum(x)
           })
 
-#' n_distinct
-#'
-#' Aggregate function: returns the number of distinct items in a group.
+#' @details
+#' \code{n_distinct}: Returns the number of distinct items in a group.
 #'
-#' @rdname countDistinct
-#' @name n_distinct
-#' @aliases n_distinct,Column-method
+#' @rdname column_aggregate_functions
+#' @aliases n_distinct n_distinct,Column-method
 #' @export
-#' @examples \dontrun{n_distinct(df$c)}
 #' @note n_distinct since 1.4.0
 setMethod("n_distinct", signature(x = "Column"),
           function(x, ...) {
@@ -3717,18 +3655,18 @@ setMethod("create_map",
             column(jc)
           })
 
-#' collect_list
+#' @details
+#' \code{collect_list}: Creates a list of objects with duplicates.
 #'
-#' Creates a list of objects with duplicates.
-#'
-#' @param x Column to compute on
-#'
-#' @rdname collect_list
-#' @name collect_list
-#' @family aggregate functions
-#' @aliases collect_list,Column-method
+#' @rdname column_aggregate_functions
+#' @aliases collect_list collect_list,Column-method
 #' @export
-#' @examples \dontrun{collect_list(df$x)}
+#' @examples
+#'
+#' \dontrun{
+#' df2 <- df[df$mpg > 20, ]
+#' collect(select(df2, collect_list(df2$gear)))
+#' collect(select(df2, collect_set(df2$gear)))}
 #' @note collect_list since 2.3.0
 setMethod("collect_list",
           signature(x = "Column"),
@@ -3737,18 +3675,12 @@ setMethod("collect_list",
             column(jc)
           })
 
-#' collect_set
-#'
-#' Creates a list of objects with duplicate elements eliminated.
+#' @details
+#' \code{collect_set}: Creates a list of objects with duplicate elements 
eliminated.
 #'
-#' @param x Column to compute on
-#'
-#' @rdname collect_set
-#' @name collect_set
-#' @family aggregate functions
-#' @aliases collect_set,Column-method
+#' @rdname column_aggregate_functions
+#' @aliases collect_set collect_set,Column-method
 #' @export
-#' @examples \dontrun{collect_set(df$x)}
 #' @note collect_set since 2.3.0
 setMethod("collect_set",
           signature(x = "Column"),
@@ -3908,24 +3840,17 @@ setMethod("not",
             column(jc)
           })
 
-#' grouping_bit
-#'
-#' Indicates whether a specified column in a GROUP BY list is aggregated or 
not,
-#' returns 1 for aggregated or 0 for not aggregated in the result set.
+#' @details
+#' \code{grouping_bit}: Indicates whether a specified column in a GROUP BY 
list is aggregated or not,
+#' returns 1 for aggregated or 0 for not aggregated in the result set. Same as 
\code{GROUPING} in SQL
+#' and \code{grouping} function in Scala.
 #'
-#' Same as \code{GROUPING} in SQL and \code{grouping} function in Scala.
-#'
-#' @param x Column to compute on
-#'
-#' @rdname grouping_bit
-#' @name grouping_bit
-#' @family aggregate functions
-#' @aliases grouping_bit,Column-method
+#' @rdname column_aggregate_functions
+#' @aliases grouping_bit grouping_bit,Column-method
 #' @export
 #' @examples
-#' \dontrun{
-#' df <- createDataFrame(mtcars)
 #'
+#' \dontrun{
 #' # With cube
 #' agg(
 #'   cube(df, "cyl", "gear", "am"),
@@ -3938,8 +3863,7 @@ setMethod("not",
 #'   rollup(df, "cyl", "gear", "am"),
 #'   mean(df$mpg),
 #'   grouping_bit(df$cyl), grouping_bit(df$gear), grouping_bit(df$am)
-#' )
-#' }
+#' )}
 #' @note grouping_bit since 2.3.0
 setMethod("grouping_bit",
           signature(x = "Column"),
@@ -3948,26 +3872,18 @@ setMethod("grouping_bit",
             column(jc)
           })
 
-#' grouping_id
-#'
-#' Returns the level of grouping.
-#'
+#' @details
+#' \code{grouping_id}: Returns the level of grouping.
 #' Equals to \code{
 #' grouping_bit(c1) * 2^(n - 1) + grouping_bit(c2) * 2^(n - 2)  + ... + 
grouping_bit(cn)
 #' }
 #'
-#' @param x Column to compute on
-#' @param ... additional Column(s) (optional).
-#'
-#' @rdname grouping_id
-#' @name grouping_id
-#' @family aggregate functions
-#' @aliases grouping_id,Column-method
+#' @rdname column_aggregate_functions
+#' @aliases grouping_id grouping_id,Column-method
 #' @export
 #' @examples
-#' \dontrun{
-#' df <- createDataFrame(mtcars)
 #'
+#' \dontrun{
 #' # With cube
 #' agg(
 #'   cube(df, "cyl", "gear", "am"),
@@ -3980,8 +3896,7 @@ setMethod("grouping_bit",
 #'   rollup(df, "cyl", "gear", "am"),
 #'   mean(df$mpg),
 #'   grouping_id(df$cyl, df$gear, df$am)
-#' )
-#' }
+#' )}
 #' @note grouping_id since 2.3.0
 setMethod("grouping_id",
           signature(x = "Column"),

http://git-wip-us.apache.org/repos/asf/spark/blob/8965fe76/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 5630d0c..b3cc486 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -479,7 +479,7 @@ setGeneric("corr", function(x, ...) 
{standardGeneric("corr") })
 #' @export
 setGeneric("covar_samp", function(col1, col2) {standardGeneric("covar_samp") })
 
-#' @rdname covar_pop
+#' @rdname cov
 #' @export
 setGeneric("covar_pop", function(col1, col2) {standardGeneric("covar_pop") })
 
@@ -907,8 +907,9 @@ setGeneric("windowOrderBy", function(col, ...) { 
standardGeneric("windowOrderBy"
 #' @export
 setGeneric("add_months", function(y, x) { standardGeneric("add_months") })
 
-#' @rdname approxCountDistinct
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("approxCountDistinct", function(x, ...) { 
standardGeneric("approxCountDistinct") })
 
 #' @rdname array_contains
@@ -949,12 +950,14 @@ setGeneric("cbrt", function(x) { standardGeneric("cbrt") 
})
 #' @export
 setGeneric("ceil", function(x) { standardGeneric("ceil") })
 
-#' @rdname collect_list
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("collect_list", function(x) { standardGeneric("collect_list") })
 
-#' @rdname collect_set
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("collect_set", function(x) { standardGeneric("collect_set") })
 
 #' @rdname column
@@ -973,8 +976,9 @@ setGeneric("concat_ws", function(sep, x, ...) { 
standardGeneric("concat_ws") })
 #' @export
 setGeneric("conv", function(x, fromBase, toBase) { standardGeneric("conv") })
 
-#' @rdname countDistinct
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("countDistinct", function(x, ...) { 
standardGeneric("countDistinct") })
 
 #' @rdname crc32
@@ -1071,12 +1075,14 @@ setGeneric("from_unixtime", function(x, ...) { 
standardGeneric("from_unixtime")
 #' @export
 setGeneric("greatest", function(x, ...) { standardGeneric("greatest") })
 
-#' @rdname grouping_bit
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("grouping_bit", function(x) { standardGeneric("grouping_bit") })
 
-#' @rdname grouping_id
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("grouping_id", function(x, ...) { standardGeneric("grouping_id") })
 
 #' @rdname hex
@@ -1109,8 +1115,9 @@ setGeneric("instr", function(y, x) { 
standardGeneric("instr") })
 #' @export
 setGeneric("isnan", function(x) { standardGeneric("isnan") })
 
-#' @rdname kurtosis
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("kurtosis", function(x) { standardGeneric("kurtosis") })
 
 #' @rdname lag
@@ -1203,8 +1210,9 @@ setGeneric("next_day", function(y, x) { 
standardGeneric("next_day") })
 #' @export
 setGeneric("ntile", function(x) { standardGeneric("ntile") })
 
-#' @rdname countDistinct
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("n_distinct", function(x, ...) { standardGeneric("n_distinct") })
 
 #' @param x empty. Should be used with no argument.
@@ -1274,8 +1282,9 @@ setGeneric("rpad", function(x, len, pad) { 
standardGeneric("rpad") })
 #' @export
 setGeneric("rtrim", function(x) { standardGeneric("rtrim") })
 
-#' @rdname sd
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("sd", function(x, na.rm = FALSE) { standardGeneric("sd") })
 
 #' @rdname second
@@ -1310,8 +1319,9 @@ setGeneric("signum", function(x) { 
standardGeneric("signum") })
 #' @export
 setGeneric("size", function(x) { standardGeneric("size") })
 
-#' @rdname skewness
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("skewness", function(x) { standardGeneric("skewness") })
 
 #' @rdname sort_array
@@ -1331,16 +1341,19 @@ setGeneric("soundex", function(x) { 
standardGeneric("soundex") })
 #' @export
 setGeneric("spark_partition_id", function(x = "missing") { 
standardGeneric("spark_partition_id") })
 
-#' @rdname sd
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("stddev", function(x) { standardGeneric("stddev") })
 
-#' @rdname stddev_pop
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("stddev_pop", function(x) { standardGeneric("stddev_pop") })
 
-#' @rdname stddev_samp
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("stddev_samp", function(x) { standardGeneric("stddev_samp") })
 
 #' @rdname struct
@@ -1351,8 +1364,9 @@ setGeneric("struct", function(x, ...) { 
standardGeneric("struct") })
 #' @export
 setGeneric("substring_index", function(x, delim, count) { 
standardGeneric("substring_index") })
 
-#' @rdname sumDistinct
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("sumDistinct", function(x) { standardGeneric("sumDistinct") })
 
 #' @rdname toDegrees
@@ -1403,20 +1417,24 @@ setGeneric("unix_timestamp", function(x, format) { 
standardGeneric("unix_timesta
 #' @export
 setGeneric("upper", function(x) { standardGeneric("upper") })
 
-#' @rdname var
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("var", function(x, y = NULL, na.rm = FALSE, use) { 
standardGeneric("var") })
 
-#' @rdname var
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("variance", function(x) { standardGeneric("variance") })
 
-#' @rdname var_pop
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("var_pop", function(x) { standardGeneric("var_pop") })
 
-#' @rdname var_samp
+#' @rdname column_aggregate_functions
 #' @export
+#' @name NULL
 setGeneric("var_samp", function(x) { standardGeneric("var_samp") })
 
 #' @rdname weekofyear

http://git-wip-us.apache.org/repos/asf/spark/blob/8965fe76/R/pkg/R/stats.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R
index d78a108..9a9fa84 100644
--- a/R/pkg/R/stats.R
+++ b/R/pkg/R/stats.R
@@ -52,22 +52,23 @@ setMethod("crosstab",
             collect(dataFrame(sct))
           })
 
-#' Calculate the sample covariance of two numerical columns of a 
SparkDataFrame.
+#' @details
+#' \code{cov}: When applied to SparkDataFrame, this calculates the sample 
covariance of two numerical
+#' columns of \emph{one} SparkDataFrame.
 #'
 #' @param colName1 the name of the first column
 #' @param colName2 the name of the second column
 #' @return The covariance of the two columns.
 #'
 #' @rdname cov
-#' @name cov
 #' @aliases cov,SparkDataFrame-method
 #' @family stat functions
 #' @export
 #' @examples
-#'\dontrun{
-#' df <- read.json("/path/to/file.json")
-#' cov <- cov(df, "title", "gender")
-#' }
+#'
+#' \dontrun{
+#' cov(df, "mpg", "hp")
+#' cov(df, df$mpg, df$hp)}
 #' @note cov since 1.6.0
 setMethod("cov",
           signature(x = "SparkDataFrame"),
@@ -93,11 +94,10 @@ setMethod("cov",
 #' @family stat functions
 #' @export
 #' @examples
-#'\dontrun{
-#' df <- read.json("/path/to/file.json")
-#' corr <- corr(df, "title", "gender")
-#' corr <- corr(df, "title", "gender", method = "pearson")
-#' }
+#'
+#' \dontrun{
+#' corr(df, "mpg", "hp")
+#' corr(df, "mpg", "hp", method = "pearson")}
 #' @note corr since 1.6.0
 setMethod("corr",
           signature(x = "SparkDataFrame"),


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-20889][SPARKR] Grouped documentation for AGGREGATE column methods

Reply via email to