Repository: spark
Updated Branches:
  refs/heads/branch-2.0 26d5a8b0d -> 029789611


[SPARK-16508][SPARKR] Fix CRAN undocumented/duplicated arguments warnings.

This PR tries to fix all of the remaining "undocumented/duplicated arguments" 
warnings reported by the CRAN check.

One warning remains: the doc for R `stats::glm` exported in SparkR. To silence 
it, we would also have to document all arguments of that non-SparkR function.
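
To illustrate what the CRAN check expects, below is a minimal roxygen2 sketch 
with a hypothetical standalone function (not part of SparkR): every formal 
argument, including `...`, gets its own @param tag, and arguments shared by 
several methods grouped under one @rdname are documented only once.

    #' Show the first rows of a data set
    #'
    #' @param x a data.frame.
    #' @param numRows the number of rows to print. Defaults to 20.
    #' @param ... further arguments to be passed to or from other methods.
    #' @rdname showRows
    #' @export
    showRows <- function(x, numRows = 20, ...) {
      # With every formal documented above, `R CMD check --as-cran` no longer
      # reports "Undocumented arguments in documentation object 'showRows'".
      print(head(x, numRows), ...)
      invisible(x)
    }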

Earlier discussion is in #14558.

Tested with the R unit tests and the `check-cran.sh` script (with no-test).

Author: Junyang Qian <junya...@databricks.com>

Closes #14705 from junyangq/SPARK-16508-master.

(cherry picked from commit 01401e965b58f7e8ab615764a452d7d18f1d4bf0)
Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/02978961
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/02978961
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/02978961

Branch: refs/heads/branch-2.0
Commit: 0297896119e11f23da4b14f62f50ec72b5fac57f
Parents: 26d5a8b
Author: Junyang Qian <junya...@databricks.com>
Authored: Sat Aug 20 06:59:23 2016 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Sun Aug 21 11:23:03 2016 -0700

----------------------------------------------------------------------
 R/pkg/R/DataFrame.R  | 219 ++++++++++++++++++++++++++--------------------
 R/pkg/R/SQLContext.R |  30 ++++---
 R/pkg/R/WindowSpec.R |  11 ++-
 R/pkg/R/column.R     |  18 +++-
 R/pkg/R/functions.R  | 173 ++++++++++++++++++++++++------------
 R/pkg/R/generics.R   |  61 ++++++++++---
 R/pkg/R/group.R      |   7 +-
 R/pkg/R/mllib.R      | 108 ++++++++++++-----------
 R/pkg/R/schema.R     |   5 +-
 R/pkg/R/sparkR.R     |  21 ++---
 R/pkg/R/stats.R      |  25 +++---
 11 files changed, 415 insertions(+), 263 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/02978961/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 92e60e7..0266939 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -120,8 +120,9 @@ setMethod("schema",
 #'
 #' Print the logical and physical Catalyst plans to the console for debugging.
 #'
-#' @param x A SparkDataFrame
+#' @param x a SparkDataFrame.
 #' @param extended Logical. If extended is FALSE, explain() only prints the 
physical plan.
+#' @param ... further arguments to be passed to or from other methods.
 #' @family SparkDataFrame functions
 #' @aliases explain,SparkDataFrame-method
 #' @rdname explain
@@ -177,11 +178,11 @@ setMethod("isLocal",
 #'
 #' Print the first numRows rows of a SparkDataFrame
 #'
-#' @param x A SparkDataFrame
-#' @param numRows The number of rows to print. Defaults to 20.
-#' @param truncate Whether truncate long strings. If true, strings more than 
20 characters will be
-#' truncated and all cells will be aligned right
-#'
+#' @param x a SparkDataFrame.
+#' @param numRows the number of rows to print. Defaults to 20.
+#' @param truncate whether to truncate long strings. If \code{TRUE}, strings more 
than
+#'                 20 characters will be truncated and all cells will be 
aligned right.
+#' @param ... further arguments to be passed to or from other methods.
 #' @family SparkDataFrame functions
 #' @aliases showDF,SparkDataFrame-method
 #' @rdname showDF
@@ -206,7 +207,7 @@ setMethod("showDF",
 #'
 #' Print the SparkDataFrame column names and types
 #'
-#' @param x A SparkDataFrame
+#' @param object a SparkDataFrame.
 #'
 #' @family SparkDataFrame functions
 #' @rdname show
@@ -257,11 +258,11 @@ setMethod("dtypes",
             })
           })
 
-#' Column names
+#' Column Names of SparkDataFrame
 #'
-#' Return all column names as a list
+#' Return all column names as a list.
 #'
-#' @param x A SparkDataFrame
+#' @param x a SparkDataFrame.
 #'
 #' @family SparkDataFrame functions
 #' @rdname columns
@@ -318,6 +319,8 @@ setMethod("colnames",
             columns(x)
           })
 
+#' @param value a character vector. Must have the same length as the number
+#'              of columns in the SparkDataFrame.
 #' @rdname columns
 #' @aliases colnames<-,SparkDataFrame-method
 #' @name colnames<-
@@ -509,9 +512,10 @@ setMethod("registerTempTable",
 #'
 #' Insert the contents of a SparkDataFrame into a table registered in the 
current SparkSession.
 #'
-#' @param x A SparkDataFrame
-#' @param tableName A character vector containing the name of the table
-#' @param overwrite A logical argument indicating whether or not to overwrite
+#' @param x a SparkDataFrame.
+#' @param tableName a character vector containing the name of the table.
+#' @param overwrite a logical argument indicating whether or not to overwrite.
+#' @param ... further arguments to be passed to or from other methods.
 #' the existing rows in the table.
 #'
 #' @family SparkDataFrame functions
@@ -570,7 +574,9 @@ setMethod("cache",
 #' supported storage levels, refer to
 #' 
\url{http://spark.apache.org/docs/latest/programming-guide.html#rdd-persistence}.
 #'
-#' @param x The SparkDataFrame to persist
+#' @param x the SparkDataFrame to persist.
+#' @param newLevel storage level chosen for the persistence. See available 
options in
+#'        the description.
 #'
 #' @family SparkDataFrame functions
 #' @rdname persist
@@ -598,8 +604,9 @@ setMethod("persist",
 #' Mark this SparkDataFrame as non-persistent, and remove all blocks for it 
from memory and
 #' disk.
 #'
-#' @param x The SparkDataFrame to unpersist
-#' @param blocking Whether to block until all blocks are deleted
+#' @param x the SparkDataFrame to unpersist.
+#' @param blocking whether to block until all blocks are deleted.
+#' @param ... further arguments to be passed to or from other methods.
 #'
 #' @family SparkDataFrame functions
 #' @rdname unpersist-methods
@@ -633,9 +640,10 @@ setMethod("unpersist",
 #'  \item{3.} {Return a new SparkDataFrame partitioned by the given column(s),
 #'                      using `spark.sql.shuffle.partitions` as number of 
partitions.}
 #'}
-#' @param x A SparkDataFrame
-#' @param numPartitions The number of partitions to use.
-#' @param col The column by which the partitioning will be performed.
+#' @param x a SparkDataFrame.
+#' @param numPartitions the number of partitions to use.
+#' @param col the column by which the partitioning will be performed.
+#' @param ... additional column(s) to be used in the partitioning.
 #'
 #' @family SparkDataFrame functions
 #' @rdname repartition
@@ -914,11 +922,10 @@ setMethod("sample_frac",
 
 #' Returns the number of rows in a SparkDataFrame
 #'
-#' @param x A SparkDataFrame
-#'
+#' @param x a SparkDataFrame.
 #' @family SparkDataFrame functions
 #' @rdname nrow
-#' @name count
+#' @name nrow
 #' @aliases count,SparkDataFrame-method
 #' @export
 #' @examples
@@ -994,9 +1001,10 @@ setMethod("dim",
 
 #' Collects all the elements of a SparkDataFrame and coerces them into an R 
data.frame.
 #'
-#' @param x A SparkDataFrame
-#' @param stringsAsFactors (Optional) A logical indicating whether or not 
string columns
+#' @param x a SparkDataFrame.
+#' @param stringsAsFactors (Optional) a logical indicating whether or not 
string columns
 #' should be converted to factors. FALSE by default.
+#' @param ... further arguments to be passed to or from other methods.
 #'
 #' @family SparkDataFrame functions
 #' @rdname collect
@@ -1091,8 +1099,10 @@ setMethod("limit",
             dataFrame(res)
           })
 
-#' Take the first NUM rows of a SparkDataFrame and return a the results as a R 
data.frame
+#' Take the first NUM rows of a SparkDataFrame and return the results as a R 
data.frame
 #'
+#' @param x a SparkDataFrame.
+#' @param num number of rows to take.
 #' @family SparkDataFrame functions
 #' @rdname take
 #' @name take
@@ -1119,9 +1129,9 @@ setMethod("take",
 #' then head() returns the first 6 rows in keeping with the current data.frame
 #' convention in R.
 #'
-#' @param x A SparkDataFrame
-#' @param num The number of rows to return. Default is 6.
-#' @return A data.frame
+#' @param x a SparkDataFrame.
+#' @param num the number of rows to return. Default is 6.
+#' @return A data.frame.
 #'
 #' @family SparkDataFrame functions
 #' @aliases head,SparkDataFrame-method
@@ -1145,7 +1155,8 @@ setMethod("head",
 
 #' Return the first row of a SparkDataFrame
 #'
-#' @param x A SparkDataFrame
+#' @param x a SparkDataFrame or a column used in aggregation function.
+#' @param ... further arguments to be passed to or from other methods.
 #'
 #' @family SparkDataFrame functions
 #' @aliases first,SparkDataFrame-method
@@ -1196,8 +1207,9 @@ setMethod("toRDD",
 #'
 #' Groups the SparkDataFrame using the specified columns, so we can run 
aggregation on them.
 #'
-#' @param x a SparkDataFrame
-#' @return a GroupedData
+#' @param x a SparkDataFrame.
+#' @param ... variable(s) (character names(s) or Column(s)) to group on.
+#' @return A GroupedData.
 #' @family SparkDataFrame functions
 #' @aliases groupBy,SparkDataFrame-method
 #' @rdname groupBy
@@ -1239,7 +1251,6 @@ setMethod("group_by",
 #'
 #' Compute aggregates by specifying a list of columns
 #'
-#' @param x a SparkDataFrame
 #' @family SparkDataFrame functions
 #' @aliases agg,SparkDataFrame-method
 #' @rdname summarize
@@ -1386,16 +1397,15 @@ setMethod("dapplyCollect",
 #' Groups the SparkDataFrame using the specified columns and applies the R 
function to each
 #' group.
 #'
-#' @param x A SparkDataFrame
-#' @param cols Grouping columns
-#' @param func A function to be applied to each group partition specified by 
grouping
+#' @param cols grouping columns.
+#' @param func a function to be applied to each group partition specified by 
grouping
 #'             column of the SparkDataFrame. The function `func` takes as 
argument
 #'             a key - grouping columns and a data frame - a local R 
data.frame.
 #'             The output of `func` is a local R data.frame.
-#' @param schema The schema of the resulting SparkDataFrame after the function 
is applied.
+#' @param schema the schema of the resulting SparkDataFrame after the function 
is applied.
 #'               The schema must match to output of `func`. It has to be 
defined for each
 #'               output column with preferred output column name and 
corresponding data type.
-#' @return a SparkDataFrame
+#' @return A SparkDataFrame.
 #' @family SparkDataFrame functions
 #' @aliases gapply,SparkDataFrame-method
 #' @rdname gapply
@@ -1478,13 +1488,12 @@ setMethod("gapply",
 #' Groups the SparkDataFrame using the specified columns, applies the R 
function to each
 #' group and collects the result back to R as data.frame.
 #'
-#' @param x A SparkDataFrame
-#' @param cols Grouping columns
-#' @param func A function to be applied to each group partition specified by 
grouping
+#' @param cols grouping columns.
+#' @param func a function to be applied to each group partition specified by 
grouping
 #'             column of the SparkDataFrame. The function `func` takes as 
argument
 #'             a key - grouping columns and a data frame - a local R 
data.frame.
 #'             The output of `func` is a local R data.frame.
-#' @return a data.frame
+#' @return A data.frame.
 #' @family SparkDataFrame functions
 #' @aliases gapplyCollect,SparkDataFrame-method
 #' @rdname gapplyCollect
@@ -1631,6 +1640,7 @@ getColumn <- function(x, c) {
   column(callJMethod(x@sdf, "col", c))
 }
 
+#' @param name name of a Column (without being wrapped by \code{""}).
 #' @rdname select
 #' @name $
 #' @aliases $,SparkDataFrame-method
@@ -1640,6 +1650,7 @@ setMethod("$", signature(x = "SparkDataFrame"),
             getColumn(x, name)
           })
 
+#' @param value a Column or NULL. If NULL, the specified Column is dropped.
 #' @rdname select
 #' @name $<-
 #' @aliases $<-,SparkDataFrame-method
@@ -1714,12 +1725,13 @@ setMethod("[", signature(x = "SparkDataFrame"),
 #' Subset
 #'
 #' Return subsets of SparkDataFrame according to given conditions
-#' @param x A SparkDataFrame
-#' @param subset (Optional) A logical expression to filter on rows
-#' @param select expression for the single Column or a list of columns to 
select from the SparkDataFrame
+#' @param x a SparkDataFrame.
+#' @param i,subset (Optional) a logical expression to filter on rows.
+#' @param j,select expression for the single Column or a list of columns to 
select from the SparkDataFrame.
 #' @param drop if TRUE, a Column will be returned if the resulting dataset has 
only one column.
-#' Otherwise, a SparkDataFrame will always be returned.
-#' @return A new SparkDataFrame containing only the rows that meet the 
condition with selected columns
+#'             Otherwise, a SparkDataFrame will always be returned.
+#' @param ... currently not used.
+#' @return A new SparkDataFrame containing only the rows that meet the 
condition with selected columns.
 #' @export
 #' @family SparkDataFrame functions
 #' @aliases subset,SparkDataFrame-method
@@ -1754,9 +1766,12 @@ setMethod("subset", signature(x = "SparkDataFrame"),
 #' Select
 #'
 #' Selects a set of columns with names or Column expressions.
-#' @param x A SparkDataFrame
-#' @param col A list of columns or single Column or name
-#' @return A new SparkDataFrame with selected columns
+#' @param x a SparkDataFrame.
+#' @param col a list of columns or single Column or name.
+#' @param ... additional column(s) if only one column is specified in 
\code{col}.
+#'            If more than one column is assigned in \code{col}, \code{...}
+#'            should be left empty.
+#' @return A new SparkDataFrame with selected columns.
 #' @export
 #' @family SparkDataFrame functions
 #' @rdname select
@@ -1853,9 +1868,9 @@ setMethod("selectExpr",
 #' Return a new SparkDataFrame by adding a column or replacing the existing 
column
 #' that has the same name.
 #'
-#' @param x A SparkDataFrame
-#' @param colName A column name.
-#' @param col A Column expression.
+#' @param x a SparkDataFrame.
+#' @param colName a column name.
+#' @param col a Column expression.
 #' @return A SparkDataFrame with the new column added or the existing column 
replaced.
 #' @family SparkDataFrame functions
 #' @aliases withColumn,SparkDataFrame,character,Column-method
@@ -1884,8 +1899,8 @@ setMethod("withColumn",
 #'
 #' Return a new SparkDataFrame with the specified columns added or replaced.
 #'
-#' @param .data A SparkDataFrame
-#' @param col a named argument of the form name = col
+#' @param .data a SparkDataFrame.
+#' @param ... additional column argument(s) each in the form name = col.
 #' @return A new SparkDataFrame with the new columns added or replaced.
 #' @family SparkDataFrame functions
 #' @aliases mutate,SparkDataFrame-method
@@ -1962,6 +1977,7 @@ setMethod("mutate",
             do.call(select, c(x, colList, deDupCols))
           })
 
+#' @param _data a SparkDataFrame.
 #' @export
 #' @rdname mutate
 #' @aliases transform,SparkDataFrame-method
@@ -2273,11 +2289,18 @@ setMethod("join",
 #'   specified, the common column names in \code{x} and \code{y} will be used.
 #' @param by.x a character vector specifying the joining columns for x.
 #' @param by.y a character vector specifying the joining columns for y.
+#' @param all a boolean value setting \code{all.x} and \code{all.y}
+#'            if any of them are unset.
 #' @param all.x a boolean value indicating whether all the rows in x should
 #'              be including in the join
 #' @param all.y a boolean value indicating whether all the rows in y should
 #'              be including in the join
 #' @param sort a logical argument indicating whether the resulting columns 
should be sorted
+#' @param suffixes a string vector of length 2 used to make colnames of
+#'                 \code{x} and \code{y} unique.
+#'                 The first element is appended to each colname of \code{x}.
+#'                 The second element is appended to each colname of \code{y}.
+#' @param ... additional argument(s) passed to the method.
 #' @details  If all.x and all.y are set to FALSE, a natural join will be 
returned. If
 #'   all.x is set to TRUE and all.y is set to FALSE, a left outer join will
 #'   be returned. If all.x is set to FALSE and all.y is set to TRUE, a right
@@ -2306,7 +2329,7 @@ setMethod("merge",
           signature(x = "SparkDataFrame", y = "SparkDataFrame"),
           function(x, y, by = intersect(names(x), names(y)), by.x = by, by.y = 
by,
                    all = FALSE, all.x = all, all.y = all,
-                   sort = TRUE, suffixes = c("_x", "_y"), ... ) {
+                   sort = TRUE, suffixes = c("_x", "_y"), ...) {
 
             if (length(suffixes) != 2) {
               stop("suffixes must have length 2")
@@ -2459,8 +2482,10 @@ setMethod("unionAll",
 #' Union two or more SparkDataFrames. This is equivalent to `UNION ALL` in SQL.
 #' Note that this does not remove duplicate rows across the two 
SparkDataFrames.
 #'
-#' @param x A SparkDataFrame
-#' @param ... Additional SparkDataFrame
+#' @param x a SparkDataFrame.
+#' @param ... additional SparkDataFrame(s).
+#' @param deparse.level currently not used (put here to match the signature of
+#'                      the base implementation).
 #' @return A SparkDataFrame containing the result of the union.
 #' @family SparkDataFrame functions
 #' @aliases rbind,SparkDataFrame-method
@@ -2517,8 +2542,8 @@ setMethod("intersect",
 #' Return a new SparkDataFrame containing rows in this SparkDataFrame
 #' but not in another SparkDataFrame. This is equivalent to `EXCEPT` in SQL.
 #'
-#' @param x A SparkDataFrame
-#' @param y A SparkDataFrame
+#' @param x a SparkDataFrame.
+#' @param y a SparkDataFrame.
 #' @return A SparkDataFrame containing the result of the except operation.
 #' @family SparkDataFrame functions
 #' @aliases except,SparkDataFrame,SparkDataFrame-method
@@ -2559,10 +2584,11 @@ setMethod("except",
 #'         and to not change the existing data.
 #' }
 #'
-#' @param df A SparkDataFrame
-#' @param path A name for the table
-#' @param source A name for external data source
-#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode (it 
is 'error' by default)
+#' @param df a SparkDataFrame.
+#' @param path a name for the table.
+#' @param source a name for external data source.
+#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it 
is 'error' by default)
+#' @param ... additional argument(s) passed to the method.
 #'
 #' @family SparkDataFrame functions
 #' @aliases write.df,SparkDataFrame,character-method
@@ -2621,10 +2647,11 @@ setMethod("saveDF",
 #'  ignore: The save operation is expected to not save the contents of the 
SparkDataFrame
 #'     and to not change the existing data. \cr
 #'
-#' @param df A SparkDataFrame
-#' @param tableName A name for the table
-#' @param source A name for external data source
-#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode (it 
is 'error' by default)
+#' @param df a SparkDataFrame.
+#' @param tableName a name for the table.
+#' @param source a name for external data source.
+#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it 
is 'error' by default).
+#' @param ... additional option(s) passed to the method.
 #'
 #' @family SparkDataFrame functions
 #' @aliases saveAsTable,SparkDataFrame,character-method
@@ -2660,10 +2687,10 @@ setMethod("saveAsTable",
 #' Computes statistics for numeric columns.
 #' If no columns are given, this function computes statistics for all 
numerical columns.
 #'
-#' @param x A SparkDataFrame to be computed.
-#' @param col A string of name
-#' @param ... Additional expressions
-#' @return A SparkDataFrame
+#' @param x a SparkDataFrame to be computed.
+#' @param col a string of name.
+#' @param ... additional expressions.
+#' @return A SparkDataFrame.
 #' @family SparkDataFrame functions
 #' @aliases describe,SparkDataFrame,character-method 
describe,SparkDataFrame,ANY-method
 #' @rdname summary
@@ -2698,6 +2725,7 @@ setMethod("describe",
             dataFrame(sdf)
           })
 
+#' @param object a SparkDataFrame to be summarized.
 #' @rdname summary
 #' @name summary
 #' @aliases summary,SparkDataFrame-method
@@ -2713,16 +2741,20 @@ setMethod("summary",
 #'
 #' dropna, na.omit - Returns a new SparkDataFrame omitting rows with null 
values.
 #'
-#' @param x A SparkDataFrame.
+#' @param x a SparkDataFrame.
 #' @param how "any" or "all".
 #'            if "any", drop a row if it contains any nulls.
 #'            if "all", drop a row only if all its values are null.
 #'            if minNonNulls is specified, how is ignored.
-#' @param minNonNulls If specified, drop rows that have less than
+#' @param minNonNulls if specified, drop rows that have less than
 #'                    minNonNulls non-null values.
 #'                    This overwrites the how parameter.
-#' @param cols Optional list of column names to consider.
-#' @return A SparkDataFrame
+#' @param cols optional list of column names to consider. In `fillna`,
+#'             columns specified in cols that do not have matching data
+#'             type are ignored. For example, if value is a character, and
+#'             subset contains a non-character column, then the non-character
+#'             column is simply ignored.
+#' @return A SparkDataFrame.
 #'
 #' @family SparkDataFrame functions
 #' @rdname nafunctions
@@ -2754,6 +2786,8 @@ setMethod("dropna",
             dataFrame(sdf)
           })
 
+#' @param object a SparkDataFrame.
+#' @param ... further arguments to be passed to or from other methods.
 #' @rdname nafunctions
 #' @name na.omit
 #' @aliases na.omit,SparkDataFrame-method
@@ -2767,18 +2801,12 @@ setMethod("na.omit",
 
 #' fillna - Replace null values.
 #'
-#' @param x A SparkDataFrame.
-#' @param value Value to replace null values with.
+#' @param value value to replace null values with.
 #'              Should be an integer, numeric, character or named list.
 #'              If the value is a named list, then cols is ignored and
 #'              value must be a mapping from column name (character) to
 #'              replacement value. The replacement value must be an
 #'              integer, numeric or character.
-#' @param cols optional list of column names to consider.
-#'             Columns specified in cols that do not have matching data
-#'             type are ignored. For example, if value is a character, and
-#'             subset contains a non-character column, then the non-character
-#'             column is simply ignored.
 #'
 #' @rdname nafunctions
 #' @name fillna
@@ -2843,8 +2871,11 @@ setMethod("fillna",
 #' Since data.frames are held in memory, ensure that you have enough memory
 #' in your system to accommodate the contents.
 #'
-#' @param x a SparkDataFrame
-#' @return a data.frame
+#' @param x a SparkDataFrame.
+#' @param row.names NULL or a character vector giving the row names for the 
data frame.
+#' @param optional If `TRUE`, converting column names is optional.
+#' @param ... additional arguments to pass to base::as.data.frame.
+#' @return A data.frame.
 #' @family SparkDataFrame functions
 #' @aliases as.data.frame,SparkDataFrame-method
 #' @rdname as.data.frame
@@ -2998,9 +3029,10 @@ setMethod("str",
 #' Returns a new SparkDataFrame with columns dropped.
 #' This is a no-op if schema doesn't contain column name(s).
 #'
-#' @param x A SparkDataFrame.
-#' @param cols A character vector of column names or a Column.
-#' @return A SparkDataFrame
+#' @param x a SparkDataFrame.
+#' @param col a character vector of column names or a Column.
+#' @param ... further arguments to be passed to or from other methods.
+#' @return A SparkDataFrame.
 #'
 #' @family SparkDataFrame functions
 #' @rdname drop
@@ -3019,7 +3051,7 @@ setMethod("str",
 #' @note drop since 2.0.0
 setMethod("drop",
           signature(x = "SparkDataFrame"),
-          function(x, col) {
+          function(x, col, ...) {
             stopifnot(class(col) == "character" || class(col) == "Column")
 
             if (class(col) == "Column") {
@@ -3047,8 +3079,8 @@ setMethod("drop",
 #'
 #' @name histogram
 #' @param nbins the number of bins (optional). Default value is 10.
+#' @param col the column as Character string or a Column to build the 
histogram from.
 #' @param df the SparkDataFrame containing the Column to build the histogram 
from.
-#' @param colname the name of the column to build the histogram from.
 #' @return a data.frame with the histogram statistics, i.e., counts and 
centroids.
 #' @rdname histogram
 #' @aliases histogram,SparkDataFrame,characterOrColumn-method
@@ -3179,10 +3211,11 @@ setMethod("histogram",
 #'         and to not change the existing data.
 #' }
 #'
-#' @param x A SparkDataFrame
-#' @param url JDBC database url of the form `jdbc:subprotocol:subname`
-#' @param tableName The name of the table in the external database
-#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode (it 
is 'error' by default)
+#' @param x a SparkDataFrame.
+#' @param url JDBC database url of the form `jdbc:subprotocol:subname`.
+#' @param tableName the name of the table in the external database.
+#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it 
is 'error' by default).
+#' @param ... additional JDBC database connection properties.
 #' @family SparkDataFrame functions
 #' @rdname write.jdbc
 #' @name write.jdbc

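As a usage note for the merge() arguments documented above, here is a small 
sketch with illustrative data (assumes a SparkR session started with 
sparkR.session()):

    library(SparkR)
    sparkR.session()

    df1 <- createDataFrame(data.frame(id = 1:3, v = c("a", "b", "c")))
    df2 <- createDataFrame(data.frame(id = 2:4, v = c("x", "y", "z")))

    # all.x = TRUE requests a left outer join; suffixes disambiguates the
    # overlapping column name "v" into "v_x" and "v_y" in the result.
    joined <- merge(df1, df2, by = "id", all.x = TRUE, suffixes = c("_x", "_y"))
    head(joined)
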
http://git-wip-us.apache.org/repos/asf/spark/blob/02978961/R/pkg/R/SQLContext.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 0c06bba..a9cd2d8 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -165,9 +165,9 @@ getDefaultSqlSource <- function() {
 #'
 #' Converts R data.frame or list into SparkDataFrame.
 #'
-#' @param data An RDD or list or data.frame
-#' @param schema a list of column names or named list (StructType), optional
-#' @return a SparkDataFrame
+#' @param data an RDD or list or data.frame.
+#' @param schema a list of column names or named list (StructType), optional.
+#' @return A SparkDataFrame.
 #' @rdname createDataFrame
 #' @export
 #' @examples
@@ -257,23 +257,25 @@ createDataFrame.default <- function(data, schema = NULL, 
samplingRatio = 1.0) {
 }
 
 createDataFrame <- function(x, ...) {
-  dispatchFunc("createDataFrame(data, schema = NULL, samplingRatio = 1.0)", x, 
...)
+  dispatchFunc("createDataFrame(data, schema = NULL)", x, ...)
 }
 
+#' @param samplingRatio Currently not used.
 #' @rdname createDataFrame
 #' @aliases createDataFrame
 #' @export
 #' @method as.DataFrame default
 #' @note as.DataFrame since 1.6.0
 as.DataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) {
-  createDataFrame(data, schema, samplingRatio)
+  createDataFrame(data, schema)
 }
 
+#' @param ... additional argument(s).
 #' @rdname createDataFrame
 #' @aliases as.DataFrame
 #' @export
-as.DataFrame <- function(x, ...) {
-  dispatchFunc("as.DataFrame(data, schema = NULL, samplingRatio = 1.0)", x, 
...)
+as.DataFrame <- function(data, ...) {
+  dispatchFunc("as.DataFrame(data, schema = NULL)", data, ...)
 }
 
 #' toDF
@@ -398,7 +400,7 @@ read.orc <- function(path) {
 #'
 #' Loads a Parquet file, returning the result as a SparkDataFrame.
 #'
-#' @param path Path of file to read. A vector of multiple paths is allowed.
+#' @param path path of file to read. A vector of multiple paths is allowed.
 #' @return SparkDataFrame
 #' @rdname read.parquet
 #' @export
@@ -418,6 +420,7 @@ read.parquet <- function(x, ...) {
   dispatchFunc("read.parquet(...)", x, ...)
 }
 
+#' @param ... argument(s) passed to the method.
 #' @rdname read.parquet
 #' @name parquetFile
 #' @export
@@ -727,6 +730,7 @@ dropTempView <- function(viewName) {
 #' @param source The name of external data source
 #' @param schema The data schema defined in structType
 #' @param na.strings Default string value for NA when source is "csv"
+#' @param ... additional external data source specific named properties.
 #' @return SparkDataFrame
 #' @rdname read.df
 #' @name read.df
@@ -791,10 +795,11 @@ loadDF <- function(x, ...) {
 #' If `source` is not specified, the default data source configured by
 #' "spark.sql.sources.default" will be used.
 #'
-#' @param tableName A name of the table
-#' @param path The path of files to load
-#' @param source the name of external data source
-#' @return SparkDataFrame
+#' @param tableName a name of the table.
+#' @param path the path of files to load.
+#' @param source the name of external data source.
+#' @param ... additional argument(s) passed to the method.
+#' @return A SparkDataFrame.
 #' @rdname createExternalTable
 #' @export
 #' @examples
@@ -840,6 +845,7 @@ createExternalTable <- function(x, ...) {
 #'                      clause expressions used to split the column 
`partitionColumn` evenly.
 #'                      This defaults to SparkContext.defaultParallelism when 
unset.
 #' @param predicates a list of conditions in the where clause; each one 
defines one partition
+#' @param ... additional JDBC database connection named properties.
 #' @return SparkDataFrame
 #' @rdname read.jdbc
 #' @name read.jdbc

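A short usage sketch for createDataFrame/as.DataFrame as documented above 
(assumes an active session); samplingRatio is still accepted for backward 
compatibility but is no longer used:

    library(SparkR)
    sparkR.session()

    # The schema is inferred from the column types of the local data.frame.
    df <- createDataFrame(faithful)
    printSchema(df)

    # as.DataFrame dispatches to the same implementation.
    df2 <- as.DataFrame(mtcars)
    head(df2)
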
http://git-wip-us.apache.org/repos/asf/spark/blob/02978961/R/pkg/R/WindowSpec.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/WindowSpec.R b/R/pkg/R/WindowSpec.R
index 751ba3f..b55356b 100644
--- a/R/pkg/R/WindowSpec.R
+++ b/R/pkg/R/WindowSpec.R
@@ -54,8 +54,10 @@ setMethod("show", "WindowSpec",
 #'
 #' Defines the partitioning columns in a WindowSpec.
 #'
-#' @param x a WindowSpec
-#' @return a WindowSpec
+#' @param x a WindowSpec.
+#' @param col a column to partition on (described by the name or Column).
+#' @param ... additional column(s) to partition on.
+#' @return A WindowSpec.
 #' @rdname partitionBy
 #' @name partitionBy
 #' @aliases partitionBy,WindowSpec-method
@@ -86,7 +88,7 @@ setMethod("partitionBy",
 #'
 #' Defines the ordering columns in a WindowSpec.
 #' @param x a WindowSpec
-#' @param col a character or Column object indicating an ordering column
+#' @param col a character or Column indicating an ordering column
 #' @param ... additional sorting fields
 #' @return A WindowSpec.
 #' @name orderBy
@@ -192,6 +194,9 @@ setMethod("rangeBetween",
 #'
 #' Define a windowing column.
 #'
+#' @param x a Column, usually one returned by window function(s).
+#' @param window a WindowSpec object. Can be created by `windowPartitionBy` or
+#'        `windowOrderBy` and configured by other WindowSpec methods.
 #' @rdname over
 #' @name over
 #' @aliases over,Column,WindowSpec-method

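For the WindowSpec parameters documented above, a usage sketch mirroring the 
cume_dist() example added in functions.R (assumes an active session):

    df <- createDataFrame(iris)
    # Partition by Species and order rows within each partition by Sepal_Length.
    ws <- orderBy(windowPartitionBy("Species"), "Sepal_Length")
    # over() evaluates the window function column over that WindowSpec.
    out <- select(df, over(cume_dist(), ws), df$Sepal_Length, df$Species)
    head(out)
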
http://git-wip-us.apache.org/repos/asf/spark/blob/02978961/R/pkg/R/column.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index 0edb9d2..af486e1 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -163,8 +163,9 @@ setMethod("alias",
 #' @family colum_func
 #' @aliases substr,Column-method
 #'
-#' @param start starting position
-#' @param stop ending position
+#' @param x a Column.
+#' @param start starting position.
+#' @param stop ending position.
 #' @note substr since 1.4.0
 setMethod("substr", signature(x = "Column"),
           function(x, start, stop) {
@@ -219,6 +220,7 @@ setMethod("endsWith", signature(x = "Column"),
 #' @family colum_func
 #' @aliases between,Column-method
 #'
+#' @param x a Column
 #' @param bounds lower and upper bounds
 #' @note between since 1.5.0
 setMethod("between", signature(x = "Column"),
@@ -233,6 +235,11 @@ setMethod("between", signature(x = "Column"),
 
 #' Casts the column to a different data type.
 #'
+#' @param x a Column.
+#' @param dataType a character object describing the target data type.
+#'        See
+#'        
\href{https://spark.apache.org/docs/latest/sparkr.html#data-type-mapping-between-r-and-spark}{
+#'        Spark Data Types} for available data types.
 #' @rdname cast
 #' @name cast
 #' @family colum_func
@@ -254,10 +261,12 @@ setMethod("cast",
 
 #' Match a column with given values.
 #'
+#' @param x a Column.
+#' @param table a collection of values (coercible to list) to compare with.
 #' @rdname match
 #' @name %in%
 #' @aliases %in%,Column-method
-#' @return a matched values as a result of comparing with given values.
+#' @return The matched values as a result of comparing with given values.
 #' @export
 #' @examples
 #' \dontrun{
@@ -277,6 +286,9 @@ setMethod("%in%",
 #' If values in the specified column are null, returns the value.
 #' Can be used in conjunction with `when` to specify a default value for 
expressions.
 #'
+#' @param x a Column.
+#' @param value value to replace when the corresponding entry in \code{x} is 
NA.
+#'              Can be a single value or a Column.
 #' @rdname otherwise
 #' @name otherwise
 #' @family colum_func

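A brief sketch of the Column helpers documented above, using the standard 
faithful dataset (assumes an active session):

    df <- createDataFrame(faithful)

    # cast() converts a Column to a different data type.
    df2 <- withColumn(df, "waiting_int", cast(df$waiting, "integer"))

    # %in% matches a Column against a collection of values.
    short_waits <- filter(df, df$waiting %in% c(79, 80, 81))

    # otherwise() supplies a default for rows not matched by when().
    labels <- select(df, otherwise(when(df$eruptions > 3, "long"), "short"))
    head(labels)
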
http://git-wip-us.apache.org/repos/asf/spark/blob/02978961/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 573c915..b3c10de 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -23,6 +23,7 @@ NULL
 #' A new \linkS4class{Column} is created to represent the literal value.
 #' If the parameter is a \linkS4class{Column}, it is returned unchanged.
 #'
+#' @param x a literal value or a Column.
 #' @family normal_funcs
 #' @rdname lit
 #' @name lit
@@ -89,8 +90,6 @@ setMethod("acos",
 #' Returns the approximate number of distinct items in a group. This is a 
column
 #' aggregate function.
 #'
-#' @param x Column to compute on.
-#'
 #' @rdname approxCountDistinct
 #' @name approxCountDistinct
 #' @return the approximate number of distinct items in a group.
@@ -171,8 +170,6 @@ setMethod("atan",
 #'
 #' Aggregate function: returns the average of the values in a group.
 #'
-#' @param x Column to compute on.
-#'
 #' @rdname avg
 #' @name avg
 #' @family agg_funcs
@@ -319,7 +316,7 @@ setMethod("column",
 #'
 #' Computes the Pearson Correlation Coefficient for two Columns.
 #'
-#' @param x Column to compute on.
+#' @param col2 a (second) Column.
 #'
 #' @rdname corr
 #' @name corr
@@ -339,8 +336,6 @@ setMethod("corr", signature(x = "Column"),
 #'
 #' Compute the sample covariance between two expressions.
 #'
-#' @param x Column to compute on.
-#'
 #' @rdname cov
 #' @name cov
 #' @family math_funcs
@@ -362,8 +357,8 @@ setMethod("cov", signature(x = "characterOrColumn"),
 
 #' @rdname cov
 #'
-#' @param col1 First column to compute cov_samp.
-#' @param col2 Second column to compute cov_samp.
+#' @param col1 the first Column.
+#' @param col2 the second Column.
 #' @name covar_samp
 #' @aliases covar_samp,characterOrColumn,characterOrColumn-method
 #' @note covar_samp since 2.0.0
@@ -451,9 +446,7 @@ setMethod("cosh",
 #'
 #' Returns the number of items in a group. This is a column aggregate function.
 #'
-#' @param x Column to compute on.
-#'
-#' @rdname nrow
+#' @rdname count
 #' @name count
 #' @family agg_funcs
 #' @aliases count,Column-method
@@ -493,6 +486,7 @@ setMethod("crc32",
 #' Calculates the hash code of given columns, and returns the result as a int 
column.
 #'
 #' @param x Column to compute on.
+#' @param ... additional Column(s) to be included.
 #'
 #' @rdname hash
 #' @name hash
@@ -663,7 +657,8 @@ setMethod("factorial",
 #' The function by default returns the first values it sees. It will return 
the first non-missing
 #' value it sees when na.rm is set to true. If all values are missing, then NA 
is returned.
 #'
-#' @param x Column to compute on.
+#' @param na.rm a logical value indicating whether NA values should be stripped
+#'        before the computation proceeds.
 #'
 #' @rdname first
 #' @name first
@@ -832,7 +827,10 @@ setMethod("kurtosis",
 #' The function by default returns the last values it sees. It will return the 
last non-missing
 #' value it sees when na.rm is set to true. If all values are missing, then NA 
is returned.
 #'
-#' @param x Column to compute on.
+#' @param x column to compute on.
+#' @param na.rm a logical value indicating whether NA values should be stripped
+#'        before the computation proceeds.
+#' @param ... further arguments to be passed to or from other methods.
 #'
 #' @rdname last
 #' @name last
@@ -1143,7 +1141,7 @@ setMethod("minute",
 #' @export
 #' @examples \dontrun{select(df, monotonically_increasing_id())}
 setMethod("monotonically_increasing_id",
-          signature(x = "missing"),
+          signature("missing"),
           function() {
             jc <- callJStatic("org.apache.spark.sql.functions", 
"monotonically_increasing_id")
             column(jc)
@@ -1272,13 +1270,16 @@ setMethod("round",
 
 #' bround
 #'
-#' Returns the value of the column `e` rounded to `scale` decimal places using 
HALF_EVEN rounding
-#' mode if `scale` >= 0 or at integral part when `scale` < 0.
+#' Returns the value of the column \code{e} rounded to \code{scale} decimal 
places using HALF_EVEN rounding
+#' mode if \code{scale} >= 0 or at integer part when \code{scale} < 0.
 #' Also known as Gaussian rounding or bankers' rounding that rounds to the 
nearest even number.
 #' bround(2.5, 0) = 2, bround(3.5, 0) = 4.
 #'
 #' @param x Column to compute on.
-#'
+#' @param scale round to \code{scale} digits to the right of the decimal point 
when \code{scale} > 0,
+#'        the nearest even number when \code{scale} = 0, and \code{scale} 
digits to the left
+#'        of the decimal point when \code{scale} < 0.
+#' @param ... further arguments to be passed to or from other methods.
 #' @rdname bround
 #' @name bround
 #' @family math_funcs
@@ -1319,7 +1320,7 @@ setMethod("rtrim",
 #' Aggregate function: alias for \link{stddev_samp}
 #'
 #' @param x Column to compute on.
-#'
+#' @param na.rm currently not used.
 #' @rdname sd
 #' @name sd
 #' @family agg_funcs
@@ -1497,7 +1498,7 @@ setMethod("soundex",
 #' \dontrun{select(df, spark_partition_id())}
 #' @note spark_partition_id since 2.0.0
 setMethod("spark_partition_id",
-          signature(x = "missing"),
+          signature("missing"),
           function() {
             jc <- callJStatic("org.apache.spark.sql.functions", 
"spark_partition_id")
             column(jc)
@@ -1560,7 +1561,8 @@ setMethod("stddev_samp",
 #'
 #' Creates a new struct column that composes multiple input columns.
 #'
-#' @param x Column to compute on.
+#' @param x a column to compute on.
+#' @param ... optional column(s) to be included.
 #'
 #' @rdname struct
 #' @name struct
@@ -1831,8 +1833,8 @@ setMethod("upper",
 #'
 #' Aggregate function: alias for \link{var_samp}.
 #'
-#' @param x Column to compute on.
-#'
+#' @param x a Column to compute on.
+#' @param y,na.rm,use currently not used.
 #' @rdname var
 #' @name var
 #' @family agg_funcs
@@ -2114,7 +2116,9 @@ setMethod("pmod", signature(y = "Column"),
 #' @rdname approxCountDistinct
 #' @name approxCountDistinct
 #'
+#' @param x Column to compute on.
 #' @param rsd maximum estimation error allowed (default = 0.05)
+#' @param ... further arguments to be passed to or from other methods.
 #'
 #' @aliases approxCountDistinct,Column-method
 #' @export
@@ -2127,7 +2131,7 @@ setMethod("approxCountDistinct",
             column(jc)
           })
 
-#' Count Distinct
+#' Count Distinct Values
 #'
 #' @param x Column to compute on
 #' @param ... other columns
@@ -2156,7 +2160,7 @@ setMethod("countDistinct",
 #' concat
 #'
 #' Concatenates multiple input string columns together into a single string 
column.
-#' 
+#'
 #' @param x Column to compute on
 #' @param ... other columns
 #'
@@ -2246,7 +2250,6 @@ setMethod("ceiling",
           })
 
 #' @rdname sign
-#' @param x Column to compute on
 #'
 #' @name sign
 #' @aliases sign,Column-method
@@ -2262,9 +2265,6 @@ setMethod("sign", signature(x = "Column"),
 #'
 #' Aggregate function: returns the number of distinct items in a group.
 #'
-#' @param x Column to compute on
-#' @param ... other columns
-#'
 #' @rdname countDistinct
 #' @name n_distinct
 #' @aliases n_distinct,Column-method
@@ -2276,9 +2276,7 @@ setMethod("n_distinct", signature(x = "Column"),
             countDistinct(x, ...)
           })
 
-#' @rdname nrow
-#' @param x Column to compute on
-#'
+#' @rdname count
 #' @name n
 #' @aliases n,Column-method
 #' @export
@@ -2300,8 +2298,8 @@ setMethod("n", signature(x = "Column"),
 #' NOTE: Use when ever possible specialized functions like \code{year}. These 
benefit from a
 #' specialized implementation.
 #'
-#' @param y Column to compute on
-#' @param x date format specification 
+#' @param y Column to compute on.
+#' @param x date format specification.
 #'
 #' @family datetime_funcs
 #' @rdname date_format
@@ -2320,8 +2318,8 @@ setMethod("date_format", signature(y = "Column", x = 
"character"),
 #'
 #' Assumes given timestamp is UTC and converts to given timezone.
 #'
-#' @param y Column to compute on
-#' @param x time zone to use 
+#' @param y Column to compute on.
+#' @param x time zone to use.
 #'
 #' @family datetime_funcs
 #' @rdname from_utc_timestamp
@@ -2370,8 +2368,8 @@ setMethod("instr", signature(y = "Column", x = 
"character"),
 #' Day of the week parameter is case insensitive, and accepts first three or 
two characters:
 #' "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun".
 #'
-#' @param y Column to compute on
-#' @param x Day of the week string 
+#' @param y Column to compute on.
+#' @param x Day of the week string.
 #'
 #' @family datetime_funcs
 #' @rdname next_day
@@ -2637,6 +2635,7 @@ setMethod("conv", signature(x = "Column", fromBase = 
"numeric", toBase = "numeri
 #' Parses the expression string into the column that it represents, similar to
 #' SparkDataFrame.selectExpr
 #'
+#' @param x an expression character object to be parsed.
 #' @family normal_funcs
 #' @rdname expr
 #' @aliases expr,character-method
@@ -2654,6 +2653,9 @@ setMethod("expr", signature(x = "character"),
 #'
 #' Formats the arguments in printf-style and returns the result as a string 
column.
 #'
+#' @param format a character object of format strings.
+#' @param x a Column.
+#' @param ... additional Column(s).
 #' @family string_funcs
 #' @rdname format_string
 #' @name format_string
@@ -2676,6 +2678,11 @@ setMethod("format_string", signature(format = 
"character", x = "Column"),
 #' representing the timestamp of that moment in the current system time zone 
in the given
 #' format.
 #'
+#' @param x a Column of unix timestamp.
+#' @param format the target format. See
+#'               
\href{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}{
+#'               Customizing Formats} for available options.
+#' @param ... further arguments to be passed to or from other methods.
 #' @family datetime_funcs
 #' @rdname from_unixtime
 #' @name from_unixtime
@@ -2702,19 +2709,21 @@ setMethod("from_unixtime", signature(x = "Column"),
 #' [12:05,12:10) but not in [12:00,12:05). Windows can support microsecond 
precision. Windows in
 #' the order of months are not supported.
 #'
-#' The time column must be of TimestampType.
-#'
-#' Durations are provided as strings, e.g. '1 second', '1 day 12 hours', '2 
minutes'. Valid
-#' interval strings are 'week', 'day', 'hour', 'minute', 'second', 
'millisecond', 'microsecond'.
-#' If the `slideDuration` is not provided, the windows will be tumbling 
windows.
-#'
-#' The startTime is the offset with respect to 1970-01-01 00:00:00 UTC with 
which to start
-#' window intervals. For example, in order to have hourly tumbling windows 
that start 15 minutes
-#' past the hour, e.g. 12:15-13:15, 13:15-14:15... provide `startTime` as `15 
minutes`.
-#'
-#' The output column will be a struct called 'window' by default with the 
nested columns 'start'
-#' and 'end'.
-#'
+#' @param x a time Column. Must be of TimestampType.
+#' @param windowDuration a string specifying the width of the window, e.g. '1 
second',
+#'                       '1 day 12 hours', '2 minutes'. Valid interval strings 
are 'week',
+#'                       'day', 'hour', 'minute', 'second', 'millisecond', 
'microsecond'.
+#' @param slideDuration a string specifying the sliding interval of the 
window. Same format as
+#'                      \code{windowDuration}. A new window will be generated 
every
+#'                      \code{slideDuration}. Must be less than or equal to
+#'                      the \code{windowDuration}.
+#' @param startTime the offset with respect to 1970-01-01 00:00:00 UTC with 
which to start
+#'                  window intervals. For example, in order to have hourly 
tumbling windows
+#'                  that start 15 minutes past the hour, e.g. 12:15-13:15, 
13:15-14:15... provide
+#'                  \code{startTime} as \code{"15 minutes"}.
+#' @param ... further arguments to be passed to or from other methods.
+#' @return An output column of struct called 'window' by default with the 
nested columns 'start'
+#'         and 'end'.
 #' @family datetime_funcs
 #' @rdname window
 #' @name window
@@ -2766,6 +2775,10 @@ setMethod("window", signature(x = "Column"),
 #' NOTE: The position is not zero based, but 1 based index, returns 0 if substr
 #' could not be found in str.
 #'
+#' @param substr a character string to be matched.
+#' @param str a Column where matches are sought for each entry.
+#' @param pos start position of search.
+#' @param ... further arguments to be passed to or from other methods.
 #' @family string_funcs
 #' @rdname locate
 #' @aliases locate,character,Column-method
@@ -2785,6 +2798,9 @@ setMethod("locate", signature(substr = "character", str = 
"Column"),
 #'
 #' Left-pad the string column with
 #'
+#' @param x the string Column to be left-padded.
+#' @param len maximum length of each output result.
+#' @param pad a character string to be padded with.
 #' @family string_funcs
 #' @rdname lpad
 #' @aliases lpad,Column,numeric,character-method
@@ -2804,6 +2820,7 @@ setMethod("lpad", signature(x = "Column", len = 
"numeric", pad = "character"),
 #'
 #' Generate a random column with i.i.d. samples from U[0.0, 1.0].
 #'
+#' @param seed a random seed. Can be missing.
 #' @family normal_funcs
 #' @rdname rand
 #' @name rand
@@ -2832,6 +2849,7 @@ setMethod("rand", signature(seed = "numeric"),
 #'
 #' Generate a column with i.i.d. samples from the standard normal distribution.
 #'
+#' @param seed a random seed. Can be missing.
 #' @family normal_funcs
 #' @rdname randn
 #' @name randn
@@ -2860,6 +2878,9 @@ setMethod("randn", signature(seed = "numeric"),
 #'
 #' Extract a specific(idx) group identified by a java regex, from the 
specified string column.
 #'
+#' @param x a string Column.
+#' @param pattern a regular expression.
+#' @param idx a group index.
 #' @family string_funcs
 #' @rdname regexp_extract
 #' @name regexp_extract
@@ -2880,6 +2901,9 @@ setMethod("regexp_extract",
 #'
 #' Replace all substrings of the specified string value that match regexp with 
rep.
 #'
+#' @param x a string Column.
+#' @param pattern a regular expression.
+#' @param replacement a character string that a matched \code{pattern} is 
replaced with.
 #' @family string_funcs
 #' @rdname regexp_replace
 #' @name regexp_replace
@@ -2900,6 +2924,9 @@ setMethod("regexp_replace",
 #'
 #' Right-padded with pad to a length of len.
 #'
+#' @param x the string Column to be right-padded.
+#' @param len maximum length of each output result.
+#' @param pad a character string to be padded with.
 #' @family string_funcs
 #' @rdname rpad
 #' @name rpad
@@ -2922,6 +2949,11 @@ setMethod("rpad", signature(x = "Column", len = 
"numeric", pad = "character"),
 #' returned. If count is negative, every to the right of the final delimiter 
(counting from the
 #' right) is returned. substring_index performs a case-sensitive match when 
searching for delim.
 #'
+#' @param x a Column.
+#' @param delim a delimiter string.
+#' @param count number of occurrences of \code{delim} before the substring is 
returned.
+#'              A positive number means counting from the left, while negative 
means
+#'              counting from the right.
 #' @family string_funcs
 #' @rdname substring_index
 #' @aliases substring_index,Column,character,numeric-method
@@ -2949,6 +2981,11 @@ setMethod("substring_index",
 #' The translate will happen when any character in the string matching with 
the character
 #' in the matchingString.
 #'
+#' @param x a string Column.
+#' @param matchingString a source string where each character will be 
translated.
+#' @param replaceString a target string where each \code{matchingString} 
character will
+#'                      be replaced by the character in \code{replaceString}
+#'                      at the same location, if any.
 #' @family string_funcs
 #' @rdname translate
 #' @name translate
@@ -2997,6 +3034,10 @@ setMethod("unix_timestamp", signature(x = "Column", 
format = "missing"),
             column(jc)
           })
 
+#' @param x a Column of date, in string, date or timestamp type.
+#' @param format the target format. See
+#'               
\href{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}{
+#'               Customizing Formats} for available options.
 #' @rdname unix_timestamp
 #' @name unix_timestamp
 #' @aliases unix_timestamp,Column,character-method
@@ -3012,6 +3053,8 @@ setMethod("unix_timestamp", signature(x = "Column", 
format = "character"),
 #' Evaluates a list of conditions and returns one of multiple possible result 
expressions.
 #' For unmatched expressions null is returned.
 #'
+#' @param condition the condition to test on. Must be a Column expression.
+#' @param value result expression.
 #' @family normal_funcs
 #' @rdname when
 #' @name when
@@ -3033,6 +3076,9 @@ setMethod("when", signature(condition = "Column", value = 
"ANY"),
 #' Evaluates a list of conditions and returns \code{yes} if the conditions are 
satisfied.
 #' Otherwise \code{no} is returned for unmatched conditions.
 #'
+#' @param test a Column expression that describes the condition.
+#' @param yes return values for \code{TRUE} elements of test.
+#' @param no return values for \code{FALSE} elements of test.
 #' @family normal_funcs
 #' @rdname ifelse
 #' @name ifelse
@@ -3074,10 +3120,14 @@ setMethod("ifelse",
 #' @family window_funcs
 #' @aliases cume_dist,missing-method
 #' @export
-#' @examples \dontrun{cume_dist()}
+#' @examples \dontrun{
+#'   df <- createDataFrame(iris)
+#'   ws <- orderBy(windowPartitionBy("Species"), "Sepal_Length")
+#'   out <- select(df, over(cume_dist(), ws), df$Sepal_Length, df$Species)
+#' }
 #' @note cume_dist since 1.6.0
 setMethod("cume_dist",
-          signature(x = "missing"),
+          signature("missing"),
           function() {
             jc <- callJStatic("org.apache.spark.sql.functions", "cume_dist")
             column(jc)
@@ -3101,7 +3151,7 @@ setMethod("cume_dist",
 #' @examples \dontrun{dense_rank()}
 #' @note dense_rank since 1.6.0
 setMethod("dense_rank",
-          signature(x = "missing"),
+          signature("missing"),
           function() {
             jc <- callJStatic("org.apache.spark.sql.functions", "dense_rank")
             column(jc)
@@ -3115,6 +3165,11 @@ setMethod("dense_rank",
 #'
 #' This is equivalent to the LAG function in SQL.
 #'
+#' @param x the column as a character string or a Column to compute on.
+#' @param offset the number of rows back from the current row from which to 
obtain a value.
+#'               If not specified, the default is 1.
+#' @param defaultValue default to use when the offset row does not exist.
+#' @param ... further arguments to be passed to or from other methods.
 #' @rdname lag
 #' @name lag
 #' @aliases lag,characterOrColumn-method
@@ -3143,7 +3198,7 @@ setMethod("lag",
 #' an `offset` of one will return the next row at any given point in the 
window partition.
 #'
 #' This is equivalent to the LEAD function in SQL.
-#' 
+#'
 #' @param x Column to compute on
 #' @param offset Number of rows to offset
 #' @param defaultValue (Optional) default value to use
@@ -3211,7 +3266,7 @@ setMethod("ntile",
 #' @examples \dontrun{percent_rank()}
 #' @note percent_rank since 1.6.0
 setMethod("percent_rank",
-          signature(x = "missing"),
+          signature("missing"),
           function() {
             jc <- callJStatic("org.apache.spark.sql.functions", "percent_rank")
             column(jc)
@@ -3243,6 +3298,8 @@ setMethod("rank",
           })
 
 # Expose rank() in the R base package
+#' @param x a numeric, complex, character or logical vector.
+#' @param ... additional argument(s) passed to the method.
 #' @name rank
 #' @rdname rank
 #' @aliases rank,ANY-method
@@ -3267,7 +3324,7 @@ setMethod("rank",
 #' @examples \dontrun{row_number()}
 #' @note row_number since 1.6.0
 setMethod("row_number",
-          signature(x = "missing"),
+          signature("missing"),
           function() {
             jc <- callJStatic("org.apache.spark.sql.functions", "row_number")
             column(jc)
@@ -3318,7 +3375,7 @@ setMethod("explode",
 #' size
 #'
 #' Returns length of array or map.
-#' 
+#'
 #' @param x Column to compute on
 #'
 #' @rdname size

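A sketch of the window() parameters documented above, applied to an 
illustrative timestamp column (the data here is hypothetical; assumes an 
active session):

    # Sixty readings, ten seconds apart, starting at an arbitrary timestamp.
    local_df <- data.frame(
      ts = as.POSIXct("2016-08-20 12:00:00", tz = "UTC") +
        seq(0, by = 10, length.out = 60),
      value = rnorm(60))
    df <- createDataFrame(local_df)

    # Tumbling one-minute windows (no slideDuration given), averaged per window.
    win_avg <- agg(groupBy(df, window(df$ts, "1 minute")), avg(df$value))
    head(win_avg)
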
http://git-wip-us.apache.org/repos/asf/spark/blob/02978961/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 52ab730..70db7cb 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -380,6 +380,9 @@ setGeneric("value", function(bcast) { 
standardGeneric("value") })
 
 ####################  SparkDataFrame Methods ########################
 
+#' @param x a SparkDataFrame or GroupedData.
+#' @param ... further arguments to be passed to or from other methods.
+#' @return A SparkDataFrame.
 #' @rdname summarize
 #' @export
 setGeneric("agg", function (x, ...) { standardGeneric("agg") })
@@ -407,6 +410,8 @@ setGeneric("cache", function(x) { standardGeneric("cache") 
})
 #' @export
 setGeneric("collect", function(x, ...) { standardGeneric("collect") })
 
+#' @param do.NULL currently not used.
+#' @param prefix currently not used.
 #' @rdname columns
 #' @export
 setGeneric("colnames", function(x, do.NULL = TRUE, prefix = "col") { 
standardGeneric("colnames") })
@@ -427,15 +432,24 @@ setGeneric("coltypes<-", function(x, value) { 
standardGeneric("coltypes<-") })
 #' @export
 setGeneric("columns", function(x) {standardGeneric("columns") })
 
-#' @rdname nrow
+#' @param x a GroupedData or Column.
+#' @rdname count
 #' @export
 setGeneric("count", function(x) { standardGeneric("count") })
 
 #' @rdname cov
+#' @param x a Column object or a SparkDataFrame.
+#' @param ... additional argument(s). If `x` is a Column object, a Column 
object
+#'        should be provided. If `x` is a SparkDataFrame, two column names 
should
+#'        be provided.
 #' @export
 setGeneric("cov", function(x, ...) {standardGeneric("cov") })
 
 #' @rdname corr
+#' @param x a Column object or a SparkDataFrame.
+#' @param ... additional argument(s). If `x` is a Column object, a Column 
object
+#'        should be provided. If `x` is a SparkDataFrame, two column names 
should
+#'        be provided.
 #' @export
 setGeneric("corr", function(x, ...) {standardGeneric("corr") })
 
@@ -462,10 +476,14 @@ setGeneric("dapply", function(x, func, schema) { 
standardGeneric("dapply") })
 #' @export
 setGeneric("dapplyCollect", function(x, func) { 
standardGeneric("dapplyCollect") })
 
+#' @param x a SparkDataFrame or GroupedData.
+#' @param ... additional argument(s) passed to the method.
 #' @rdname gapply
 #' @export
 setGeneric("gapply", function(x, ...) { standardGeneric("gapply") })
 
+#' @param x a SparkDataFrame or GroupedData.
+#' @param ... additional argument(s) passed to the method.
 #' @rdname gapplyCollect
 #' @export
 setGeneric("gapplyCollect", function(x, ...) { 
standardGeneric("gapplyCollect") })
@@ -667,8 +685,8 @@ setGeneric("selectExpr", function(x, expr, ...) { 
standardGeneric("selectExpr")
 #' @export
 setGeneric("showDF", function(x, ...) { standardGeneric("showDF") })
 
-# @rdname subset
-# @export
+#' @rdname subset
+#' @export
 setGeneric("subset", function(x, ...) { standardGeneric("subset") })
 
 #' @rdname summarize
@@ -735,6 +753,8 @@ setGeneric("between", function(x, bounds) { 
standardGeneric("between") })
 setGeneric("cast", function(x, dataType) { standardGeneric("cast") })
 
 #' @rdname columnfunctions
+#' @param x a Column object.
+#' @param ... additional argument(s).
 #' @export
 setGeneric("contains", function(x, ...) { standardGeneric("contains") })
 
@@ -830,6 +850,8 @@ setGeneric("array_contains", function(x, value) { 
standardGeneric("array_contain
 #' @export
 setGeneric("ascii", function(x) { standardGeneric("ascii") })
 
+#' @param x Column to compute on or a GroupedData object.
+#' @param ... additional argument(s) when `x` is a GroupedData object.
 #' @rdname avg
 #' @export
 setGeneric("avg", function(x, ...) { standardGeneric("avg") })
@@ -886,9 +908,10 @@ setGeneric("crc32", function(x) { standardGeneric("crc32") })
 #' @export
 setGeneric("hash", function(x, ...) { standardGeneric("hash") })
 
+#' @param x empty. Should be used with no argument.
 #' @rdname cume_dist
 #' @export
-setGeneric("cume_dist", function(x) { standardGeneric("cume_dist") })
+setGeneric("cume_dist", function(x = "missing") { standardGeneric("cume_dist") })
 
 #' @rdname datediff
 #' @export
@@ -918,9 +941,10 @@ setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") })
 #' @export
 setGeneric("decode", function(x, charset) { standardGeneric("decode") })
 
+#' @param x empty. Should be used with no argument.
 #' @rdname dense_rank
 #' @export
-setGeneric("dense_rank", function(x) { standardGeneric("dense_rank") })
+setGeneric("dense_rank", function(x = "missing") { standardGeneric("dense_rank") })
 
 #' @rdname encode
 #' @export
@@ -1034,10 +1058,11 @@ setGeneric("md5", function(x) { standardGeneric("md5") })
 #' @export
 setGeneric("minute", function(x) { standardGeneric("minute") })
 
+#' @param x empty. Should be used with no argument.
 #' @rdname monotonically_increasing_id
 #' @export
 setGeneric("monotonically_increasing_id",
-           function(x) { standardGeneric("monotonically_increasing_id") })
+           function(x = "missing") { standardGeneric("monotonically_increasing_id") })
 
 #' @rdname month
 #' @export
@@ -1047,7 +1072,7 @@ setGeneric("month", function(x) { standardGeneric("month") })
 #' @export
 setGeneric("months_between", function(y, x) { standardGeneric("months_between") })
 
-#' @rdname nrow
+#' @rdname count
 #' @export
 setGeneric("n", function(x) { standardGeneric("n") })
 
@@ -1071,9 +1096,10 @@ setGeneric("ntile", function(x) { standardGeneric("ntile") })
 #' @export
 setGeneric("n_distinct", function(x, ...) { standardGeneric("n_distinct") })
 
+#' @param x empty. Should be used with no argument.
 #' @rdname percent_rank
 #' @export
-setGeneric("percent_rank", function(x) { standardGeneric("percent_rank") })
+setGeneric("percent_rank", function(x = "missing") { standardGeneric("percent_rank") })
 
 #' @rdname pmod
 #' @export
@@ -1114,11 +1140,12 @@ setGeneric("reverse", function(x) { standardGeneric("reverse") })
 
 #' @rdname rint
 #' @export
-setGeneric("rint", function(x, ...) { standardGeneric("rint") })
+setGeneric("rint", function(x) { standardGeneric("rint") })
 
+#' @param x empty. Should be used with no argument.
 #' @rdname row_number
 #' @export
-setGeneric("row_number", function(x) { standardGeneric("row_number") })
+setGeneric("row_number", function(x = "missing") { standardGeneric("row_number") })
 
 #' @rdname rpad
 #' @export
@@ -1176,9 +1203,10 @@ setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array")
 #' @export
 setGeneric("soundex", function(x) { standardGeneric("soundex") })
 
+#' @param x empty. Should be used with no argument.
 #' @rdname spark_partition_id
 #' @export
-setGeneric("spark_partition_id", function(x) { standardGeneric("spark_partition_id") })
+setGeneric("spark_partition_id", function(x = "missing") { standardGeneric("spark_partition_id") })
 
 #' @rdname sd
 #' @export
@@ -1276,10 +1304,16 @@ setGeneric("year", function(x) { standardGeneric("year") })
 #' @export
 setGeneric("spark.glm", function(data, formula, ...) { standardGeneric("spark.glm") })
 
+#' @param x,y For \code{glm}: logical values indicating whether the response vector
+#'          and model matrix used in the fitting process should be returned as
+#'          components of the returned value.
+#' @inheritParams stats::glm
 #' @rdname glm
 #' @export
 setGeneric("glm")
 
+#' @param object a fitted ML model object.
+#' @param ... additional argument(s) passed to the method.
 #' @rdname predict
 #' @export
 setGeneric("predict", function(object, ...) { standardGeneric("predict") })
@@ -1302,8 +1336,11 @@ setGeneric("spark.naiveBayes", function(data, formula, ...) { standardGeneric("s
 
 #' @rdname spark.survreg
 #' @export
-setGeneric("spark.survreg", function(data, formula, ...) { standardGeneric("spark.survreg") })
+setGeneric("spark.survreg", function(data, formula) { standardGeneric("spark.survreg") })
 
+#' @param object a fitted ML model object.
+#' @param path the directory where the model is saved.
+#' @param ... additional argument(s) passed to the method.
 #' @rdname write.ml
 #' @export
 setGeneric("write.ml", function(object, path, ...) { standardGeneric("write.ml") })

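[Editor's note, not part of the patch] The generics above switch several window functions (cume_dist, dense_rank, percent_rank, row_number, monotonically_increasing_id, spark_partition_id) to a no-argument form, and document that cov()/corr() dispatch on either a Column or a SparkDataFrame. A minimal SparkR sketch, assuming a running SparkSession and a hypothetical SparkDataFrame df with columns "dept", "salary" and "bonus":

  # Window functions are called with no argument and combined with over():
  ws <- orderBy(windowPartitionBy("dept"), "salary")
  head(select(df, df$dept, df$salary, over(row_number(), ws)))

  # cov()/corr() take two column names on a SparkDataFrame, or Column objects:
  cov(df, "salary", "bonus")
  head(select(df, corr(df$salary, df$bonus)))
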
http://git-wip-us.apache.org/repos/asf/spark/blob/02978961/R/pkg/R/group.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R
index 85348ae..3c85ada 100644
--- a/R/pkg/R/group.R
+++ b/R/pkg/R/group.R
@@ -59,8 +59,7 @@ setMethod("show", "GroupedData",
 #' Count the number of rows for each group.
 #' The resulting SparkDataFrame will also contain the grouping columns.
 #'
-#' @param x a GroupedData
-#' @return a SparkDataFrame
+#' @return A SparkDataFrame.
 #' @rdname count
 #' @aliases count,GroupedData-method
 #' @export
@@ -83,8 +82,6 @@ setMethod("count",
 #' df2 <- agg(df, <column> = <aggFunction>)
 #' df2 <- agg(df, newColName = aggFunction(column))
 #'
-#' @param x a GroupedData
-#' @return a SparkDataFrame
 #' @rdname summarize
 #' @aliases agg,GroupedData-method
 #' @name agg
@@ -201,7 +198,6 @@ createMethods()
 
 #' gapply
 #'
-#' @param x A GroupedData
 #' @rdname gapply
 #' @aliases gapply,GroupedData-method
 #' @name gapply
@@ -216,7 +212,6 @@ setMethod("gapply",
 
 #' gapplyCollect
 #'
-#' @param x A GroupedData
 #' @rdname gapplyCollect
 #' @aliases gapplyCollect,GroupedData-method
 #' @name gapplyCollect

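[Editor's note, not part of the patch] For the GroupedData methods whose docs are touched above (count, agg, gapply), a brief illustrative sketch; df and its columns are hypothetical:

  gdf <- groupBy(df, df$dept)
  head(count(gdf))                              # rows per group
  head(agg(gdf, avg_salary = avg(df$salary)))   # named aggregate column

  # gapply() runs an R function per group; the output schema must be supplied.
  out <- gapply(gdf,
                function(key, x) data.frame(key, max(x$salary)),
                structType(structField("dept", "string"),
                           structField("max_salary", "double")))
  head(out)
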
http://git-wip-us.apache.org/repos/asf/spark/blob/02978961/R/pkg/R/mllib.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index 50c601f..008d92f 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -82,15 +82,16 @@ NULL
 #' Users can call \code{summary} to print a summary of the fitted model, \code{predict} to make
 #' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models.
 #'
-#' @param data SparkDataFrame for training.
-#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
+#' @param data a SparkDataFrame for training.
+#' @param formula a symbolic description of the model to be fitted. Currently only a few formula
 #'                operators are supported, including '~', '.', ':', '+', and '-'.
-#' @param family A description of the error distribution and link function to be used in the model.
+#' @param family a description of the error distribution and link function to be used in the model.
 #'               This can be a character string naming a family function, a family function or
 #'               the result of a call to a family function. Refer R family at
 #'               \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
-#' @param tol Positive convergence tolerance of iterations.
-#' @param maxIter Integer giving the maximal number of IRLS iterations.
+#' @param tol positive convergence tolerance of iterations.
+#' @param maxIter integer giving the maximal number of IRLS iterations.
+#' @param ... additional arguments passed to the method.
 #' @aliases spark.glm,SparkDataFrame,formula-method
 #' @return \code{spark.glm} returns a fitted generalized linear model
 #' @rdname spark.glm
@@ -142,15 +143,15 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"),
 #' Generalized Linear Models (R-compliant)
 #'
 #' Fits a generalized linear model, similarly to R's glm().
-#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
+#' @param formula a symbolic description of the model to be fitted. Currently only a few formula
 #'                operators are supported, including '~', '.', ':', '+', and '-'.
-#' @param data SparkDataFrame for training.
-#' @param family A description of the error distribution and link function to be used in the model.
+#' @param data a SparkDataFrame or R's glm data for training.
+#' @param family a description of the error distribution and link function to be used in the model.
 #'               This can be a character string naming a family function, a family function or
 #'               the result of a call to a family function. Refer R family at
 #'               \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}.
-#' @param epsilon Positive convergence tolerance of iterations.
-#' @param maxit Integer giving the maximal number of IRLS iterations.
+#' @param epsilon positive convergence tolerance of iterations.
+#' @param maxit integer giving the maximal number of IRLS iterations.
 #' @return \code{glm} returns a fitted generalized linear model.
 #' @rdname glm
 #' @export
@@ -171,7 +172,7 @@ setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDat
 
 #  Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary().
 
-#' @param object A fitted generalized linear model
+#' @param object a fitted generalized linear model.
 #' @return \code{summary} returns a summary object of the fitted model, a list of components
 #'         including at least the coefficients, null/residual deviance, null/residual degrees
 #'         of freedom, AIC and number of iterations IRLS takes.
@@ -212,7 +213,7 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"),
 #  Prints the summary of GeneralizedLinearRegressionModel
 
 #' @rdname spark.glm
-#' @param x Summary object of fitted generalized linear model returned by \code{summary} function
+#' @param x summary object of fitted generalized linear model returned by \code{summary} function
 #' @export
 #' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0
 print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
@@ -244,7 +245,7 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) {
 #  Makes predictions from a generalized linear model produced by glm() or spark.glm(),
 #  similarly to R's predict().
 
-#' @param newData SparkDataFrame for testing
+#' @param newData a SparkDataFrame for testing.
 #' @return \code{predict} returns a SparkDataFrame containing predicted labels in a column named
 #'         "prediction"
 #' @rdname spark.glm
@@ -258,7 +259,7 @@ setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"),
 # Makes predictions from a naive Bayes model or a model produced by spark.naiveBayes(),
 # similarly to R package e1071's predict.
 
-#' @param newData A SparkDataFrame for testing
+#' @param newData a SparkDataFrame for testing.
 #' @return \code{predict} returns a SparkDataFrame containing predicted labeled in a column named
 #' "prediction"
 #' @rdname spark.naiveBayes
@@ -271,9 +272,9 @@ setMethod("predict", signature(object = "NaiveBayesModel"),
 
 # Returns the summary of a naive Bayes model produced by \code{spark.naiveBayes}
 
-#' @param object A naive Bayes model fitted by \code{spark.naiveBayes}
+#' @param object a naive Bayes model fitted by \code{spark.naiveBayes}.
 #' @return \code{summary} returns a list containing \code{apriori}, the label distribution, and
-#'         \code{tables}, conditional probabilities given the target label
+#'         \code{tables}, conditional probabilities given the target label.
 #' @rdname spark.naiveBayes
 #' @export
 #' @note summary(NaiveBayesModel) since 2.0.0
@@ -298,14 +299,15 @@ setMethod("summary", signature(object = "NaiveBayesModel"),
 #' Users can call \code{summary} to print a summary of the fitted model, \code{predict} to make
 #' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models.
 #'
-#' @param data SparkDataFrame for training
-#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
+#' @param data a SparkDataFrame for training.
+#' @param formula a symbolic description of the model to be fitted. Currently only a few formula
 #'                operators are supported, including '~', '.', ':', '+', and '-'.
 #'                Note that the response variable of formula is empty in spark.kmeans.
-#' @param k Number of centers
-#' @param maxIter Maximum iteration number
-#' @param initMode The initialization algorithm choosen to fit the model
-#' @return \code{spark.kmeans} returns a fitted k-means model
+#' @param k number of centers.
+#' @param maxIter maximum iteration number.
+#' @param initMode the initialization algorithm chosen to fit the model.
+#' @param ... additional argument(s) passed to the method.
+#' @return \code{spark.kmeans} returns a fitted k-means model.
 #' @rdname spark.kmeans
 #' @aliases spark.kmeans,SparkDataFrame,formula-method
 #' @name spark.kmeans
@@ -346,8 +348,11 @@ setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula"
 #' Get fitted result from a k-means model, similarly to R's fitted().
 #' Note: A saved-loaded model does not support this method.
 #'
-#' @param object A fitted k-means model
-#' @return \code{fitted} returns a SparkDataFrame containing fitted values
+#' @param object a fitted k-means model.
+#' @param method type of fitted results, \code{"centers"} for cluster centers
+#'        or \code{"classes"} for assigned classes.
+#' @param ... additional argument(s) passed to the method.
+#' @return \code{fitted} returns a SparkDataFrame containing fitted values.
 #' @rdname fitted
 #' @export
 #' @examples
@@ -371,8 +376,8 @@ setMethod("fitted", signature(object = "KMeansModel"),
 
 #  Get the summary of a k-means model
 
-#' @param object A fitted k-means model
-#' @return \code{summary} returns the model's coefficients, size and cluster
+#' @param object a fitted k-means model.
+#' @return \code{summary} returns the model's coefficients, size and cluster.
 #' @rdname spark.kmeans
 #' @export
 #' @note summary(KMeansModel) since 2.0.0
@@ -398,7 +403,8 @@ setMethod("summary", signature(object = "KMeansModel"),
 
 #  Predicted values based on a k-means model
 
-#' @return \code{predict} returns the predicted values based on a k-means model
+#' @param newData a SparkDataFrame for testing.
+#' @return \code{predict} returns the predicted values based on a k-means model.
 #' @rdname spark.kmeans
 #' @export
 #' @note predict(KMeansModel) since 2.0.0
@@ -414,11 +420,12 @@ setMethod("predict", signature(object = "KMeansModel"),
 #' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models.
 #' Only categorical data is supported.
 #'
-#' @param data A \code{SparkDataFrame} of observations and labels for model fitting
-#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
+#' @param data a \code{SparkDataFrame} of observations and labels for model fitting.
+#' @param formula a symbolic description of the model to be fitted. Currently only a few formula
 #'               operators are supported, including '~', '.', ':', '+', and '-'.
-#' @param smoothing Smoothing parameter
-#' @return \code{spark.naiveBayes} returns a fitted naive Bayes model
+#' @param smoothing smoothing parameter.
+#' @param ... additional argument(s) passed to the method. Currently only \code{smoothing}.
+#' @return \code{spark.naiveBayes} returns a fitted naive Bayes model.
 #' @rdname spark.naiveBayes
 #' @aliases spark.naiveBayes,SparkDataFrame,formula-method
 #' @name spark.naiveBayes
@@ -454,8 +461,8 @@ setMethod("spark.naiveBayes", signature(data = "SparkDataFrame", formula = "form
 
 # Saves the Bernoulli naive Bayes model to the input path.
 
-#' @param path The directory where the model is saved
-#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
+#' @param path the directory where the model is saved
+#' @param overwrite overwrites or not if the output path already exists. Default is FALSE
 #'                  which means throw exception if the output path exists.
 #'
 #' @rdname spark.naiveBayes
@@ -473,10 +480,9 @@ setMethod("write.ml", signature(object = "NaiveBayesModel", path = "character"),
 
 # Saves the AFT survival regression model to the input path.
 
-#' @param path The directory where the model is saved
-#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
+#' @param path the directory where the model is saved.
+#' @param overwrite overwrites or not if the output path already exists. Default is FALSE
 #'                  which means throw exception if the output path exists.
-#'
 #' @rdname spark.survreg
 #' @export
 #' @note write.ml(AFTSurvivalRegressionModel, character) since 2.0.0
@@ -492,8 +498,8 @@ setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "c
 
 #  Saves the generalized linear model to the input path.
 
-#' @param path The directory where the model is saved
-#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
+#' @param path the directory where the model is saved.
+#' @param overwrite overwrites or not if the output path already exists. Default is FALSE
 #'                  which means throw exception if the output path exists.
 #'
 #' @rdname spark.glm
@@ -510,8 +516,8 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat
 
 #  Save fitted MLlib model to the input path
 
-#' @param path The directory where the model is saved
-#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE
+#' @param path the directory where the model is saved.
+#' @param overwrite overwrites or not if the output path already exists. Default is FALSE
 #'                  which means throw exception if the output path exists.
 #'
 #' @rdname spark.kmeans
@@ -528,8 +534,8 @@ setMethod("write.ml", signature(object = "KMeansModel", path = "character"),
 
 #' Load a fitted MLlib model from the input path.
 #'
-#' @param path Path of the model to read.
-#' @return a fitted MLlib model
+#' @param path path of the model to read.
+#' @return A fitted MLlib model.
 #' @rdname read.ml
 #' @name read.ml
 #' @export
@@ -563,11 +569,11 @@ read.ml <- function(path) {
 #' \code{predict} to make predictions on new data, and \code{write.ml}/\code{read.ml} to
 #' save/load fitted models.
 #'
-#' @param data A SparkDataFrame for training
-#' @param formula A symbolic description of the model to be fitted. Currently only a few formula
+#' @param data a SparkDataFrame for training.
+#' @param formula a symbolic description of the model to be fitted. Currently only a few formula
 #'                operators are supported, including '~', ':', '+', and '-'.
-#'                Note that operator '.' is not supported currently
-#' @return \code{spark.survreg} returns a fitted AFT survival regression model
+#'                Note that operator '.' is not supported currently.
+#' @return \code{spark.survreg} returns a fitted AFT survival regression model.
 #' @rdname spark.survreg
 #' @seealso survival: \url{https://cran.r-project.org/web/packages/survival/}
 #' @export
@@ -591,7 +597,7 @@ read.ml <- function(path) {
 #' }
 #' @note spark.survreg since 2.0.0
 setMethod("spark.survreg", signature(data = "SparkDataFrame", formula = "formula"),
-          function(data, formula, ...) {
+          function(data, formula) {
             formula <- paste(deparse(formula), collapse = "")
             jobj <- callJStatic("org.apache.spark.ml.r.AFTSurvivalRegressionWrapper",
                                 "fit", formula, data@sdf)
@@ -602,14 +608,14 @@ setMethod("spark.survreg", signature(data = "SparkDataFrame", formula = "formula
 # Returns a summary of the AFT survival regression model produced by spark.survreg,
 # similarly to R's summary().
 
-#' @param object A fitted AFT survival regression model
+#' @param object a fitted AFT survival regression model.
 #' @return \code{summary} returns a list containing the model's coefficients,
 #' intercept and log(scale)
 #' @rdname spark.survreg
 #' @export
 #' @note summary(AFTSurvivalRegressionModel) since 2.0.0
 setMethod("summary", signature(object = "AFTSurvivalRegressionModel"),
-          function(object, ...) {
+          function(object) {
             jobj <- object@jobj
             features <- callJMethod(jobj, "rFeatures")
             coefficients <- callJMethod(jobj, "rCoefficients")
@@ -622,9 +628,9 @@ setMethod("summary", signature(object = "AFTSurvivalRegressionModel"),
 # Makes predictions from an AFT survival regression model or a model produced by
 # spark.survreg, similarly to R package survival's predict.
 
-#' @param newData A SparkDataFrame for testing
+#' @param newData a SparkDataFrame for testing.
 #' @return \code{predict} returns a SparkDataFrame containing predicted values
-#' on the original scale of the data (mean predicted value at scale = 1.0)
+#' on the original scale of the data (mean predicted value at scale = 1.0).
 #' @rdname spark.survreg
 #' @export
 #' @note predict(AFTSurvivalRegressionModel) since 2.0.0

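[Editor's note, not part of the patch] As context for the MLlib wrapper docs above, a hedged end-to-end sketch of the spark.glm workflow; the dataset and save path are only illustrative:

  training <- suppressWarnings(createDataFrame(iris))   # "." in column names becomes "_"
  model <- spark.glm(training, Sepal_Length ~ Sepal_Width, family = "gaussian")
  summary(model)                       # coefficients, deviance, AIC, iterations
  preds <- predict(model, training)    # adds a "prediction" column
  head(select(preds, "Sepal_Length", "prediction"))

  # Fitted models can be persisted and reloaded:
  write.ml(model, "/tmp/spark-glm-model", overwrite = TRUE)
  restored <- read.ml("/tmp/spark-glm-model")
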
http://git-wip-us.apache.org/repos/asf/spark/blob/02978961/R/pkg/R/schema.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/schema.R b/R/pkg/R/schema.R
index b429f5d..cb5bdb9 100644
--- a/R/pkg/R/schema.R
+++ b/R/pkg/R/schema.R
@@ -92,8 +92,9 @@ print.structType <- function(x, ...) {
 #'
 #' Create a structField object that contains the metadata for a single field in a schema.
 #'
-#' @param x The name of the field
-#' @return a structField object
+#' @param x the name of the field.
+#' @param ... additional argument(s) passed to the method.
+#' @return A structField object.
 #' @rdname structField
 #' @export
 #' @examples

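[Editor's note, not part of the patch] A small sketch of the structField()/structType() usage documented above; the field names are made up:

  schema <- structType(structField("name", "string"),
                       structField("age", "integer", nullable = TRUE))
  people <- createDataFrame(data.frame(name = c("a", "b"), age = c(30L, 25L)),
                            schema)
  printSchema(people)
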
http://git-wip-us.apache.org/repos/asf/spark/blob/02978961/R/pkg/R/sparkR.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index f8bdee7..85815af 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -320,14 +320,15 @@ sparkRHive.init <- function(jsc = NULL) {
 #' For details on how to initialize and use SparkR, refer to SparkR programming guide at
 #' \url{http://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession}.
 #'
-#' @param master The Spark master URL
-#' @param appName Application name to register with cluster manager
-#' @param sparkHome Spark Home directory
-#' @param sparkConfig Named list of Spark configuration to set on worker nodes
-#' @param sparkJars Character vector of jar files to pass to the worker nodes
-#' @param sparkPackages Character vector of packages from spark-packages.org
-#' @param enableHiveSupport Enable support for Hive, fallback if not built with Hive support; once
+#' @param master the Spark master URL.
+#' @param appName application name to register with cluster manager.
+#' @param sparkHome Spark Home directory.
+#' @param sparkConfig named list of Spark configuration to set on worker nodes.
+#' @param sparkJars character vector of jar files to pass to the worker nodes.
+#' @param sparkPackages character vector of packages from spark-packages.org
+#' @param enableHiveSupport enable support for Hive, fallback if not built with Hive support; once
 #'        set, this cannot be turned off on an existing session
+#' @param ... named Spark properties passed to the method.
 #' @export
 #' @examples
 #'\dontrun{
@@ -413,9 +414,9 @@ sparkR.session <- function(
 #' Assigns a group ID to all the jobs started by this thread until the group ID is set to a
 #' different value or cleared.
 #'
-#' @param groupid the ID to be assigned to job groups
-#' @param description description for the job group ID
-#' @param interruptOnCancel flag to indicate if the job is interrupted on job cancellation
+#' @param groupId the ID to be assigned to job groups.
+#' @param description description for the job group ID.
+#' @param interruptOnCancel flag to indicate if the job is interrupted on job cancellation.
 #' @rdname setJobGroup
 #' @name setJobGroup
 #' @examples

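[Editor's note, not part of the patch] To ground the sparkR.session() and setJobGroup() parameters documented above, a minimal sketch; the master URL, app name, config value and group names are placeholders:

  sparkR.session(master = "local[2]",
                 appName = "sparkr-doc-example",
                 sparkConfig = list(spark.sql.shuffle.partitions = "4"))

  # Tag jobs started from this thread until the group is changed or cleared.
  setJobGroup("nightly-etl", "nightly load jobs", interruptOnCancel = TRUE)
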
http://git-wip-us.apache.org/repos/asf/spark/blob/02978961/R/pkg/R/stats.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R
index 2b4ce19..8ea24d8 100644
--- a/R/pkg/R/stats.R
+++ b/R/pkg/R/stats.R
@@ -25,6 +25,7 @@ setOldClass("jobj")
 #' table. The number of distinct values for each column should be less than 1e4. At most 1e6
 #' non-zero pair frequencies will be returned.
 #'
+#' @param x a SparkDataFrame
 #' @param col1 name of the first column. Distinct items will make the first item of each row.
 #' @param col2 name of the second column. Distinct items will make the column names of the output.
 #' @return a local R data.frame representing the contingency table. The first column of each row
@@ -53,10 +54,9 @@ setMethod("crosstab",
 
 #' Calculate the sample covariance of two numerical columns of a SparkDataFrame.
 #'
-#' @param x A SparkDataFrame
-#' @param col1 the name of the first column
-#' @param col2 the name of the second column
-#' @return the covariance of the two columns.
+#' @param colName1 the name of the first column
+#' @param colName2 the name of the second column
+#' @return The covariance of the two columns.
 #'
 #' @rdname cov
 #' @name cov
@@ -71,19 +71,18 @@ setMethod("crosstab",
 #' @note cov since 1.6.0
 setMethod("cov",
           signature(x = "SparkDataFrame"),
-          function(x, col1, col2) {
-            stopifnot(class(col1) == "character" && class(col2) == "character")
+          function(x, colName1, colName2) {
+            stopifnot(class(colName1) == "character" && class(colName2) == "character")
             statFunctions <- callJMethod(x@sdf, "stat")
-            callJMethod(statFunctions, "cov", col1, col2)
+            callJMethod(statFunctions, "cov", colName1, colName2)
           })
 
 #' Calculates the correlation of two columns of a SparkDataFrame.
 #' Currently only supports the Pearson Correlation Coefficient.
 #' For Spearman Correlation, consider using RDD methods found in MLlib's Statistics.
 #'
-#' @param x A SparkDataFrame
-#' @param col1 the name of the first column
-#' @param col2 the name of the second column
+#' @param colName1 the name of the first column
+#' @param colName2 the name of the second column
 #' @param method Optional. A character specifying the method for calculating the correlation.
 #'               only "pearson" is allowed now.
 #' @return The Pearson Correlation Coefficient as a Double.
@@ -102,10 +101,10 @@ setMethod("cov",
 #' @note corr since 1.6.0
 setMethod("corr",
           signature(x = "SparkDataFrame"),
-          function(x, col1, col2, method = "pearson") {
-            stopifnot(class(col1) == "character" && class(col2) == "character")
+          function(x, colName1, colName2, method = "pearson") {
+            stopifnot(class(colName1) == "character" && class(colName2) == "character")
             statFunctions <- callJMethod(x@sdf, "stat")
-            callJMethod(statFunctions, "corr", col1, col2, method)
+            callJMethod(statFunctions, "corr", colName1, colName2, method)
           })
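
[Editor's note, not part of the patch] A short usage sketch for the stats helpers whose signatures are documented above; df and its column names are hypothetical:

  crosstab(df, "dept", "location")                  # local contingency data.frame
  cov(df, "salary", "bonus")                        # sample covariance (numeric)
  corr(df, "salary", "bonus", method = "pearson")   # Pearson correlation (numeric)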
 
 

