Repository: spark Updated Branches: refs/heads/master 39f328ba3 -> 01401e965
[SPARK-16508][SPARKR] Fix CRAN undocumented/duplicated arguments warnings. ## What changes were proposed in this pull request? This PR tries to fix all the remaining "undocumented/duplicated arguments" warnings given by CRAN-check. One warning remains, for the doc of R `stats::glm` exported in SparkR; to mute it, we would have to also provide documentation for all arguments of that non-SparkR function. Some previous conversation is in #14558. ## How was this patch tested? R unit tests and the `check-cran.sh` script (with no-test). Author: Junyang Qian <junya...@databricks.com> Closes #14705 from junyangq/SPARK-16508-master. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/01401e96 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/01401e96 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/01401e96 Branch: refs/heads/master Commit: 01401e965b58f7e8ab615764a452d7d18f1d4bf0 Parents: 39f328b Author: Junyang Qian <junya...@databricks.com> Authored: Sat Aug 20 06:59:23 2016 -0700 Committer: Felix Cheung <felixche...@apache.org> Committed: Sat Aug 20 06:59:23 2016 -0700 ---------------------------------------------------------------------- R/pkg/R/DataFrame.R | 221 +++++++++++++++++++++++++++------------- R/pkg/R/SQLContext.R | 30 ++++--- R/pkg/R/WindowSpec.R | 11 ++- R/pkg/R/column.R | 18 +++- R/pkg/R/functions.R | 173 ++++++++++++++++++++++++------------ R/pkg/R/generics.R | 62 ++++++++++--- R/pkg/R/group.R | 7 +- R/pkg/R/mllib.R | 113 +++++++++++++----------- R/pkg/R/schema.R | 5 +- R/pkg/R/sparkR.R | 21 ++--- R/pkg/R/stats.R | 25 +++--- 11 files changed, 419 insertions(+), 267 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/01401e96/R/pkg/R/DataFrame.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 09be06d..540dc31 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -120,8 +120,9 @@ setMethod("schema", #' #' Print the logical and physical Catalyst plans to the console for debugging. #' -#' @param x A SparkDataFrame #' @param extended Logical. If extended is FALSE, explain() only prints the physical plan. +#' @param ... further arguments to be passed to or from other methods. #' @family SparkDataFrame functions #' @aliases explain,SparkDataFrame-method #' @rdname explain @@ -177,11 +178,13 @@ setMethod("isLocal", #' #' Print the first numRows rows of a SparkDataFrame #' -#' @param x A SparkDataFrame -#' @param numRows The number of rows to print. Defaults to 20. -#' @param truncate Whether truncate long strings. If true, strings more than 20 characters will be -#' truncated. However, if set greater than zero, truncates strings longer than `truncate` -#' characters and all cells will be aligned right. +#' @param x a SparkDataFrame. +#' @param numRows the number of rows to print. Defaults to 20. +#' @param truncate whether to truncate long strings. If \code{TRUE}, strings more than +#' 20 characters will be truncated. However, if set greater than zero, +#' truncates strings longer than `truncate` characters and all cells +#' will be aligned right. +#' @param ... further arguments to be passed to or from other methods.
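For illustration, a minimal sketch of the showDF() truncation behavior documented above (the toy data and column name are hypothetical, not part of this patch; the numeric truncate form follows the doc text added here):

    df <- createDataFrame(data.frame(s = paste(rep("a", 30), collapse = ""), stringsAsFactors = FALSE))
    showDF(df, truncate = TRUE)  # strings longer than 20 characters are truncated
    showDF(df, truncate = 5)     # per the doc above: cut to 5 characters, cells right-aligned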
#' @family SparkDataFrame functions #' @aliases showDF,SparkDataFrame-method #' @rdname showDF @@ -211,7 +214,7 @@ setMethod("showDF", #' #' Print the SparkDataFrame column names and types #' -#' @param x A SparkDataFrame +#' @param object a SparkDataFrame. #' #' @family SparkDataFrame functions #' @rdname show @@ -262,11 +265,11 @@ setMethod("dtypes", }) }) -#' Column names +#' Column Names of SparkDataFrame #' -#' Return all column names as a list +#' Return all column names as a list. #' -#' @param x A SparkDataFrame +#' @param x a SparkDataFrame. #' #' @family SparkDataFrame functions #' @rdname columns @@ -323,6 +326,8 @@ setMethod("colnames", columns(x) }) +#' @param value a character vector. Must have the same length as the number +#' of columns in the SparkDataFrame. #' @rdname columns #' @aliases colnames<-,SparkDataFrame-method #' @name colnames<- @@ -514,9 +519,10 @@ setMethod("registerTempTable", #' #' Insert the contents of a SparkDataFrame into a table registered in the current SparkSession. #' -#' @param x A SparkDataFrame -#' @param tableName A character vector containing the name of the table -#' @param overwrite A logical argument indicating whether or not to overwrite +#' @param x a SparkDataFrame. +#' @param tableName a character vector containing the name of the table. +#' @param overwrite a logical argument indicating whether or not to overwrite. +#' @param ... further arguments to be passed to or from other methods. #' the existing rows in the table. #' #' @family SparkDataFrame functions @@ -575,7 +581,9 @@ setMethod("cache", #' supported storage levels, refer to #' \url{http://spark.apache.org/docs/latest/programming-guide.html#rdd-persistence}. #' -#' @param x The SparkDataFrame to persist +#' @param x the SparkDataFrame to persist. +#' @param newLevel storage level chosen for the persistence. See available options in +#' the description. #' #' @family SparkDataFrame functions #' @rdname persist @@ -603,8 +611,9 @@ setMethod("persist", #' Mark this SparkDataFrame as non-persistent, and remove all blocks for it from memory and #' disk. #' -#' @param x The SparkDataFrame to unpersist -#' @param blocking Whether to block until all blocks are deleted +#' @param x the SparkDataFrame to unpersist. +#' @param blocking whether to block until all blocks are deleted. +#' @param ... further arguments to be passed to or from other methods. #' #' @family SparkDataFrame functions #' @rdname unpersist-methods @@ -638,9 +647,10 @@ setMethod("unpersist", #' \item{3.} {Return a new SparkDataFrame partitioned by the given column(s), #' using `spark.sql.shuffle.partitions` as number of partitions.} #'} -#' @param x A SparkDataFrame -#' @param numPartitions The number of partitions to use. -#' @param col The column by which the partitioning will be performed. +#' @param x a SparkDataFrame. +#' @param numPartitions the number of partitions to use. +#' @param col the column by which the partitioning will be performed. +#' @param ... additional column(s) to be used in the partitioning. #' #' @family SparkDataFrame functions #' @rdname repartition @@ -919,11 +929,10 @@ setMethod("sample_frac", #' Returns the number of rows in a SparkDataFrame #' -#' @param x A SparkDataFrame -#' +#' @param x a SparkDataFrame. #' @family SparkDataFrame functions #' @rdname nrow -#' @name count +#' @name nrow #' @aliases count,SparkDataFrame-method #' @export #' @examples @@ -999,9 +1008,10 @@ setMethod("dim", #' Collects all the elements of a SparkDataFrame and coerces them into an R data.frame.
#' -#' @param x A SparkDataFrame -#' @param stringsAsFactors (Optional) A logical indicating whether or not string columns +#' @param x a SparkDataFrame. +#' @param stringsAsFactors (Optional) a logical indicating whether or not string columns #' should be converted to factors. FALSE by default. +#' @param ... further arguments to be passed to or from other methods. #' #' @family SparkDataFrame functions #' @rdname collect @@ -1096,8 +1106,8 @@ setMethod("limit", dataFrame(res) }) -#' Take the first NUM rows of a SparkDataFrame and return a the results as a R data.frame +#' Take the first NUM rows of a SparkDataFrame and return the results as an R data.frame #' +#' @param x a SparkDataFrame. +#' @param num number of rows to take. #' @family SparkDataFrame functions #' @rdname take #' @name take @@ -1124,9 +1136,9 @@ setMethod("take", #' then head() returns the first 6 rows in keeping with the current data.frame #' convention in R. #' -#' @param x A SparkDataFrame -#' @param num The number of rows to return. Default is 6. -#' @return A data.frame +#' @param x a SparkDataFrame. +#' @param num the number of rows to return. Default is 6. +#' @return A data.frame. #' #' @family SparkDataFrame functions #' @aliases head,SparkDataFrame-method @@ -1150,7 +1162,8 @@ setMethod("head", #' Return the first row of a SparkDataFrame #' -#' @param x A SparkDataFrame +#' @param x a SparkDataFrame or a column used in an aggregation function. +#' @param ... further arguments to be passed to or from other methods. #' #' @family SparkDataFrame functions #' @aliases first,SparkDataFrame-method @@ -1201,8 +1214,9 @@ setMethod("toRDD", #' #' Groups the SparkDataFrame using the specified columns, so we can run aggregation on them. #' -#' @param x a SparkDataFrame -#' @return a GroupedData +#' @param x a SparkDataFrame. +#' @param ... variable(s) (character name(s) or Column(s)) to group on. +#' @return A GroupedData. #' @family SparkDataFrame functions #' @aliases groupBy,SparkDataFrame-method #' @rdname groupBy @@ -1244,7 +1258,6 @@ setMethod("group_by", #' #' Compute aggregates by specifying a list of columns #' -#' @param x a SparkDataFrame #' @family SparkDataFrame functions #' @aliases agg,SparkDataFrame-method #' @rdname summarize @@ -1391,16 +1404,15 @@ setMethod("dapplyCollect", #' Groups the SparkDataFrame using the specified columns and applies the R function to each #' group. #' -#' @param x A SparkDataFrame -#' @param cols Grouping columns -#' @param func A function to be applied to each group partition specified by grouping +#' @param cols grouping columns. +#' @param func a function to be applied to each group partition specified by grouping #' column of the SparkDataFrame. The function `func` takes as argument #' a key - grouping columns and a data frame - a local R data.frame. #' The output of `func` is a local R data.frame. -#' @param schema The schema of the resulting SparkDataFrame after the function is applied. +#' @param schema the schema of the resulting SparkDataFrame after the function is applied. #' The schema must match to output of `func`. It has to be defined for each #' output column with preferred output column name and corresponding data type. -#' @return a SparkDataFrame +#' @return A SparkDataFrame. #' @family SparkDataFrame functions #' @aliases gapply,SparkDataFrame-method #' @rdname gapply @@ -1483,13 +1495,12 @@ setMethod("gapply", #' Groups the SparkDataFrame using the specified columns, applies the R function to each #' group and collects the result back to R as data.frame.
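For illustration, a sketch of the gapply() contract documented above: func receives the grouping key and a local R data.frame, and schema must describe func's output (mtcars is a stand-in dataset, not from this patch):

    df <- createDataFrame(mtcars)
    schema <- structType(structField("cyl", "double"), structField("max_mpg", "double"))
    result <- gapply(df, "cyl",
                     function(key, x) { data.frame(key, max(x$mpg)) },  # key: grouping value(s); x: local data.frame
                     schema)
    head(result)  # gapplyCollect would instead return the combined result as a local data.frame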
#' -#' @param x A SparkDataFrame -#' @param cols Grouping columns -#' @param func A function to be applied to each group partition specified by grouping +#' @param cols grouping columns. +#' @param func a function to be applied to each group partition specified by grouping #' column of the SparkDataFrame. The function `func` takes as argument #' a key - grouping columns and a data frame - a local R data.frame. #' The output of `func` is a local R data.frame. -#' @return a data.frame +#' @return A data.frame. #' @family SparkDataFrame functions #' @aliases gapplyCollect,SparkDataFrame-method #' @rdname gapplyCollect @@ -1636,6 +1647,7 @@ getColumn <- function(x, c) { column(callJMethod(x@sdf, "col", c)) } +#' @param name name of a Column (without being wrapped by \code{""}). #' @rdname select #' @name $ #' @aliases $,SparkDataFrame-method @@ -1645,6 +1657,7 @@ setMethod("$", signature(x = "SparkDataFrame"), getColumn(x, name) }) +#' @param value a Column or NULL. If NULL, the specified Column is dropped. #' @rdname select #' @name $<- #' @aliases $<-,SparkDataFrame-method @@ -1719,12 +1732,13 @@ setMethod("[", signature(x = "SparkDataFrame"), #' Subset #' #' Return subsets of SparkDataFrame according to given conditions -#' @param x A SparkDataFrame -#' @param subset (Optional) A logical expression to filter on rows -#' @param select expression for the single Column or a list of columns to select from the SparkDataFrame +#' @param x a SparkDataFrame. +#' @param i,subset (Optional) a logical expression to filter on rows. +#' @param j,select expression for the single Column or a list of columns to select from the SparkDataFrame. #' @param drop if TRUE, a Column will be returned if the resulting dataset has only one column. -#' Otherwise, a SparkDataFrame will always be returned. -#' @return A new SparkDataFrame containing only the rows that meet the condition with selected columns +#' Otherwise, a SparkDataFrame will always be returned. +#' @param ... currently not used. +#' @return A new SparkDataFrame containing only the rows that meet the condition with selected columns. #' @export #' @family SparkDataFrame functions #' @aliases subset,SparkDataFrame-method @@ -1759,9 +1773,12 @@ setMethod("subset", signature(x = "SparkDataFrame"), #' Select #' #' Selects a set of columns with names or Column expressions. -#' @param x A SparkDataFrame -#' @param col A list of columns or single Column or name -#' @return A new SparkDataFrame with selected columns +#' @param x a SparkDataFrame. +#' @param col a list of columns or single Column or name. +#' @param ... additional column(s) if only one column is specified in \code{col}. +#' If more than one column is assigned in \code{col}, \code{...} +#' should be left empty. +#' @return A new SparkDataFrame with selected columns. #' @export #' @family SparkDataFrame functions #' @rdname select @@ -1858,9 +1875,9 @@ setMethod("selectExpr", #' Return a new SparkDataFrame by adding a column or replacing the existing column #' that has the same name. #' -#' @param x A SparkDataFrame -#' @param colName A column name. -#' @param col A Column expression. +#' @param x a SparkDataFrame. +#' @param colName a column name. +#' @param col a Column expression. #' @return A SparkDataFrame with the new column added or the existing column replaced. #' @family SparkDataFrame functions #' @aliases withColumn,SparkDataFrame,character,Column-method @@ -1889,8 +1906,8 @@ setMethod("withColumn", #' #' Return a new SparkDataFrame with the specified columns added or replaced. 
#' -#' @param .data A SparkDataFrame -#' @param col a named argument of the form name = col +#' @param .data a SparkDataFrame. +#' @param ... additional column argument(s) each in the form name = col. #' @return A new SparkDataFrame with the new columns added or replaced. #' @family SparkDataFrame functions #' @aliases mutate,SparkDataFrame-method @@ -1967,6 +1984,7 @@ setMethod("mutate", do.call(select, c(x, colList, deDupCols)) }) +#' @param _data a SparkDataFrame. #' @export #' @rdname mutate #' @aliases transform,SparkDataFrame-method @@ -2278,11 +2296,18 @@ setMethod("join", #' specified, the common column names in \code{x} and \code{y} will be used. #' @param by.x a character vector specifying the joining columns for x. #' @param by.y a character vector specifying the joining columns for y. +#' @param all a boolean value setting \code{all.x} and \code{all.y} +#' if any of them are unset. #' @param all.x a boolean value indicating whether all the rows in x should #' be including in the join #' @param all.y a boolean value indicating whether all the rows in y should #' be including in the join #' @param sort a logical argument indicating whether the resulting columns should be sorted +#' @param suffixes a string vector of length 2 used to make colnames of +#' \code{x} and \code{y} unique. +#' The first element is appended to each colname of \code{x}. +#' The second element is appended to each colname of \code{y}. +#' @param ... additional argument(s) passed to the method. #' @details If all.x and all.y are set to FALSE, a natural join will be returned. If #' all.x is set to TRUE and all.y is set to FALSE, a left outer join will #' be returned. If all.x is set to FALSE and all.y is set to TRUE, a right @@ -2311,7 +2336,7 @@ setMethod("merge", signature(x = "SparkDataFrame", y = "SparkDataFrame"), function(x, y, by = intersect(names(x), names(y)), by.x = by, by.y = by, all = FALSE, all.x = all, all.y = all, - sort = TRUE, suffixes = c("_x", "_y"), ... ) { + sort = TRUE, suffixes = c("_x", "_y"), ...) { if (length(suffixes) != 2) { stop("suffixes must have length 2") @@ -2464,8 +2489,10 @@ setMethod("unionAll", #' Union two or more SparkDataFrames. This is equivalent to `UNION ALL` in SQL. #' Note that this does not remove duplicate rows across the two SparkDataFrames. #' -#' @param x A SparkDataFrame -#' @param ... Additional SparkDataFrame +#' @param x a SparkDataFrame. +#' @param ... additional SparkDataFrame(s). +#' @param deparse.level currently not used (put here to match the signature of +#' the base implementation). #' @return A SparkDataFrame containing the result of the union. #' @family SparkDataFrame functions #' @aliases rbind,SparkDataFrame-method @@ -2522,8 +2549,8 @@ setMethod("intersect", #' Return a new SparkDataFrame containing rows in this SparkDataFrame #' but not in another SparkDataFrame. This is equivalent to `EXCEPT` in SQL. #' -#' @param x A SparkDataFrame -#' @param y A SparkDataFrame +#' @param x a SparkDataFrame. +#' @param y a SparkDataFrame. #' @return A SparkDataFrame containing the result of the except operation. #' @family SparkDataFrame functions #' @aliases except,SparkDataFrame,SparkDataFrame-method @@ -2564,10 +2591,11 @@ setMethod("except", #' and to not change the existing data. #' } #' -#' @param df A SparkDataFrame -#' @param path A name for the table -#' @param source A name for external data source -#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default) +#' @param df a SparkDataFrame. 
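For illustration, a sketch of how the merge() arguments documented above map onto join types (toy frames are hypothetical, not part of this patch):

    df1 <- createDataFrame(data.frame(id = 1:3, v = c("a", "b", "c"), stringsAsFactors = FALSE))
    df2 <- createDataFrame(data.frame(id = 2:4, w = c("x", "y", "z"), stringsAsFactors = FALSE))
    m1 <- merge(df1, df2, by = "id")                # all.x = all.y = FALSE: natural join
    m2 <- merge(df1, df2, by = "id", all.x = TRUE)  # left outer join
    m3 <- merge(df1, df2, by = "id", all = TRUE)    # sets all.x = all.y = TRUE: full outer join
    head(m2)  # duplicated column names are disambiguated with the "_x"/"_y" suffixes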
+#' @param path a name for the table. +#' @param source a name for external data source. +#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default). +#' @param ... additional argument(s) passed to the method. #' #' @family SparkDataFrame functions #' @aliases write.df,SparkDataFrame,character-method @@ -2626,10 +2654,11 @@ setMethod("saveDF", #' ignore: The save operation is expected to not save the contents of the SparkDataFrame #' and to not change the existing data. \cr #' -#' @param df A SparkDataFrame -#' @param tableName A name for the table -#' @param source A name for external data source -#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default) +#' @param df a SparkDataFrame. +#' @param tableName a name for the table. +#' @param source a name for external data source. +#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default). +#' @param ... additional option(s) passed to the method. #' #' @family SparkDataFrame functions #' @aliases saveAsTable,SparkDataFrame,character-method @@ -2665,10 +2694,10 @@ setMethod("saveAsTable", #' Computes statistics for numeric and string columns. #' If no columns are given, this function computes statistics for all numerical or string columns. #' -#' @param x A SparkDataFrame to be computed. -#' @param col A string of name -#' @param ... Additional expressions -#' @return A SparkDataFrame +#' @param x a SparkDataFrame to be computed. +#' @param col a character string of a column name. +#' @param ... additional expressions. +#' @return A SparkDataFrame. #' @family SparkDataFrame functions #' @aliases describe,SparkDataFrame,character-method describe,SparkDataFrame,ANY-method #' @rdname summary @@ -2703,6 +2732,7 @@ setMethod("describe", dataFrame(sdf) }) +#' @param object a SparkDataFrame to be summarized. #' @rdname summary #' @name summary #' @aliases summary,SparkDataFrame-method @@ -2718,16 +2748,20 @@ setMethod("summary", #' #' dropna, na.omit - Returns a new SparkDataFrame omitting rows with null values. #' -#' @param x A SparkDataFrame. #' @param how "any" or "all". #' if "any", drop a row if it contains any nulls. #' if "all", drop a row only if all its values are null. #' if minNonNulls is specified, how is ignored. -#' @param minNonNulls If specified, drop rows that have less than +#' @param x a SparkDataFrame. +#' @param minNonNulls if specified, drop rows that have less than #' minNonNulls non-null values. #' This overwrites the how parameter. -#' @param cols Optional list of column names to consider. -#' @return A SparkDataFrame +#' @param cols optional list of column names to consider. In `fillna`, +#' columns specified in cols that do not have matching data +#' type are ignored. For example, if value is a character, and +#' subset contains a non-character column, then the non-character +#' column is simply ignored. +#' @return A SparkDataFrame. #' #' @family SparkDataFrame functions #' @rdname nafunctions @@ -2759,6 +2793,8 @@ setMethod("dropna", dataFrame(sdf) }) +#' @param object a SparkDataFrame. +#' @param ... further arguments to be passed to or from other methods. #' @rdname nafunctions #' @name na.omit #' @aliases na.omit,SparkDataFrame-method @@ -2772,18 +2808,12 @@ setMethod("na.omit", #' fillna - Replace null values. #' -#' @param x A SparkDataFrame. -#' @param value Value to replace null values with. +#' @param value value to replace null values with. #' Should be an integer, numeric, character or named list.
#' If the value is a named list, then cols is ignored and #' value must be a mapping from column name (character) to #' replacement value. The replacement value must be an #' integer, numeric or character. -#' @param cols optional list of column names to consider. -#' Columns specified in cols that do not have matching data -#' type are ignored. For example, if value is a character, and -#' subset contains a non-character column, then the non-character -#' column is simply ignored. #' #' @rdname nafunctions #' @name fillna @@ -2848,8 +2878,11 @@ setMethod("fillna", #' Since data.frames are held in memory, ensure that you have enough memory #' in your system to accommodate the contents. #' -#' @param x a SparkDataFrame -#' @return a data.frame +#' @param x a SparkDataFrame. +#' @param row.names NULL or a character vector giving the row names for the data frame. +#' @param optional If `TRUE`, converting column names is optional. +#' @param ... additional arguments to pass to base::as.data.frame. +#' @return A data.frame. #' @family SparkDataFrame functions #' @aliases as.data.frame,SparkDataFrame-method #' @rdname as.data.frame @@ -3003,9 +3036,10 @@ setMethod("str", #' Returns a new SparkDataFrame with columns dropped. #' This is a no-op if schema doesn't contain column name(s). #' -#' @param x A SparkDataFrame. -#' @param cols A character vector of column names or a Column. -#' @return A SparkDataFrame +#' @param x a SparkDataFrame. +#' @param col a character vector of column names or a Column. +#' @param ... further arguments to be passed to or from other methods. +#' @return A SparkDataFrame. #' #' @family SparkDataFrame functions #' @rdname drop @@ -3024,7 +3058,7 @@ #' @note drop since 2.0.0 setMethod("drop", signature(x = "SparkDataFrame"), - function(x, col) { + function(x, col, ...) { stopifnot(class(col) == "character" || class(col) == "Column") if (class(col) == "Column") { @@ -3052,8 +3086,8 @@ setMethod("drop", #' #' @name histogram #' @param nbins the number of bins (optional). Default value is 10. +#' @param col the column as a character string or a Column to build the histogram from. #' @param df the SparkDataFrame containing the Column to build the histogram from. -#' @param colname the name of the column to build the histogram from. #' @return a data.frame with the histogram statistics, i.e., counts and centroids. #' @rdname histogram #' @aliases histogram,SparkDataFrame,characterOrColumn-method @@ -3184,10 +3218,11 @@ setMethod("histogram", #' and to not change the existing data. #' } #' -#' @param x A SparkDataFrame -#' @param url JDBC database url of the form `jdbc:subprotocol:subname` -#' @param tableName The name of the table in the external database -#' @param mode One of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default) +#' @param x a SparkDataFrame. +#' @param url JDBC database url of the form `jdbc:subprotocol:subname`. +#' @param tableName the name of the table in the external database. +#' @param mode one of 'append', 'overwrite', 'error', 'ignore' save mode (it is 'error' by default). +#' @param ... additional JDBC database connection properties.
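For illustration, a sketch of the dropna()/fillna() semantics documented above (toy data is hypothetical; R NA values become nulls in the SparkDataFrame):

    df <- createDataFrame(data.frame(name = c("Alice", NA), age = c(NA_real_, 30), stringsAsFactors = FALSE))
    head(dropna(df, how = "any"))                      # drop rows that contain any null
    head(fillna(df, list(name = "unknown", age = 0)))  # named list: per-column replacements; cols is ignored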
#' @family SparkDataFrame functions #' @rdname write.jdbc #' @name write.jdbc http://git-wip-us.apache.org/repos/asf/spark/blob/01401e96/R/pkg/R/SQLContext.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 0c06bba..a9cd2d8 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -165,9 +165,9 @@ getDefaultSqlSource <- function() { #' #' Converts R data.frame or list into SparkDataFrame. #' -#' @param data An RDD or list or data.frame -#' @param schema a list of column names or named list (StructType), optional -#' @return a SparkDataFrame +#' @param data an RDD or list or data.frame. +#' @param schema a list of column names or named list (StructType), optional. +#' @return A SparkDataFrame. #' @rdname createDataFrame #' @export #' @examples @@ -257,23 +257,25 @@ createDataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { } createDataFrame <- function(x, ...) { - dispatchFunc("createDataFrame(data, schema = NULL, samplingRatio = 1.0)", x, ...) + dispatchFunc("createDataFrame(data, schema = NULL)", x, ...) } +#' @param samplingRatio Currently not used. #' @rdname createDataFrame #' @aliases createDataFrame #' @export #' @method as.DataFrame default #' @note as.DataFrame since 1.6.0 as.DataFrame.default <- function(data, schema = NULL, samplingRatio = 1.0) { - createDataFrame(data, schema, samplingRatio) + createDataFrame(data, schema) } +#' @param ... additional argument(s). #' @rdname createDataFrame #' @aliases as.DataFrame #' @export -as.DataFrame <- function(x, ...) { - dispatchFunc("as.DataFrame(data, schema = NULL, samplingRatio = 1.0)", x, ...) +as.DataFrame <- function(data, ...) { + dispatchFunc("as.DataFrame(data, schema = NULL)", data, ...) } #' toDF @@ -398,7 +400,7 @@ read.orc <- function(path) { #' #' Loads a Parquet file, returning the result as a SparkDataFrame. #' -#' @param path Path of file to read. A vector of multiple paths is allowed. +#' @param path path of file to read. A vector of multiple paths is allowed. #' @return SparkDataFrame #' @rdname read.parquet #' @export @@ -418,6 +420,7 @@ read.parquet <- function(x, ...) { dispatchFunc("read.parquet(...)", x, ...) } +#' @param ... argument(s) passed to the method. #' @rdname read.parquet #' @name parquetFile #' @export @@ -727,6 +730,7 @@ dropTempView <- function(viewName) { #' @param source The name of external data source #' @param schema The data schema defined in structType #' @param na.strings Default string value for NA when source is "csv" +#' @param ... additional external data source specific named properties. #' @return SparkDataFrame #' @rdname read.df #' @name read.df @@ -791,10 +795,11 @@ loadDF <- function(x, ...) { #' If `source` is not specified, the default data source configured by #' "spark.sql.sources.default" will be used. #' -#' @param tableName A name of the table -#' @param path The path of files to load -#' @param source the name of external data source -#' @return SparkDataFrame +#' @param tableName a name of the table. +#' @param path the path of files to load. +#' @param source the name of external data source. +#' @param ... additional argument(s) passed to the method. +#' @return A SparkDataFrame. #' @rdname createExternalTable #' @export #' @examples @@ -840,6 +845,7 @@ createExternalTable <- function(x, ...) { #' clause expressions used to split the column `partitionColumn` evenly. #' This defaults to SparkContext.defaultParallelism when unset. 
#' @param predicates a list of conditions in the where clause; each one defines one partition +#' @param ... additional JDBC database connection named properties. #' @return SparkDataFrame #' @rdname read.jdbc #' @name read.jdbc http://git-wip-us.apache.org/repos/asf/spark/blob/01401e96/R/pkg/R/WindowSpec.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/WindowSpec.R b/R/pkg/R/WindowSpec.R index 751ba3f..b55356b 100644 --- a/R/pkg/R/WindowSpec.R +++ b/R/pkg/R/WindowSpec.R @@ -54,8 +54,10 @@ setMethod("show", "WindowSpec", #' #' Defines the partitioning columns in a WindowSpec. #' -#' @param x a WindowSpec -#' @return a WindowSpec +#' @param x a WindowSpec. +#' @param col a column to partition on (described by the name or Column). +#' @param ... additional column(s) to partition on. +#' @return A WindowSpec. #' @rdname partitionBy #' @name partitionBy #' @aliases partitionBy,WindowSpec-method @@ -86,7 +88,7 @@ setMethod("partitionBy", #' #' Defines the ordering columns in a WindowSpec. #' @param x a WindowSpec -#' @param col a character or Column object indicating an ordering column +#' @param col a character or Column indicating an ordering column #' @param ... additional sorting fields #' @return A WindowSpec. #' @name orderBy @@ -192,6 +194,9 @@ setMethod("rangeBetween", #' #' Define a windowing column. #' +#' @param x a Column, usually one returned by window function(s). +#' @param window a WindowSpec object. Can be created by `windowPartitionBy` or +#' `windowOrderBy` and configured by other WindowSpec methods. #' @rdname over #' @name over #' @aliases over,Column,WindowSpec-method http://git-wip-us.apache.org/repos/asf/spark/blob/01401e96/R/pkg/R/column.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 0edb9d2..af486e1 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -163,8 +163,9 @@ setMethod("alias", #' @family colum_func #' @aliases substr,Column-method #' -#' @param start starting position -#' @param stop ending position +#' @param x a Column. +#' @param start starting position. +#' @param stop ending position. #' @note substr since 1.4.0 setMethod("substr", signature(x = "Column"), function(x, start, stop) { @@ -219,6 +220,7 @@ setMethod("endsWith", signature(x = "Column"), #' @family colum_func #' @aliases between,Column-method #' +#' @param x a Column #' @param bounds lower and upper bounds #' @note between since 1.5.0 setMethod("between", signature(x = "Column"), @@ -233,6 +235,11 @@ setMethod("between", signature(x = "Column"), #' Casts the column to a different data type. #' +#' @param x a Column. +#' @param dataType a character object describing the target data type. +#' See +#' \href{https://spark.apache.org/docs/latest/sparkr.html#data-type-mapping-between-r-and-spark}{ +#' Spark Data Types} for available data types. #' @rdname cast #' @name cast #' @family colum_func @@ -254,10 +261,12 @@ setMethod("cast", #' Match a column with given values. #' +#' @param x a Column. +#' @param table a collection of values (coercible to list) to compare with. #' @rdname match #' @name %in% #' @aliases %in%,Column-method -#' @return a matched values as a result of comparing with given values. +#' @return The matched values as a result of comparing with given values. #' @export #' @examples #' \dontrun{ @@ -277,6 +286,9 @@ setMethod("%in%", #' If values in the specified column are null, returns the value.
#' Can be used in conjunction with `when` to specify a default value for expressions. #' +#' @param x a Column. +#' @param value value to replace when the corresponding entry in \code{x} is NA. +#' Can be a single value or a Column. #' @rdname otherwise #' @name otherwise #' @family colum_func http://git-wip-us.apache.org/repos/asf/spark/blob/01401e96/R/pkg/R/functions.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 573c915..b3c10de 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -23,6 +23,7 @@ NULL #' A new \linkS4class{Column} is created to represent the literal value. #' If the parameter is a \linkS4class{Column}, it is returned unchanged. #' +#' @param x a literal value or a Column. #' @family normal_funcs #' @rdname lit #' @name lit @@ -89,8 +90,6 @@ setMethod("acos", #' Returns the approximate number of distinct items in a group. This is a column #' aggregate function. #' -#' @param x Column to compute on. -#' #' @rdname approxCountDistinct #' @name approxCountDistinct #' @return the approximate number of distinct items in a group. @@ -171,8 +170,6 @@ setMethod("atan", #' #' Aggregate function: returns the average of the values in a group. #' -#' @param x Column to compute on. -#' #' @rdname avg #' @name avg #' @family agg_funcs @@ -319,7 +316,7 @@ setMethod("column", #' #' Computes the Pearson Correlation Coefficient for two Columns. #' -#' @param x Column to compute on. +#' @param col2 a (second) Column. #' #' @rdname corr #' @name corr @@ -339,8 +336,6 @@ setMethod("corr", signature(x = "Column"), #' #' Compute the sample covariance between two expressions. #' -#' @param x Column to compute on. -#' #' @rdname cov #' @name cov #' @family math_funcs @@ -362,8 +357,8 @@ setMethod("cov", signature(x = "characterOrColumn"), #' @rdname cov #' -#' @param col1 First column to compute cov_samp. -#' @param col2 Second column to compute cov_samp. +#' @param col1 the first Column. +#' @param col2 the second Column. #' @name covar_samp #' @aliases covar_samp,characterOrColumn,characterOrColumn-method #' @note covar_samp since 2.0.0 @@ -451,9 +446,7 @@ setMethod("cosh", #' #' Returns the number of items in a group. This is a column aggregate function. #' -#' @param x Column to compute on. -#' -#' @rdname nrow +#' @rdname count #' @name count #' @family agg_funcs #' @aliases count,Column-method @@ -493,6 +486,7 @@ setMethod("crc32", #' Calculates the hash code of given columns, and returns the result as a int column. #' #' @param x Column to compute on. +#' @param ... additional Column(s) to be included. #' #' @rdname hash #' @name hash @@ -663,7 +657,8 @@ setMethod("factorial", #' The function by default returns the first values it sees. It will return the first non-missing #' value it sees when na.rm is set to true. If all values are missing, then NA is returned. #' -#' @param x Column to compute on. +#' @param na.rm a logical value indicating whether NA values should be stripped +#' before the computation proceeds. #' #' @rdname first #' @name first @@ -832,7 +827,10 @@ setMethod("kurtosis", #' The function by default returns the last values it sees. It will return the last non-missing #' value it sees when na.rm is set to true. If all values are missing, then NA is returned. #' -#' @param x Column to compute on. +#' @param x column to compute on. +#' @param na.rm a logical value indicating whether NA values should be stripped +#' before the computation proceeds. +#' @param ... 
further arguments to be passed to or from other methods. #' #' @rdname last #' @name last @@ -1143,7 +1141,7 @@ setMethod("minute", #' @export #' @examples \dontrun{select(df, monotonically_increasing_id())} setMethod("monotonically_increasing_id", - signature(x = "missing"), + signature("missing"), function() { jc <- callJStatic("org.apache.spark.sql.functions", "monotonically_increasing_id") column(jc) @@ -1272,13 +1270,16 @@ setMethod("round", #' bround #' -#' Returns the value of the column `e` rounded to `scale` decimal places using HALF_EVEN rounding -#' mode if `scale` >= 0 or at integral part when `scale` < 0. +#' Returns the value of the column \code{e} rounded to \code{scale} decimal places using HALF_EVEN rounding +#' mode if \code{scale} >= 0 or at integer part when \code{scale} < 0. #' Also known as Gaussian rounding or bankers' rounding that rounds to the nearest even number. #' bround(2.5, 0) = 2, bround(3.5, 0) = 4. #' #' @param x Column to compute on. -#' +#' @param scale round to \code{scale} digits to the right of the decimal point when \code{scale} > 0, +#' the nearest even number when \code{scale} = 0, and \code{scale} digits to the left +#' of the decimal point when \code{scale} < 0. +#' @param ... further arguments to be passed to or from other methods. #' @rdname bround #' @name bround #' @family math_funcs @@ -1319,7 +1320,7 @@ setMethod("rtrim", #' Aggregate function: alias for \link{stddev_samp} #' #' @param x Column to compute on. -#' +#' @param na.rm currently not used. #' @rdname sd #' @name sd #' @family agg_funcs @@ -1497,7 +1498,7 @@ setMethod("soundex", #' \dontrun{select(df, spark_partition_id())} #' @note spark_partition_id since 2.0.0 setMethod("spark_partition_id", - signature(x = "missing"), + signature("missing"), function() { jc <- callJStatic("org.apache.spark.sql.functions", "spark_partition_id") column(jc) @@ -1560,7 +1561,8 @@ setMethod("stddev_samp", #' #' Creates a new struct column that composes multiple input columns. #' -#' @param x Column to compute on. +#' @param x a column to compute on. +#' @param ... optional column(s) to be included. #' #' @rdname struct #' @name struct @@ -1831,8 +1833,8 @@ setMethod("upper", #' #' Aggregate function: alias for \link{var_samp}. #' -#' @param x Column to compute on. -#' +#' @param x a Column to compute on. +#' @param y,na.rm,use currently not used. #' @rdname var #' @name var #' @family agg_funcs @@ -2114,7 +2116,9 @@ setMethod("pmod", signature(y = "Column"), #' @rdname approxCountDistinct #' @name approxCountDistinct #' +#' @param x Column to compute on. #' @param rsd maximum estimation error allowed (default = 0.05) +#' @param ... further arguments to be passed to or from other methods. #' #' @aliases approxCountDistinct,Column-method #' @export @@ -2127,7 +2131,7 @@ setMethod("approxCountDistinct", column(jc) }) -#' Count Distinct +#' Count Distinct Values #' #' @param x Column to compute on #' @param ... other columns @@ -2156,7 +2160,7 @@ setMethod("countDistinct", #' concat #' #' Concatenates multiple input string columns together into a single string column. -#' +#' #' @param x Column to compute on #' @param ... other columns #' @@ -2246,7 +2250,6 @@ setMethod("ceiling", }) #' @rdname sign -#' @param x Column to compute on #' #' @name sign #' @aliases sign,Column-method @@ -2262,9 +2265,6 @@ setMethod("sign", signature(x = "Column"), #' #' Aggregate function: returns the number of distinct items in a group. #' -#' @param x Column to compute on -#' @param ... 
other columns -#' #' @rdname countDistinct #' @name n_distinct #' @aliases n_distinct,Column-method @@ -2276,9 +2276,7 @@ setMethod("n_distinct", signature(x = "Column"), countDistinct(x, ...) }) -#' @rdname nrow -#' @param x Column to compute on -#' +#' @rdname count #' @name n #' @aliases n,Column-method #' @export @@ -2300,8 +2298,8 @@ setMethod("n", signature(x = "Column"), #' NOTE: Use when ever possible specialized functions like \code{year}. These benefit from a #' specialized implementation. #' -#' @param y Column to compute on -#' @param x date format specification +#' @param y Column to compute on. +#' @param x date format specification. #' #' @family datetime_funcs #' @rdname date_format @@ -2320,8 +2318,8 @@ setMethod("date_format", signature(y = "Column", x = "character"), #' #' Assumes given timestamp is UTC and converts to given timezone. #' -#' @param y Column to compute on -#' @param x time zone to use +#' @param y Column to compute on. +#' @param x time zone to use. #' #' @family datetime_funcs #' @rdname from_utc_timestamp @@ -2370,8 +2368,8 @@ setMethod("instr", signature(y = "Column", x = "character"), #' Day of the week parameter is case insensitive, and accepts first three or two characters: #' "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun". #' -#' @param y Column to compute on -#' @param x Day of the week string +#' @param y Column to compute on. +#' @param x Day of the week string. #' #' @family datetime_funcs #' @rdname next_day @@ -2637,6 +2635,7 @@ setMethod("conv", signature(x = "Column", fromBase = "numeric", toBase = "numeri #' Parses the expression string into the column that it represents, similar to #' SparkDataFrame.selectExpr #' +#' @param x an expression character object to be parsed. #' @family normal_funcs #' @rdname expr #' @aliases expr,character-method @@ -2654,6 +2653,9 @@ setMethod("expr", signature(x = "character"), #' #' Formats the arguments in printf-style and returns the result as a string column. #' +#' @param format a character object of format strings. +#' @param x a Column. +#' @param ... additional Column(s). #' @family string_funcs #' @rdname format_string #' @name format_string @@ -2676,6 +2678,11 @@ setMethod("format_string", signature(format = "character", x = "Column"), #' representing the timestamp of that moment in the current system time zone in the given #' format. #' +#' @param x a Column of unix timestamp. +#' @param format the target format. See +#' \href{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}{ +#' Customizing Formats} for available options. +#' @param ... further arguments to be passed to or from other methods. #' @family datetime_funcs #' @rdname from_unixtime #' @name from_unixtime @@ -2702,19 +2709,21 @@ setMethod("from_unixtime", signature(x = "Column"), #' [12:05,12:10) but not in [12:00,12:05). Windows can support microsecond precision. Windows in #' the order of months are not supported. #' -#' The time column must be of TimestampType. -#' -#' Durations are provided as strings, e.g. '1 second', '1 day 12 hours', '2 minutes'. Valid -#' interval strings are 'week', 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond'. -#' If the `slideDuration` is not provided, the windows will be tumbling windows. -#' -#' The startTime is the offset with respect to 1970-01-01 00:00:00 UTC with which to start -#' window intervals. For example, in order to have hourly tumbling windows that start 15 minutes -#' past the hour, e.g. 12:15-13:15, 13:15-14:15... 
provide `startTime` as `15 minutes`. -#' -#' The output column will be a struct called 'window' by default with the nested columns 'start' -#' and 'end'. -#' +#' @param x a time Column. Must be of TimestampType. +#' @param windowDuration a string specifying the width of the window, e.g. '1 second', +#' '1 day 12 hours', '2 minutes'. Valid interval strings are 'week', +#' 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond'. +#' @param slideDuration a string specifying the sliding interval of the window. Same format as +#' \code{windowDuration}. A new window will be generated every +#' \code{slideDuration}. Must be less than or equal to +#' the \code{windowDuration}. +#' @param startTime the offset with respect to 1970-01-01 00:00:00 UTC with which to start +#' window intervals. For example, in order to have hourly tumbling windows +#' that start 15 minutes past the hour, e.g. 12:15-13:15, 13:15-14:15... provide +#' \code{startTime} as \code{"15 minutes"}. +#' @param ... further arguments to be passed to or from other methods. +#' @return An output column of struct called 'window' by default with the nested columns 'start' +#' and 'end'. #' @family datetime_funcs #' @rdname window #' @name window @@ -2766,6 +2775,10 @@ setMethod("window", signature(x = "Column"), #' NOTE: The position is not zero based, but 1 based index, returns 0 if substr #' could not be found in str. #' +#' @param substr a character string to be matched. +#' @param str a Column where matches are sought for each entry. +#' @param pos start position of search. +#' @param ... further arguments to be passed to or from other methods. #' @family string_funcs #' @rdname locate #' @aliases locate,character,Column-method @@ -2785,6 +2798,9 @@ setMethod("locate", signature(substr = "character", str = "Column"), #' #' Left-pad the string column with #' +#' @param x the string Column to be left-padded. +#' @param len maximum length of each output result. +#' @param pad a character string to be padded with. #' @family string_funcs #' @rdname lpad #' @aliases lpad,Column,numeric,character-method @@ -2804,6 +2820,7 @@ setMethod("lpad", signature(x = "Column", len = "numeric", pad = "character"), #' #' Generate a random column with i.i.d. samples from U[0.0, 1.0]. #' +#' @param seed a random seed. Can be missing. #' @family normal_funcs #' @rdname rand #' @name rand @@ -2832,6 +2849,7 @@ setMethod("rand", signature(seed = "numeric"), #' #' Generate a column with i.i.d. samples from the standard normal distribution. #' +#' @param seed a random seed. Can be missing. #' @family normal_funcs #' @rdname randn #' @name randn @@ -2860,6 +2878,9 @@ setMethod("randn", signature(seed = "numeric"), #' #' Extract a specific(idx) group identified by a java regex, from the specified string column. #' +#' @param x a string Column. +#' @param pattern a regular expression. +#' @param idx a group index. #' @family string_funcs #' @rdname regexp_extract #' @name regexp_extract @@ -2880,6 +2901,9 @@ setMethod("regexp_extract", #' #' Replace all substrings of the specified string value that match regexp with rep. #' +#' @param x a string Column. +#' @param pattern a regular expression. +#' @param replacement a character string that a matched \code{pattern} is replaced with. #' @family string_funcs #' @rdname regexp_replace #' @name regexp_replace @@ -2900,6 +2924,9 @@ setMethod("regexp_replace", #' #' Right-padded with pad to a length of len. #' +#' @param x the string Column to be right-padded. 
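For illustration, a sketch of the window() parameters documented above: hourly tumbling windows that start 15 minutes past the hour (column names and data are hypothetical, not part of this patch):

    df <- createDataFrame(data.frame(ts = as.POSIXct(c("2016-01-01 12:20:00", "2016-01-01 13:30:00")), v = c(1, 2)))
    win <- window(df$ts, "1 hour", startTime = "15 minutes")  # no slideDuration: tumbling windows
    res <- agg(groupBy(df, win), total = sum(df$v))           # result carries the 'window' struct with 'start'/'end'
    head(res)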
+#' @param len maximum length of each output result. +#' @param pad a character string to be padded with. #' @family string_funcs #' @rdname rpad #' @name rpad @@ -2922,6 +2949,11 @@ setMethod("rpad", signature(x = "Column", len = "numeric", pad = "character"), #' returned. If count is negative, every to the right of the final delimiter (counting from the #' right) is returned. substring_index performs a case-sensitive match when searching for delim. #' +#' @param x a Column. +#' @param delim a delimiter string. +#' @param count number of occurrences of \code{delim} before the substring is returned. +#' A positive number means counting from the left, while negative means +#' counting from the right. #' @family string_funcs #' @rdname substring_index #' @aliases substring_index,Column,character,numeric-method @@ -2949,6 +2981,11 @@ setMethod("substring_index", #' The translate will happen when any character in the string matching with the character #' in the matchingString. #' +#' @param x a string Column. +#' @param matchingString a source string where each character will be translated. +#' @param replaceString a target string where each \code{matchingString} character will +#' be replaced by the character in \code{replaceString} +#' at the same location, if any. #' @family string_funcs #' @rdname translate #' @name translate @@ -2997,6 +3034,10 @@ setMethod("unix_timestamp", signature(x = "Column", format = "missing"), column(jc) }) +#' @param x a Column of date, in string, date or timestamp type. +#' @param format the target format. See +#' \href{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}{ +#' Customizing Formats} for available options. #' @rdname unix_timestamp #' @name unix_timestamp #' @aliases unix_timestamp,Column,character-method @@ -3012,6 +3053,8 @@ setMethod("unix_timestamp", signature(x = "Column", format = "character"), #' Evaluates a list of conditions and returns one of multiple possible result expressions. #' For unmatched expressions null is returned. #' +#' @param condition the condition to test on. Must be a Column expression. +#' @param value result expression. #' @family normal_funcs #' @rdname when #' @name when @@ -3033,6 +3076,9 @@ setMethod("when", signature(condition = "Column", value = "ANY"), #' Evaluates a list of conditions and returns \code{yes} if the conditions are satisfied. #' Otherwise \code{no} is returned for unmatched conditions. #' +#' @param test a Column expression that describes the condition. +#' @param yes return values for \code{TRUE} elements of test. +#' @param no return values for \code{FALSE} elements of test. 
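For illustration, a sketch of the when()/ifelse() semantics documented above (toy column is hypothetical):

    df <- createDataFrame(data.frame(x = c(-1, 0, 2)))
    head(select(df,
                when(df$x > 0, "pos"),                 # unmatched conditions yield null
                ifelse(df$x >= 0, "non-neg", "neg")))  # yes/no returned for TRUE/FALSE of the test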
#' @family normal_funcs #' @rdname ifelse #' @name ifelse @@ -3074,10 +3120,14 @@ setMethod("ifelse", #' @family window_funcs #' @aliases cume_dist,missing-method #' @export -#' @examples \dontrun{cume_dist()} +#' @examples \dontrun{ +#' df <- createDataFrame(iris) +#' ws <- orderBy(windowPartitionBy("Species"), "Sepal_Length") +#' out <- select(df, over(cume_dist(), ws), df$Sepal_Length, df$Species) +#' } #' @note cume_dist since 1.6.0 setMethod("cume_dist", - signature(x = "missing"), + signature("missing"), function() { jc <- callJStatic("org.apache.spark.sql.functions", "cume_dist") column(jc) @@ -3101,7 +3151,7 @@ setMethod("cume_dist", #' @examples \dontrun{dense_rank()} #' @note dense_rank since 1.6.0 setMethod("dense_rank", - signature(x = "missing"), + signature("missing"), function() { jc <- callJStatic("org.apache.spark.sql.functions", "dense_rank") column(jc) @@ -3115,6 +3165,11 @@ setMethod("dense_rank", #' #' This is equivalent to the LAG function in SQL. #' +#' @param x the column as a character string or a Column to compute on. +#' @param offset the number of rows back from the current row from which to obtain a value. +#' If not specified, the default is 1. +#' @param defaultValue default to use when the offset row does not exist. +#' @param ... further arguments to be passed to or from other methods. #' @rdname lag #' @name lag #' @aliases lag,characterOrColumn-method @@ -3143,7 +3198,7 @@ setMethod("lag", #' an `offset` of one will return the next row at any given point in the window partition. #' #' This is equivalent to the LEAD function in SQL. -#' +#' #' @param x Column to compute on #' @param offset Number of rows to offset #' @param defaultValue (Optional) default value to use @@ -3211,7 +3266,7 @@ setMethod("ntile", #' @examples \dontrun{percent_rank()} #' @note percent_rank since 1.6.0 setMethod("percent_rank", - signature(x = "missing"), + signature("missing"), function() { jc <- callJStatic("org.apache.spark.sql.functions", "percent_rank") column(jc) @@ -3243,6 +3298,8 @@ setMethod("rank", }) # Expose rank() in the R base package +#' @param x a numeric, complex, character or logical vector. +#' @param ... additional argument(s) passed to the method. #' @name rank #' @rdname rank #' @aliases rank,ANY-method @@ -3267,7 +3324,7 @@ setMethod("rank", #' @examples \dontrun{row_number()} #' @note row_number since 1.6.0 setMethod("row_number", - signature(x = "missing"), + signature("missing"), function() { jc <- callJStatic("org.apache.spark.sql.functions", "row_number") column(jc) @@ -3318,7 +3375,7 @@ setMethod("explode", #' size #' #' Returns length of array or map. -#' +#' #' @param x Column to compute on #' #' @rdname size http://git-wip-us.apache.org/repos/asf/spark/blob/01401e96/R/pkg/R/generics.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 693aa31..6610a25 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -380,6 +380,9 @@ setGeneric("value", function(bcast) { standardGeneric("value") }) #################### SparkDataFrame Methods ######################## +#' @param x a SparkDataFrame or GroupedData. +#' @param ... further arguments to be passed to or from other methods. +#' @return A SparkDataFrame. #' @rdname summarize #' @export setGeneric("agg", function (x, ...) { standardGeneric("agg") }) @@ -407,6 +410,8 @@ setGeneric("cache", function(x) { standardGeneric("cache") }) #' @export setGeneric("collect", function(x, ...) 
{ standardGeneric("collect") }) +#' @param do.NULL currently not used. +#' @param prefix currently not used. #' @rdname columns #' @export setGeneric("colnames", function(x, do.NULL = TRUE, prefix = "col") { standardGeneric("colnames") }) @@ -427,15 +432,24 @@ setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") }) #' @export setGeneric("columns", function(x) {standardGeneric("columns") }) -#' @rdname nrow +#' @param x a GroupedData or Column. +#' @rdname count #' @export setGeneric("count", function(x) { standardGeneric("count") }) #' @rdname cov +#' @param x a Column object or a SparkDataFrame. +#' @param ... additional argument(s). If `x` is a Column object, a Column object +#' should be provided. If `x` is a SparkDataFrame, two column names should +#' be provided. #' @export setGeneric("cov", function(x, ...) {standardGeneric("cov") }) #' @rdname corr +#' @param x a Column object or a SparkDataFrame. +#' @param ... additional argument(s). If `x` is a Column object, a Column object +#' should be provided. If `x` is a SparkDataFrame, two column names should +#' be provided. #' @export setGeneric("corr", function(x, ...) {standardGeneric("corr") }) @@ -462,10 +476,14 @@ setGeneric("dapply", function(x, func, schema) { standardGeneric("dapply") }) #' @export setGeneric("dapplyCollect", function(x, func) { standardGeneric("dapplyCollect") }) +#' @param x a SparkDataFrame or GroupedData. +#' @param ... additional argument(s) passed to the method. #' @rdname gapply #' @export setGeneric("gapply", function(x, ...) { standardGeneric("gapply") }) +#' @param x a SparkDataFrame or GroupedData. +#' @param ... additional argument(s) passed to the method. #' @rdname gapplyCollect #' @export setGeneric("gapplyCollect", function(x, ...) { standardGeneric("gapplyCollect") }) @@ -667,8 +685,8 @@ setGeneric("selectExpr", function(x, expr, ...) { standardGeneric("selectExpr") #' @export setGeneric("showDF", function(x, ...) { standardGeneric("showDF") }) -# @rdname subset -# @export +#' @rdname subset +#' @export setGeneric("subset", function(x, ...) { standardGeneric("subset") }) #' @rdname summarize @@ -735,6 +753,8 @@ setGeneric("between", function(x, bounds) { standardGeneric("between") }) setGeneric("cast", function(x, dataType) { standardGeneric("cast") }) #' @rdname columnfunctions +#' @param x a Column object. +#' @param ... additional argument(s). #' @export setGeneric("contains", function(x, ...) { standardGeneric("contains") }) @@ -830,6 +850,8 @@ setGeneric("array_contains", function(x, value) { standardGeneric("array_contain #' @export setGeneric("ascii", function(x) { standardGeneric("ascii") }) +#' @param x Column to compute on or a GroupedData object. +#' @param ... additional argument(s) when `x` is a GroupedData object. #' @rdname avg #' @export setGeneric("avg", function(x, ...) { standardGeneric("avg") }) @@ -886,9 +908,10 @@ setGeneric("crc32", function(x) { standardGeneric("crc32") }) #' @export setGeneric("hash", function(x, ...) { standardGeneric("hash") }) +#' @param x empty. Should be used with no argument. #' @rdname cume_dist #' @export -setGeneric("cume_dist", function(x) { standardGeneric("cume_dist") }) +setGeneric("cume_dist", function(x = "missing") { standardGeneric("cume_dist") }) #' @rdname datediff #' @export @@ -918,9 +941,10 @@ setGeneric("dayofyear", function(x) { standardGeneric("dayofyear") }) #' @export setGeneric("decode", function(x, charset) { standardGeneric("decode") }) +#' @param x empty. Should be used with no argument. 
#' @rdname dense_rank #' @export -setGeneric("dense_rank", function(x) { standardGeneric("dense_rank") }) +setGeneric("dense_rank", function(x = "missing") { standardGeneric("dense_rank") }) #' @rdname encode #' @export @@ -1034,10 +1058,11 @@ setGeneric("md5", function(x) { standardGeneric("md5") }) #' @export setGeneric("minute", function(x) { standardGeneric("minute") }) +#' @param x empty. Should be used with no argument. #' @rdname monotonically_increasing_id #' @export setGeneric("monotonically_increasing_id", - function(x) { standardGeneric("monotonically_increasing_id") }) + function(x = "missing") { standardGeneric("monotonically_increasing_id") }) #' @rdname month #' @export @@ -1047,7 +1072,7 @@ setGeneric("month", function(x) { standardGeneric("month") }) #' @export setGeneric("months_between", function(y, x) { standardGeneric("months_between") }) -#' @rdname nrow +#' @rdname count #' @export setGeneric("n", function(x) { standardGeneric("n") }) @@ -1071,9 +1096,10 @@ setGeneric("ntile", function(x) { standardGeneric("ntile") }) #' @export setGeneric("n_distinct", function(x, ...) { standardGeneric("n_distinct") }) +#' @param x empty. Should be used with no argument. #' @rdname percent_rank #' @export -setGeneric("percent_rank", function(x) { standardGeneric("percent_rank") }) +setGeneric("percent_rank", function(x = "missing") { standardGeneric("percent_rank") }) #' @rdname pmod #' @export @@ -1114,11 +1140,12 @@ setGeneric("reverse", function(x) { standardGeneric("reverse") }) #' @rdname rint #' @export -setGeneric("rint", function(x, ...) { standardGeneric("rint") }) +setGeneric("rint", function(x) { standardGeneric("rint") }) +#' @param x empty. Should be used with no argument. #' @rdname row_number #' @export -setGeneric("row_number", function(x) { standardGeneric("row_number") }) +setGeneric("row_number", function(x = "missing") { standardGeneric("row_number") }) #' @rdname rpad #' @export @@ -1176,9 +1203,10 @@ setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array") #' @export setGeneric("soundex", function(x) { standardGeneric("soundex") }) +#' @param x empty. Should be used with no argument. #' @rdname spark_partition_id #' @export -setGeneric("spark_partition_id", function(x) { standardGeneric("spark_partition_id") }) +setGeneric("spark_partition_id", function(x = "missing") { standardGeneric("spark_partition_id") }) #' @rdname sd #' @export @@ -1276,10 +1304,16 @@ setGeneric("year", function(x) { standardGeneric("year") }) #' @export setGeneric("spark.glm", function(data, formula, ...) { standardGeneric("spark.glm") }) +#' @param x,y For \code{glm}: logical values indicating whether the response vector +#' and model matrix used in the fitting process should be returned as +#' components of the returned value. +#' @inheritParams stats::glm #' @rdname glm #' @export setGeneric("glm") +#' @param object a fitted ML model object. +#' @param ... additional argument(s) passed to the method. #' @rdname predict #' @export setGeneric("predict", function(object, ...) { standardGeneric("predict") }) @@ -1302,7 +1336,7 @@ setGeneric("spark.naiveBayes", function(data, formula, ...) { standardGeneric("s #' @rdname spark.survreg #' @export -setGeneric("spark.survreg", function(data, formula, ...) { standardGeneric("spark.survreg") }) +setGeneric("spark.survreg", function(data, formula) { standardGeneric("spark.survreg") }) #' @rdname spark.lda #' @param ... Additional parameters to tune LDA. 
@@ -1328,7 +1362,9 @@ setGeneric("spark.gaussianMixture", standardGeneric("spark.gaussianMixture") }) -#' write.ml +#' @param object a fitted ML model object. +#' @param path the directory where the model is saved. +#' @param ... additional argument(s) passed to the method. #' @rdname write.ml #' @export setGeneric("write.ml", function(object, path, ...) { standardGeneric("write.ml") }) http://git-wip-us.apache.org/repos/asf/spark/blob/01401e96/R/pkg/R/group.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index 85348ae..3c85ada 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -59,8 +59,7 @@ setMethod("show", "GroupedData", #' Count the number of rows for each group. #' The resulting SparkDataFrame will also contain the grouping columns. #' -#' @param x a GroupedData -#' @return a SparkDataFrame +#' @return A SparkDataFrame. #' @rdname count #' @aliases count,GroupedData-method #' @export @@ -83,8 +82,6 @@ setMethod("count", #' df2 <- agg(df, <column> = <aggFunction>) #' df2 <- agg(df, newColName = aggFunction(column)) #' -#' @param x a GroupedData -#' @return a SparkDataFrame #' @rdname summarize #' @aliases agg,GroupedData-method #' @name agg @@ -201,7 +198,6 @@ createMethods() #' gapply #' -#' @param x A GroupedData #' @rdname gapply #' @aliases gapply,GroupedData-method #' @name gapply @@ -216,7 +212,6 @@ setMethod("gapply", #' gapplyCollect #' -#' @param x A GroupedData #' @rdname gapplyCollect #' @aliases gapplyCollect,GroupedData-method #' @name gapplyCollect http://git-wip-us.apache.org/repos/asf/spark/blob/01401e96/R/pkg/R/mllib.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R index 36f38fc..9a53c80 100644 --- a/R/pkg/R/mllib.R +++ b/R/pkg/R/mllib.R @@ -113,17 +113,18 @@ NULL #' Users can call \code{summary} to print a summary of the fitted model, \code{predict} to make #' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models. #' -#' @param data SparkDataFrame for training. -#' @param formula A symbolic description of the model to be fitted. Currently only a few formula +#' @param data a SparkDataFrame for training. +#' @param formula a symbolic description of the model to be fitted. Currently only a few formula #' operators are supported, including '~', '.', ':', '+', and '-'. -#' @param family A description of the error distribution and link function to be used in the model. +#' @param family a description of the error distribution and link function to be used in the model. #' This can be a character string naming a family function, a family function or #' the result of a call to a family function. Refer R family at #' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. -#' @param tol Positive convergence tolerance of iterations. -#' @param maxIter Integer giving the maximal number of IRLS iterations. -#' @param weightCol The weight column name. If this is not set or NULL, we treat all instance +#' @param weightCol the weight column name. If this is not set or NULL, we treat all instance #' weights as 1.0. +#' @param tol positive convergence tolerance of iterations. +#' @param maxIter integer giving the maximal number of IRLS iterations. +#' @param ... additional arguments passed to the method. 
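A minimal sketch of spark.glm using the parameters documented above, with tol and maxIter in their documented roles; the training data is illustrative and a running session is assumed:

    library(SparkR)
    training <- suppressWarnings(createDataFrame(iris))
    # gaussian family, positive convergence tolerance, cap on IRLS iterations
    model <- spark.glm(training, Sepal_Length ~ Sepal_Width, family = "gaussian",
                       tol = 1e-6, maxIter = 25)
    summary(model)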
#' @aliases spark.glm,SparkDataFrame,formula-method #' @return \code{spark.glm} returns a fitted generalized linear model #' @rdname spark.glm @@ -178,17 +179,17 @@ setMethod("spark.glm", signature(data = "SparkDataFrame", formula = "formula"), #' Generalized Linear Models (R-compliant) #' #' Fits a generalized linear model, similarly to R's glm(). -#' @param formula A symbolic description of the model to be fitted. Currently only a few formula +#' @param formula a symbolic description of the model to be fitted. Currently only a few formula #' operators are supported, including '~', '.', ':', '+', and '-'. -#' @param data SparkDataFrame for training. -#' @param family A description of the error distribution and link function to be used in the model. +#' @param data a SparkDataFrame or R's glm data for training. +#' @param family a description of the error distribution and link function to be used in the model. #' This can be a character string naming a family function, a family function or #' the result of a call to a family function. Refer R family at #' \url{https://stat.ethz.ch/R-manual/R-devel/library/stats/html/family.html}. -#' @param epsilon Positive convergence tolerance of iterations. -#' @param maxit Integer giving the maximal number of IRLS iterations. -#' @param weightCol The weight column name. If this is not set or NULL, we treat all instance +#' @param weightCol the weight column name. If this is not set or NULL, we treat all instance #' weights as 1.0. +#' @param epsilon positive convergence tolerance of iterations. +#' @param maxit integer giving the maximal number of IRLS iterations. #' @return \code{glm} returns a fitted generalized linear model. #' @rdname glm #' @export @@ -209,7 +210,7 @@ setMethod("glm", signature(formula = "formula", family = "ANY", data = "SparkDat # Returns the summary of a model produced by glm() or spark.glm(), similarly to R's summary(). -#' @param object A fitted generalized linear model +#' @param object a fitted generalized linear model. #' @return \code{summary} returns a summary object of the fitted model, a list of components #' including at least the coefficients, null/residual deviance, null/residual degrees #' of freedom, AIC and number of iterations IRLS takes. @@ -250,7 +251,7 @@ setMethod("summary", signature(object = "GeneralizedLinearRegressionModel"), # Prints the summary of GeneralizedLinearRegressionModel #' @rdname spark.glm -#' @param x Summary object of fitted generalized linear model returned by \code{summary} function +#' @param x summary object of fitted generalized linear model returned by \code{summary} function #' @export #' @note print.summary.GeneralizedLinearRegressionModel since 2.0.0 print.summary.GeneralizedLinearRegressionModel <- function(x, ...) { @@ -282,7 +283,7 @@ print.summary.GeneralizedLinearRegressionModel <- function(x, ...) { # Makes predictions from a generalized linear model produced by glm() or spark.glm(), # similarly to R's predict(). -#' @param newData SparkDataFrame for testing +#' @param newData a SparkDataFrame for testing. #' @return \code{predict} returns a SparkDataFrame containing predicted labels in a column named #' "prediction" #' @rdname spark.glm @@ -296,7 +297,7 @@ setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"), # Makes predictions from a naive Bayes model or a model produced by spark.naiveBayes(), # similarly to R package e1071's predict. -#' @param newData A SparkDataFrame for testing +#' @param newData a SparkDataFrame for testing. 
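As a sketch of the predict contract just documented, where newData is a SparkDataFrame for testing and the result carries a "prediction" column (illustrative data, active session assumed):

    library(SparkR)
    df <- suppressWarnings(createDataFrame(iris))
    model <- spark.glm(df, Sepal_Length ~ Sepal_Width, family = "gaussian")
    preds <- predict(model, newData = df)
    # predicted values are appended as a "prediction" column
    showDF(select(preds, "Sepal_Length", "prediction"))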
#' @return \code{predict} returns a SparkDataFrame containing predicted labels in a column named #' "prediction" #' @rdname spark.naiveBayes @@ -309,9 +310,9 @@ setMethod("predict", signature(object = "NaiveBayesModel"), # Returns the summary of a naive Bayes model produced by \code{spark.naiveBayes} -#' @param object A naive Bayes model fitted by \code{spark.naiveBayes} +#' @param object a naive Bayes model fitted by \code{spark.naiveBayes}. #' @return \code{summary} returns a list containing \code{apriori}, the label distribution, and -#' \code{tables}, conditional probabilities given the target label +#' \code{tables}, conditional probabilities given the target label. #' @rdname spark.naiveBayes #' @export #' @note summary(NaiveBayesModel) since 2.0.0 @@ -491,7 +492,6 @@ setMethod("predict", signature(object = "IsotonicRegressionModel"), # Get the summary of an IsotonicRegressionModel model -#' @param object a fitted IsotonicRegressionModel #' @param ... Other optional arguments to summary of an IsotonicRegressionModel #' @return \code{summary} returns the model's boundaries and prediction as lists #' @rdname spark.isoreg @@ -512,14 +512,15 @@ #' Users can call \code{summary} to print a summary of the fitted model, \code{predict} to make #' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models. #' -#' @param data SparkDataFrame for training -#' @param formula A symbolic description of the model to be fitted. Currently only a few formula +#' @param data a SparkDataFrame for training. +#' @param formula a symbolic description of the model to be fitted. Currently only a few formula +#' operators are supported, including '~', '.', ':', '+', and '-'. #' Note that the response variable of formula is empty in spark.kmeans. -#' @param k Number of centers -#' @param maxIter Maximum iteration number -#' @param initMode The initialization algorithm choosen to fit the model -#' @return \code{spark.kmeans} returns a fitted k-means model +#' @param k number of centers. +#' @param maxIter maximum iteration number. +#' @param initMode the initialization algorithm chosen to fit the model. +#' @param ... additional argument(s) passed to the method. +#' @return \code{spark.kmeans} returns a fitted k-means model. #' @rdname spark.kmeans #' @aliases spark.kmeans,SparkDataFrame,formula-method #' @name spark.kmeans @@ -560,8 +561,11 @@ setMethod("spark.kmeans", signature(data = "SparkDataFrame", formula = "formula" #' Get fitted result from a k-means model, similarly to R's fitted(). #' Note: A saved-loaded model does not support this method. #' -#' @param object A fitted k-means model -#' @return \code{fitted} returns a SparkDataFrame containing fitted values +#' @param object a fitted k-means model. +#' @param method type of fitted results, \code{"centers"} for cluster centers +#' or \code{"classes"} for assigned classes. +#' @param ... additional argument(s) passed to the method. +#' @return \code{fitted} returns a SparkDataFrame containing fitted values. #' @rdname fitted #' @export #' @examples @@ -585,8 +589,8 @@ setMethod("fitted", signature(object = "KMeansModel"), # Get the summary of a k-means model -#' @param object A fitted k-means model -#' @return \code{summary} returns the model's coefficients, size and cluster +#' @param object a fitted k-means model. +#' @return \code{summary} returns the model's coefficients, size and cluster.
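A short sketch tying together the spark.kmeans and fitted signatures documented above; k, maxIter, initMode and method take the documented values, and the data is illustrative:

    library(SparkR)
    df <- suppressWarnings(createDataFrame(iris))
    km <- spark.kmeans(df, ~ Sepal_Length + Sepal_Width, k = 3,
                       maxIter = 20, initMode = "k-means||")
    summary(km)
    # "centers" returns cluster centers; "classes" the assigned clusters
    showDF(fitted(km, method = "centers"))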
#' @rdname spark.kmeans #' @export #' @note summary(KMeansModel) since 2.0.0 @@ -612,7 +616,8 @@ setMethod("summary", signature(object = "KMeansModel"), # Predicted values based on a k-means model -#' @return \code{predict} returns the predicted values based on a k-means model +#' @param newData a SparkDataFrame for testing. +#' @return \code{predict} returns the predicted values based on a k-means model. #' @rdname spark.kmeans #' @export #' @note predict(KMeansModel) since 2.0.0 @@ -628,11 +633,12 @@ setMethod("predict", signature(object = "KMeansModel"), #' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models. #' Only categorical data is supported. #' -#' @param data A \code{SparkDataFrame} of observations and labels for model fitting -#' @param formula A symbolic description of the model to be fitted. Currently only a few formula +#' @param data a \code{SparkDataFrame} of observations and labels for model fitting. +#' @param formula a symbolic description of the model to be fitted. Currently only a few formula #' operators are supported, including '~', '.', ':', '+', and '-'. -#' @param smoothing Smoothing parameter -#' @return \code{spark.naiveBayes} returns a fitted naive Bayes model +#' @param smoothing smoothing parameter. +#' @param ... additional argument(s) passed to the method. Currently only \code{smoothing}. +#' @return \code{spark.naiveBayes} returns a fitted naive Bayes model. #' @rdname spark.naiveBayes #' @aliases spark.naiveBayes,SparkDataFrame,formula-method #' @name spark.naiveBayes @@ -668,8 +674,8 @@ setMethod("spark.naiveBayes", signature(data = "SparkDataFrame", formula = "form # Saves the Bernoulli naive Bayes model to the input path. -#' @param path The directory where the model is saved -#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE +#' @param path the directory where the model is saved +#' @param overwrite overwrites or not if the output path already exists. Default is FALSE #' which means throw exception if the output path exists. #' #' @rdname spark.naiveBayes @@ -687,10 +693,9 @@ setMethod("write.ml", signature(object = "NaiveBayesModel", path = "character"), # Saves the AFT survival regression model to the input path. -#' @param path The directory where the model is saved -#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE +#' @param path the directory where the model is saved. +#' @param overwrite overwrites or not if the output path already exists. Default is FALSE #' which means throw exception if the output path exists. -#' #' @rdname spark.survreg #' @export #' @note write.ml(AFTSurvivalRegressionModel, character) since 2.0.0 @@ -706,8 +711,8 @@ setMethod("write.ml", signature(object = "AFTSurvivalRegressionModel", path = "c # Saves the generalized linear model to the input path. -#' @param path The directory where the model is saved -#' @param overwrite Overwrites or not if the output path already exists. Default is FALSE +#' @param path the directory where the model is saved. +#' @param overwrite overwrites or not if the output path already exists. Default is FALSE #' which means throw exception if the output path exists. #' #' @rdname spark.glm @@ -724,8 +729,8 @@ setMethod("write.ml", signature(object = "GeneralizedLinearRegressionModel", pat # Save fitted MLlib model to the input path -#' @param path The directory where the model is saved -#' @param overwrite Overwrites or not if the output path already exists. 
Default is FALSE +#' @param path the directory where the model is saved. +#' @param overwrite overwrites or not if the output path already exists. Default is FALSE #' which means throw exception if the output path exists. #' #' @rdname spark.kmeans @@ -780,8 +785,8 @@ setMethod("write.ml", signature(object = "GaussianMixtureModel", path = "charact #' Load a fitted MLlib model from the input path. #' -#' @param path Path of the model to read. -#' @return a fitted MLlib model +#' @param path path of the model to read. +#' @return A fitted MLlib model. #' @rdname read.ml #' @name read.ml #' @export @@ -823,11 +828,11 @@ read.ml <- function(path) { #' \code{predict} to make predictions on new data, and \code{write.ml}/\code{read.ml} to #' save/load fitted models. #' -#' @param data A SparkDataFrame for training -#' @param formula A symbolic description of the model to be fitted. Currently only a few formula +#' @param data a SparkDataFrame for training. +#' @param formula a symbolic description of the model to be fitted. Currently only a few formula #' operators are supported, including '~', ':', '+', and '-'. -#' Note that operator '.' is not supported currently -#' @return \code{spark.survreg} returns a fitted AFT survival regression model +#' Note that operator '.' is not supported currently. +#' @return \code{spark.survreg} returns a fitted AFT survival regression model. #' @rdname spark.survreg #' @seealso survival: \url{https://cran.r-project.org/web/packages/survival/} #' @export @@ -851,7 +856,7 @@ read.ml <- function(path) { #' } #' @note spark.survreg since 2.0.0 setMethod("spark.survreg", signature(data = "SparkDataFrame", formula = "formula"), - function(data, formula, ...) { + function(data, formula) { formula <- paste(deparse(formula), collapse = "") jobj <- callJStatic("org.apache.spark.ml.r.AFTSurvivalRegressionWrapper", "fit", formula, data@sdf) @@ -927,14 +932,14 @@ setMethod("spark.lda", signature(data = "SparkDataFrame"), # Returns a summary of the AFT survival regression model produced by spark.survreg, # similarly to R's summary(). -#' @param object A fitted AFT survival regression model +#' @param object a fitted AFT survival regression model. #' @return \code{summary} returns a list containing the model's coefficients, #' intercept and log(scale) #' @rdname spark.survreg #' @export #' @note summary(AFTSurvivalRegressionModel) since 2.0.0 setMethod("summary", signature(object = "AFTSurvivalRegressionModel"), - function(object, ...) { + function(object) { jobj <- object@jobj features <- callJMethod(jobj, "rFeatures") coefficients <- callJMethod(jobj, "rCoefficients") @@ -947,9 +952,9 @@ setMethod("summary", signature(object = "AFTSurvivalRegressionModel"), # Makes predictions from an AFT survival regression model or a model produced by # spark.survreg, similarly to R package survival's predict. -#' @param newData A SparkDataFrame for testing +#' @param newData a SparkDataFrame for testing. #' @return \code{predict} returns a SparkDataFrame containing predicted values -#' on the original scale of the data (mean predicted value at scale = 1.0) +#' on the original scale of the data (mean predicted value at scale = 1.0). 
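A sketch of the AFT workflow described above, from fitting through write.ml/read.ml. It leans on the survival package's ovarian data, as the SparkR examples commonly do, so that Surv() is available; the tempfile path is illustrative:

    library(SparkR)
    library(survival)   # provides Surv() and the ovarian data set
    df <- suppressWarnings(createDataFrame(ovarian))
    model <- spark.survreg(df, Surv(futime, fustat) ~ ecog_ps + rx)
    summary(model)           # coefficients, intercept and log(scale)
    path <- tempfile()
    write.ml(model, path)    # persists the fitted model to a directory
    model2 <- read.ml(path)  # loads it back as a fitted MLlib model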
#' @rdname spark.survreg #' @export #' @note predict(AFTSurvivalRegressionModel) since 2.0.0 http://git-wip-us.apache.org/repos/asf/spark/blob/01401e96/R/pkg/R/schema.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/schema.R b/R/pkg/R/schema.R index b429f5d..cb5bdb9 100644 --- a/R/pkg/R/schema.R +++ b/R/pkg/R/schema.R @@ -92,8 +92,9 @@ print.structType <- function(x, ...) { #' #' Create a structField object that contains the metadata for a single field in a schema. #' -#' @param x The name of the field -#' @return a structField object +#' @param x the name of the field. +#' @param ... additional argument(s) passed to the method. +#' @return A structField object. #' @rdname structField #' @export #' @examples http://git-wip-us.apache.org/repos/asf/spark/blob/01401e96/R/pkg/R/sparkR.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index f8bdee7..85815af 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -320,14 +320,15 @@ sparkRHive.init <- function(jsc = NULL) { #' For details on how to initialize and use SparkR, refer to SparkR programming guide at #' \url{http://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession}. #' -#' @param master The Spark master URL -#' @param appName Application name to register with cluster manager -#' @param sparkHome Spark Home directory -#' @param sparkConfig Named list of Spark configuration to set on worker nodes -#' @param sparkJars Character vector of jar files to pass to the worker nodes -#' @param sparkPackages Character vector of packages from spark-packages.org -#' @param enableHiveSupport Enable support for Hive, fallback if not built with Hive support; once +#' @param master the Spark master URL. +#' @param appName application name to register with cluster manager. +#' @param sparkHome Spark Home directory. +#' @param sparkConfig named list of Spark configuration to set on worker nodes. +#' @param sparkJars character vector of jar files to pass to the worker nodes. +#' @param sparkPackages character vector of packages from spark-packages.org +#' @param enableHiveSupport enable support for Hive, fallback if not built with Hive support; once #' set, this cannot be turned off on an existing session +#' @param ... named Spark properties passed to the method. #' @export #' @examples #'\dontrun{ @@ -413,9 +414,9 @@ sparkR.session <- function( #' Assigns a group ID to all the jobs started by this thread until the group ID is set to a #' different value or cleared. #' -#' @param groupid the ID to be assigned to job groups -#' @param description description for the job group ID -#' @param interruptOnCancel flag to indicate if the job is interrupted on job cancellation +#' @param groupId the ID to be assigned to job groups. +#' @param description description for the job group ID. +#' @param interruptOnCancel flag to indicate if the job is interrupted on job cancellation. #' @rdname setJobGroup #' @name setJobGroup #' @examples http://git-wip-us.apache.org/repos/asf/spark/blob/01401e96/R/pkg/R/stats.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R index 2b4ce19..8ea24d8 100644 --- a/R/pkg/R/stats.R +++ b/R/pkg/R/stats.R @@ -25,6 +25,7 @@ setOldClass("jobj") #' table. The number of distinct values for each column should be less than 1e4. At most 1e6 #' non-zero pair frequencies will be returned. 
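For the sparkR.session parameters documented above, a minimal start-up sketch; the master URL, application name and config values are illustrative:

    library(SparkR)
    sparkR.session(master = "local[2]", appName = "cran-check",
                   sparkConfig = list(spark.driver.memory = "1g"))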
#' +#' @param x a SparkDataFrame #' @param col1 name of the first column. Distinct items will make the first item of each row. #' @param col2 name of the second column. Distinct items will make the column names of the output. #' @return a local R data.frame representing the contingency table. The first column of each row @@ -53,10 +54,9 @@ setMethod("crosstab", #' Calculate the sample covariance of two numerical columns of a SparkDataFrame. #' -#' @param x A SparkDataFrame -#' @param col1 the name of the first column -#' @param col2 the name of the second column -#' @return the covariance of the two columns. +#' @param colName1 the name of the first column +#' @param colName2 the name of the second column +#' @return The covariance of the two columns. #' #' @rdname cov #' @name cov @@ -71,19 +71,18 @@ setMethod("crosstab", #' @note cov since 1.6.0 setMethod("cov", signature(x = "SparkDataFrame"), - function(x, col1, col2) { - stopifnot(class(col1) == "character" && class(col2) == "character") + function(x, colName1, colName2) { + stopifnot(class(colName1) == "character" && class(colName2) == "character") statFunctions <- callJMethod(x@sdf, "stat") - callJMethod(statFunctions, "cov", col1, col2) + callJMethod(statFunctions, "cov", colName1, colName2) }) #' Calculates the correlation of two columns of a SparkDataFrame. #' Currently only supports the Pearson Correlation Coefficient. #' For Spearman Correlation, consider using RDD methods found in MLlib's Statistics. #' -#' @param x A SparkDataFrame -#' @param col1 the name of the first column -#' @param col2 the name of the second column +#' @param colName1 the name of the first column +#' @param colName2 the name of the second column #' @param method Optional. A character specifying the method for calculating the correlation. #' only "pearson" is allowed now. #' @return The Pearson Correlation Coefficient as a Double. @@ -102,10 +101,10 @@ setMethod("cov", #' @note corr since 1.6.0 setMethod("corr", signature(x = "SparkDataFrame"), - function(x, col1, col2, method = "pearson") { - stopifnot(class(col1) == "character" && class(col2) == "character") + function(x, colName1, colName2, method = "pearson") { + stopifnot(class(colName1) == "character" && class(colName2) == "character") statFunctions <- callJMethod(x@sdf, "stat") - callJMethod(statFunctions, "corr", col1, col2, method) + callJMethod(statFunctions, "corr", colName1, colName2, method) })
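Finally, a usage sketch of the renamed cov/corr signatures, which now take colName1/colName2 as documented above; the mtcars data is illustrative and an active session is assumed:

    library(SparkR)
    df <- createDataFrame(mtcars)
    cov(df, "mpg", "wt")                       # sample covariance of two numeric columns
    corr(df, "mpg", "wt", method = "pearson")  # Pearson correlation coefficient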