Repository: spark
Updated Branches:
  refs/heads/branch-2.0 4e193d3da -> 38f3b76bd


[SPARKR][DOCS] R code doc cleanup

## What changes were proposed in this pull request?

I ran a full pass from A to Z and fixed the obvious duplications, improper grouping, etc.

There are still more doc issues to be cleaned up.

## How was this patch tested?

manual tests

Author: Felix Cheung <felixcheun...@hotmail.com>

Closes #13798 from felixcheung/rdocseealso.

(cherry picked from commit 09f4ceaeb0a99874f774e09d868fdf907ecf256f)
Signed-off-by: Shivaram Venkataraman <shiva...@cs.berkeley.edu>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/38f3b76b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/38f3b76b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/38f3b76b

Branch: refs/heads/branch-2.0
Commit: 38f3b76bd6b4a3e4d20048beeb92275ebf93c8d8
Parents: 4e193d3
Author: Felix Cheung <felixcheun...@hotmail.com>
Authored: Mon Jun 20 23:51:08 2016 -0700
Committer: Shivaram Venkataraman <shiva...@cs.berkeley.edu>
Committed: Mon Jun 20 23:51:20 2016 -0700

----------------------------------------------------------------------
 R/pkg/R/DataFrame.R  | 39 ++++++++++++++++++---------------------
 R/pkg/R/SQLContext.R |  6 +++---
 R/pkg/R/column.R     |  6 ++++++
 R/pkg/R/context.R    |  5 +++--
 R/pkg/R/functions.R  | 40 +++++++++++++---------------------------
 R/pkg/R/generics.R   | 44 ++++++++++++++++++++++----------------------
 R/pkg/R/mllib.R      |  6 ------
 R/pkg/R/sparkR.R     |  8 +++++---
 8 files changed, 70 insertions(+), 84 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/38f3b76b/R/pkg/R/DataFrame.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index b3f2dd8..a8ade1a 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -463,6 +463,7 @@ setMethod("createOrReplaceTempView",
           })
 
 #' (Deprecated) Register Temporary Table
+#'
 #' Registers a SparkDataFrame as a Temporary Table in the SQLContext
 #' @param x A SparkDataFrame
 #' @param tableName A character vector containing the name of the table
@@ -606,10 +607,10 @@ setMethod("unpersist",
 #'
 #' The following options for repartition are possible:
 #' \itemize{
-#'  \item{"Option 1"} {Return a new SparkDataFrame partitioned by
+#'  \item{1.} {Return a new SparkDataFrame partitioned by
 #'                      the given columns into `numPartitions`.}
-#'  \item{"Option 2"} {Return a new SparkDataFrame that has exactly 
`numPartitions`.}
-#'  \item{"Option 3"} {Return a new SparkDataFrame partitioned by the given 
column(s),
+#'  \item{2.} {Return a new SparkDataFrame that has exactly `numPartitions`.}
+#'  \item{3.} {Return a new SparkDataFrame partitioned by the given column(s),
 #'                      using `spark.sql.shuffle.partitions` as number of 
partitions.}
 #'}
 #' @param x A SparkDataFrame
@@ -1053,7 +1054,7 @@ setMethod("limit",
             dataFrame(res)
           })
 
-#' Take the first NUM rows of a SparkDataFrame and return a the results as a data.frame
+#' Take the first NUM rows of a SparkDataFrame and return the results as an R data.frame
 #'
 #' @family SparkDataFrame functions
 #' @rdname take
@@ -1076,7 +1077,7 @@ setMethod("take",
 
 #' Head
 #'
-#' Return the first NUM rows of a SparkDataFrame as a data.frame. If NUM is NULL,
+#' Return the first NUM rows of a SparkDataFrame as an R data.frame. If NUM is NULL,
 #' then head() returns the first 6 rows in keeping with the current data.frame
 #' convention in R.
 #'
@@ -1157,7 +1158,6 @@ setMethod("toRDD",
 #'
 #' @param x a SparkDataFrame
 #' @return a GroupedData
-#' @seealso GroupedData
 #' @family SparkDataFrame functions
 #' @rdname groupBy
 #' @name groupBy
@@ -1242,9 +1242,9 @@ dapplyInternal <- function(x, func, schema) {
 #'
 #' @param x A SparkDataFrame
 #' @param func A function to be applied to each partition of the SparkDataFrame.
-#'             func should have only one parameter, to which a data.frame corresponds
+#'             func should have only one parameter, to which an R data.frame corresponding
 #'             to each partition will be passed.
-#'             The output of func should be a data.frame.
+#'             The output of func should be an R data.frame.
 #' @param schema The schema of the resulting SparkDataFrame after the function is applied.
 #'               It must match the output of func.
 #' @family SparkDataFrame functions
@@ -1291,9 +1291,9 @@ setMethod("dapply",
 #'
 #' @param x A SparkDataFrame
 #' @param func A function to be applied to each partition of the SparkDataFrame.
-#'             func should have only one parameter, to which a data.frame corresponds
+#'             func should have only one parameter, to which an R data.frame corresponding
 #'             to each partition will be passed.
-#'             The output of func should be a data.frame.
+#'             The output of func should be an R data.frame.
 #' @family SparkDataFrame functions
 #' @rdname dapplyCollect
 #' @name dapplyCollect
@@ -1641,7 +1641,6 @@ setMethod("select", signature(x = "SparkDataFrame", col = "character"),
             }
           })
 
-#' @family SparkDataFrame functions
 #' @rdname select
 #' @export
 #' @note select(SparkDataFrame, Column) since 1.4.0
@@ -1654,7 +1653,6 @@ setMethod("select", signature(x = "SparkDataFrame", col = "Column"),
             dataFrame(sdf)
           })
 
-#' @family SparkDataFrame functions
 #' @rdname select
 #' @export
 #' @note select(SparkDataFrame, list) since 1.4.0
@@ -2001,7 +1999,6 @@ setMethod("filter",
             dataFrame(sdf)
           })
 
-#' @family SparkDataFrame functions
 #' @rdname filter
 #' @name where
 #' @note where since 1.4.0
@@ -2222,11 +2219,13 @@ setMethod("merge",
             joinRes
           })
 
+#' Creates a list of columns by replacing the intersected ones with aliases
+#'
 #' Creates a list of columns by replacing the intersected ones with aliases.
 #' The name of the alias column is formed by concatenating the original column name and a suffix.
 #'
-#' @param x a SparkDataFrame on which the
-#' @param intersectedColNames a list of intersected column names
+#' @param x a SparkDataFrame
+#' @param intersectedColNames a list of intersected column names of the SparkDataFrame
 #' @param suffix a suffix for the column name
 #' @return list of columns
 #'
@@ -2513,9 +2512,9 @@ setMethod("summary",
           })
 
 
-#' dropna
+#' A set of SparkDataFrame functions working with NA values
 #'
-#' Returns a new SparkDataFrame omitting rows with null values.
+#' dropna, na.omit - Returns a new SparkDataFrame omitting rows with null values.
 #'
 #' @param x A SparkDataFrame.
 #' @param how "any" or "all".
@@ -2567,9 +2566,7 @@ setMethod("na.omit",
             dropna(object, how, minNonNulls, cols)
           })
 
-#' fillna
-#'
-#' Replace null values.
+#' fillna - Replace null values.
 #'
 #' @param x A SparkDataFrame.
 #' @param value Value to replace null values with.
@@ -2640,7 +2637,7 @@ setMethod("fillna",
             dataFrame(sdf)
           })
 
-#' Download data from a SparkDataFrame into a data.frame
+#' Download data from a SparkDataFrame into an R data.frame
 #'
 #' This function downloads the contents of a SparkDataFrame into an R data.frame.
 #' Since data.frames are held in memory, ensure that you have enough memory
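
To make the regrouped DataFrame.R docs above concrete, here is a minimal hedged sketch of the `repartition`/`dapply`/`dropna`/`collect` contracts they describe, assuming a running SparkR 2.0 session (the data, column names, and schema are illustrative):

```r
df <- createDataFrame(data.frame(a = 1:3, b = c(1.5, 2.5, NA)))
df <- repartition(df, 2L)   # option 2 above: exactly numPartitions partitions

# dapply: func receives one R data.frame per partition and must return an
# R data.frame matching the declared schema.
schema <- structType(structField("a", "integer"),
                     structField("b", "double"),
                     structField("ab", "double"))
df2 <- dapply(df, function(pdf) { pdf$ab <- pdf$a * pdf$b; pdf }, schema)

# dropna drops rows with null values; collect then downloads the result into
# an R data.frame held in driver memory.
collect(dropna(df2, how = "any"))
```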

http://git-wip-us.apache.org/repos/asf/spark/blob/38f3b76b/R/pkg/R/SQLContext.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index 8d2c4ac..ee3a41c 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -67,7 +67,7 @@ dispatchFunc <- function(newFuncSig, x, ...) {
 }
 
 #' return the SparkSession
-#' @note getSparkSession since 2.0.0
+#' @noRd
 getSparkSession <- function() {
   if (exists(".sparkRsession", envir = .sparkREnv)) {
     get(".sparkRsession", envir = .sparkREnv)
@@ -77,7 +77,7 @@ getSparkSession <- function() {
 }
 
 #' infer the SQL type
-#' @note infer_type since 1.4.0
+#' @noRd
 infer_type <- function(x) {
   if (is.null(x)) {
     stop("can not infer type from NULL")
@@ -451,7 +451,7 @@ sql <- function(x, ...) {
 #' Create a SparkDataFrame from a SparkSQL Table
 #'
 #' Returns the specified Table as a SparkDataFrame.  The Table must have already been registered
-#' in the SQLContext.
+#' in the SparkSession.
 #'
 #' @param tableName The SparkSQL Table to convert to a SparkDataFrame.
 #' @return SparkDataFrame
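
A short sketch of the SparkSession-based flow these docs now point to, assuming a running session (the data and view name are illustrative):

```r
df <- createDataFrame(data.frame(name = c("Ann", "Bob"), age = c(29L, 17L)))
createOrReplaceTempView(df, "people")            # register under a table/view name
adults <- sql("SELECT name FROM people WHERE age >= 18")
head(adults)
```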

http://git-wip-us.apache.org/repos/asf/spark/blob/38f3b76b/R/pkg/R/column.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index 1af65d5..1a65912 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -34,6 +34,11 @@ setOldClass("jobj")
 setClass("Column",
          slots = list(jc = "jobj"))
 
+#' A set of operations working with SparkDataFrame columns
+#' @rdname columnfunctions
+#' @name columnfunctions
+NULL
+
 setMethod("initialize", "Column", function(.Object, jc) {
   .Object@jc <- jc
   .Object
@@ -47,6 +52,7 @@ setMethod("column",
 
 #' @rdname show
 #' @name show
+#' @export
 #' @note show(Column) since 1.4.0
 setMethod("show", "Column",
           function(object) {
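
The new `columnfunctions` page groups Column helpers such as `asc`, `desc`, `isNull`, and `isNotNull`; a hedged sketch of their use, assuming a running session:

```r
df <- createDataFrame(faithful)
head(filter(df, isNotNull(df$waiting)))   # isNotNull yields a Column condition
head(arrange(df, desc(df$waiting)))       # desc yields a descending sort order
```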

http://git-wip-us.apache.org/repos/asf/spark/blob/38f3b76b/R/pkg/R/context.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R
index 42f89c8..96ef943 100644
--- a/R/pkg/R/context.R
+++ b/R/pkg/R/context.R
@@ -225,9 +225,10 @@ setCheckpointDir <- function(sc, dirName) {
   invisible(callJMethod(sc, "setCheckpointDir", suppressWarnings(normalizePath(dirName))))
 }
 
-#' Run a function over a list of elements, distributing the computations with Spark.
+#' Run a function over a list of elements, distributing the computations with Spark
 #'
-#' Applies a function in a manner that is similar to doParallel or lapply to elements of a list.
+#' Run a function over a list of elements, distributing the computations with Spark. Applies a
+#' function in a manner that is similar to doParallel or lapply to elements of a list.
 #' The computations are distributed using Spark. It is conceptually the same as the following code:
 #'   lapply(list, func)
 #'
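
In 2.0 this doc block belongs to spark.lapply; a minimal hedged sketch of the behavior it describes, assuming a running session:

```r
# Distribute a simple computation over a list; the result comes back as an R list.
squares <- spark.lapply(1:10, function(x) { x * x })
unlist(squares)   # conceptually the same as unlist(lapply(1:10, function(x) x * x))
```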

http://git-wip-us.apache.org/repos/asf/spark/blob/38f3b76b/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index ce23869..6e0009f 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -77,13 +77,14 @@ setMethod("acos",
             column(jc)
           })
 
-#' approxCountDistinct
+#' Returns the approximate number of distinct items in a group
 #'
-#' Aggregate function: returns the approximate number of distinct items in a group.
+#' Returns the approximate number of distinct items in a group. This is a column
+#' aggregate function.
 #'
 #' @rdname approxCountDistinct
 #' @name approxCountDistinct
-#' @family agg_funcs
+#' @return the approximate number of distinct items in a group.
 #' @export
 #' @examples \dontrun{approxCountDistinct(df$c)}
 #' @note approxCountDistinct(Column) since 1.4.0
@@ -234,7 +235,7 @@ setMethod("cbrt",
             column(jc)
           })
 
-#' ceil
+#' Computes the ceiling of the given value
 #'
 #' Computes the ceiling of the given value.
 #'
@@ -254,15 +255,16 @@ setMethod("ceil",
 #' Though scala functions has "col" function, we don't expose it in SparkR
 #' because we don't want to conflict with the "col" function in the R base
 #' package and we also have "column" function exported which is an alias of "col".
+#' @noRd
 col <- function(x) {
   column(callJStatic("org.apache.spark.sql.functions", "col", x))
 }
 
-#' column
+#' Returns a Column based on the given column name
 #'
 #' Returns a Column based on the given column name.
 #'
-#' @rdname col
+#' @rdname column
 #' @name column
 #' @family normal_funcs
 #' @export
@@ -385,9 +387,9 @@ setMethod("cosh",
             column(jc)
           })
 
-#' count
+#' Returns the number of items in a group
 #'
-#' Aggregate function: returns the number of items in a group.
+#' Returns the number of items in a group. This is a column aggregate function.
 #'
 #' @rdname count
 #' @name count
@@ -1193,7 +1195,7 @@ setMethod("sha1",
 #'
 #' Computes the signum of the given value.
 #'
-#' @rdname signum
+#' @rdname sign
 #' @name signum
 #' @family math_funcs
 #' @export
@@ -1717,7 +1719,7 @@ setMethod("datediff", signature(y = "Column"),
 
 #' hypot
 #'
-#' Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
+#' Computes "sqrt(a^2 + b^2)" without intermediate overflow or underflow.
 #'
 #' @rdname hypot
 #' @name hypot
@@ -1813,12 +1815,8 @@ setMethod("pmod", signature(y = "Column"),
           })
 
 
-#' Approx Count Distinct
-#'
-#' @family agg_funcs
 #' @rdname approxCountDistinct
 #' @name approxCountDistinct
-#' @return the approximate number of distinct items in a group.
 #' @export
 #' @examples \dontrun{approxCountDistinct(df$c, 0.02)}
 #' @note approxCountDistinct(Column, numeric) since 1.4.0
@@ -1918,10 +1916,6 @@ setMethod("least",
             column(jc)
           })
 
-#' ceiling
-#'
-#' Computes the ceiling of the given value.
-#'
 #' @rdname ceil
 #' @name ceiling
 #' @export
@@ -1933,11 +1927,7 @@ setMethod("ceiling",
             ceil(x)
           })
 
-#' sign
-#'
-#' Computes the signum of the given value.
-#'
-#' @rdname signum
+#' @rdname sign
 #' @name sign
 #' @export
 #' @examples \dontrun{sign(df$c)}
@@ -1961,10 +1951,6 @@ setMethod("n_distinct", signature(x = "Column"),
             countDistinct(x, ...)
           })
 
-#' n
-#'
-#' Aggregate function: returns the number of items in a group.
-#'
 #' @rdname count
 #' @name n
 #' @export
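
A hedged sketch of the regrouped aggregate functions above (`approxCountDistinct` with a relative-error bound, and `count`), assuming a running session:

```r
df <- createDataFrame(faithful)
head(select(df, approxCountDistinct(df$waiting, 0.02), count(df$waiting)))
```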

http://git-wip-us.apache.org/repos/asf/spark/blob/38f3b76b/R/pkg/R/generics.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index c307de7..ead403b 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -430,7 +430,7 @@ setGeneric("coltypes", function(x) { standardGeneric("coltypes") })
 #' @export
 setGeneric("coltypes<-", function(x, value) { standardGeneric("coltypes<-") })
 
-#' @rdname schema
+#' @rdname columns
 #' @export
 setGeneric("columns", function(x) {standardGeneric("columns") })
 
@@ -495,7 +495,7 @@ setGeneric("na.omit",
              standardGeneric("na.omit")
            })
 
-#' @rdname schema
+#' @rdname dtypes
 #' @export
 setGeneric("dtypes", function(x) { standardGeneric("dtypes") })
 
@@ -551,7 +551,7 @@ setGeneric("mutate", function(.data, ...) {standardGeneric("mutate") })
 #' @export
 setGeneric("orderBy", function(x, col, ...) { standardGeneric("orderBy") })
 
-#' @rdname schema
+#' @rdname printSchema
 #' @export
 setGeneric("printSchema", function(x) { standardGeneric("printSchema") })
 
@@ -638,7 +638,7 @@ setGeneric("schema", function(x) { standardGeneric("schema") })
 #' @export
 setGeneric("select", function(x, col, ...) { standardGeneric("select") } )
 
-#' @rdname select
+#' @rdname selectExpr
 #' @export
 setGeneric("selectExpr", function(x, expr, ...) { 
standardGeneric("selectExpr") })
 
@@ -693,67 +693,67 @@ setGeneric("randomSplit", function(x, weights, seed) { standardGeneric("randomSp
 
 ###################### Column Methods ##########################
 
-#' @rdname column
+#' @rdname columnfunctions
 #' @export
 setGeneric("asc", function(x) { standardGeneric("asc") })
 
-#' @rdname column
+#' @rdname between
 #' @export
 setGeneric("between", function(x, bounds) { standardGeneric("between") })
 
-#' @rdname column
+#' @rdname cast
 #' @export
 setGeneric("cast", function(x, dataType) { standardGeneric("cast") })
 
-#' @rdname column
+#' @rdname columnfunctions
 #' @export
 setGeneric("contains", function(x, ...) { standardGeneric("contains") })
 
-#' @rdname column
+#' @rdname columnfunctions
 #' @export
 setGeneric("desc", function(x) { standardGeneric("desc") })
 
-#' @rdname column
+#' @rdname endsWith
 #' @export
 setGeneric("endsWith", function(x, suffix) { standardGeneric("endsWith") })
 
-#' @rdname column
+#' @rdname columnfunctions
 #' @export
 setGeneric("getField", function(x, ...) { standardGeneric("getField") })
 
-#' @rdname column
+#' @rdname columnfunctions
 #' @export
 setGeneric("getItem", function(x, ...) { standardGeneric("getItem") })
 
-#' @rdname column
+#' @rdname columnfunctions
 #' @export
 setGeneric("isNaN", function(x) { standardGeneric("isNaN") })
 
-#' @rdname column
+#' @rdname columnfunctions
 #' @export
 setGeneric("isNull", function(x) { standardGeneric("isNull") })
 
-#' @rdname column
+#' @rdname columnfunctions
 #' @export
 setGeneric("isNotNull", function(x) { standardGeneric("isNotNull") })
 
-#' @rdname column
+#' @rdname columnfunctions
 #' @export
 setGeneric("like", function(x, ...) { standardGeneric("like") })
 
-#' @rdname column
+#' @rdname columnfunctions
 #' @export
 setGeneric("rlike", function(x, ...) { standardGeneric("rlike") })
 
-#' @rdname column
+#' @rdname startsWith
 #' @export
 setGeneric("startsWith", function(x, prefix) { standardGeneric("startsWith") })
 
-#' @rdname column
+#' @rdname when
 #' @export
 setGeneric("when", function(condition, value) { standardGeneric("when") })
 
-#' @rdname column
+#' @rdname otherwise
 #' @export
 setGeneric("otherwise", function(x, value) { standardGeneric("otherwise") })
 
@@ -825,7 +825,7 @@ setGeneric("cbrt", function(x) { standardGeneric("cbrt") })
 #' @export
 setGeneric("ceil", function(x) { standardGeneric("ceil") })
 
-#' @rdname col
+#' @rdname column
 #' @export
 setGeneric("column", function(x) { standardGeneric("column") })
 
@@ -1119,7 +1119,7 @@ setGeneric("shiftRight", function(y, x) { standardGeneric("shiftRight") })
 #' @export
 setGeneric("shiftRightUnsigned", function(y, x) { 
standardGeneric("shiftRightUnsigned") })
 
-#' @rdname signum
+#' @rdname sign
 #' @export
 setGeneric("signum", function(x) { standardGeneric("signum") })
 
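
Several Column generics move here from the shared `column` page to pages of their own (`between`, `cast`, `when`, `otherwise`, ...); a hedged sketch of the calls they document, assuming a running session:

```r
df <- createDataFrame(faithful)
head(select(df, cast(df$waiting, "string"),
                otherwise(when(df$eruptions > 3, "long"), "short")))
head(filter(df, between(df$waiting, c(50, 80))))
```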

http://git-wip-us.apache.org/repos/asf/spark/blob/38f3b76b/R/pkg/R/mllib.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/mllib.R b/R/pkg/R/mllib.R
index d6ff2aa..74dba8f 100644
--- a/R/pkg/R/mllib.R
+++ b/R/pkg/R/mllib.R
@@ -235,8 +235,6 @@ setMethod("predict", signature(object = "GeneralizedLinearRegressionModel"),
 #' similarly to R package e1071's predict.
 #'
 #' @param object A fitted naive Bayes model
-#' @param newData SparkDataFrame for testing
-#' @return SparkDataFrame containing predicted labels in a column named "prediction"
 #' @rdname predict
 #' @export
 #' @examples
@@ -378,8 +376,6 @@ setMethod("summary", signature(object = "KMeansModel"),
 #' Makes predictions from a k-means model or a model produced by spark.kmeans().
 #'
 #' @param object A fitted k-means model
-#' @param newData SparkDataFrame for testing
-#' @return SparkDataFrame containing predicted labels in a column named "prediction"
 #' @rdname predict
 #' @export
 #' @examples
@@ -621,8 +617,6 @@ setMethod("summary", signature(object = "AFTSurvivalRegressionModel"),
 #' similarly to R package survival's predict.
 #'
 #' @param object A fitted AFT survival regression model
-#' @param newData SparkDataFrame for testing
-#' @return SparkDataFrame containing predicted labels in a column named "prediction"
 #' @rdname predict
 #' @export
 #' @examples
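
The removed `@param`/`@return` tags were duplicates of the shared `@rdname predict` page; a hedged sketch of that shared contract, assuming a running session (the model settings here are illustrative and not part of this change):

```r
df <- createDataFrame(iris)   # note: "." in column names becomes "_"
model <- spark.kmeans(df, ~ Sepal_Length + Sepal_Width, k = 3)
head(predict(model, df))      # returns a SparkDataFrame with a "prediction" column
```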

http://git-wip-us.apache.org/repos/asf/spark/blob/38f3b76b/R/pkg/R/sparkR.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index 94d0e63..2b6e124 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -36,6 +36,8 @@ sparkR.stop <- function() {
   sparkR.session.stop()
 }
 
+#' Stop the Spark Session and Spark Context
+#'
 #' Stop the Spark Session and Spark Context.
 #'
 #' Also terminates the backend this R session is connected to.
@@ -88,7 +90,7 @@ sparkR.session.stop <- function() {
   clearJobjs()
 }
 
-#' (Deprecated) Initialize a new Spark Context.
+#' (Deprecated) Initialize a new Spark Context
 #'
 #' This function initializes a new SparkContext.
 #'
@@ -249,7 +251,7 @@ sparkR.sparkContext <- function(
   sc
 }
 
-#' (Deprecated) Initialize a new SQLContext.
+#' (Deprecated) Initialize a new SQLContext
 #'
 #' This function creates a SparkContext from an existing JavaSparkContext and
 #' then uses it to initialize a new SQLContext
@@ -278,7 +280,7 @@ sparkRSQL.init <- function(jsc = NULL) {
   sparkR.session(enableHiveSupport = FALSE)
 }
 
-#' (Deprecated) Initialize a new HiveContext.
+#' (Deprecated) Initialize a new HiveContext
 #'
 #' This function creates a HiveContext from an existing JavaSparkContext
 #'
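
For contrast with the deprecated initializers above, a hedged sketch of the 2.0 session lifecycle (the master and appName values are illustrative):

```r
sparkR.session(master = "local[*]", appName = "doc-example")
df <- createDataFrame(faithful)
head(df)
sparkR.session.stop()   # stops the session, the context, and the backend
```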

