Repository: spark Updated Branches: refs/heads/master 9909f6d36 -> 1f6c090c1
[SPARK-19818][SPARKR] rbind should check for name consistency of input data frames ## What changes were proposed in this pull request? Added checks for name consistency of input data frames in union. ## How was this patch tested? new test. Author: actuaryzhang <actuaryzhan...@gmail.com> Closes #17159 from actuaryzhang/sparkRUnion. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1f6c090c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1f6c090c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1f6c090c Branch: refs/heads/master Commit: 1f6c090c15f355a0c2aad736f8291fcdee5c556d Parents: 9909f6d Author: actuaryzhang <actuaryzhan...@gmail.com> Authored: Mon Mar 6 21:55:11 2017 -0800 Committer: Felix Cheung <felixche...@apache.org> Committed: Mon Mar 6 21:55:11 2017 -0800 ---------------------------------------------------------------------- R/pkg/R/DataFrame.R | 8 +++++++- R/pkg/inst/tests/testthat/test_sparkSQL.R | 7 +++++++ 2 files changed, 14 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/1f6c090c/R/pkg/R/DataFrame.R ---------------------------------------------------------------------- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index e33d0d8..97e0c9e 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2642,6 +2642,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) { #' #' Return a new SparkDataFrame containing the union of rows in this SparkDataFrame #' and another SparkDataFrame. This is equivalent to \code{UNION ALL} in SQL. +#' Input SparkDataFrames can have different schemas (names and data types). #' #' Note: This does not remove duplicate rows across the two SparkDataFrames. #' @@ -2685,7 +2686,8 @@ setMethod("unionAll", #' Union two or more SparkDataFrames #' -#' Union two or more SparkDataFrames. This is equivalent to \code{UNION ALL} in SQL. +#' Union two or more SparkDataFrames by row. As in R's \code{rbind}, this method +#' requires that the input SparkDataFrames have the same column names. #' #' Note: This does not remove duplicate rows across the two SparkDataFrames. #' @@ -2709,6 +2711,10 @@ setMethod("unionAll", setMethod("rbind", signature(... = "SparkDataFrame"), function(x, ..., deparse.level = 1) { + nm <- lapply(list(x, ...), names) + if (length(unique(nm)) != 1) { + stop("Names of input data frames are different.") + } if (nargs() == 3) { union(x, ...) } else { http://git-wip-us.apache.org/repos/asf/spark/blob/1f6c090c/R/pkg/inst/tests/testthat/test_sparkSQL.R ---------------------------------------------------------------------- diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R index 7c09659..620b633 100644 --- a/R/pkg/inst/tests/testthat/test_sparkSQL.R +++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R @@ -1850,6 +1850,13 @@ test_that("union(), rbind(), except(), and intersect() on a DataFrame", { expect_equal(count(unioned2), 12) expect_equal(first(unioned2)$name, "Michael") + df3 <- df2 + names(df3)[1] <- "newName" + expect_error(rbind(df, df3), + "Names of input data frames are different.") + expect_error(rbind(df, df2, df3), + "Names of input data frames are different.") + excepted <- arrange(except(df, df2), desc(df$age)) expect_is(unioned, "SparkDataFrame") expect_equal(count(excepted), 2) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org