spark git commit: [SPARK-8844] [SPARKR] head/collect is broken in SparkR.
Repository: spark Updated Branches: refs/heads/master 182f9b7a6 -> 5f9ce738f [SPARK-8844] [SPARKR] head/collect is broken in SparkR. This is a WIP patch for SPARK-8844 for collecting reviews. This bug is about reading an empty DataFrame. in readCol(), lapply(1:numRows, function(x) { does not take into consideration the case where numRows = 0. Will add unit test case. Author: Sun Rui Closes #7419 from sun-rui/SPARK-8844. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5f9ce738 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5f9ce738 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5f9ce738 Branch: refs/heads/master Commit: 5f9ce738fe6bab3f0caffad0df1d3876178cf469 Parents: 182f9b7 Author: Sun Rui Authored: Sun Aug 16 00:30:02 2015 -0700 Committer: Shivaram Venkataraman Committed: Sun Aug 16 00:30:02 2015 -0700 -- R/pkg/R/deserialize.R| 16 ++-- R/pkg/inst/tests/test_sparkSQL.R | 20 2 files changed, 30 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5f9ce738/R/pkg/R/deserialize.R -- diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index 6d364f7..33bf13e 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -176,10 +176,14 @@ readRow <- function(inputCon) { # Take a single column as Array[Byte] and deserialize it into an atomic vector readCol <- function(inputCon, numRows) { - # sapply can not work with POSIXlt - do.call(c, lapply(1:numRows, function(x) { -value <- readObject(inputCon) -# Replace NULL with NA so we can coerce to vectors -if (is.null(value)) NA else value - })) + if (numRows > 0) { +# sapply can not work with POSIXlt +do.call(c, lapply(1:numRows, function(x) { + value <- readObject(inputCon) + # Replace NULL with NA so we can coerce to vectors + if (is.null(value)) NA else value +})) + } else { +vector() + } } http://git-wip-us.apache.org/repos/asf/spark/blob/5f9ce738/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index e6d3b21..c77f633 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -408,6 +408,14 @@ test_that("collect() returns a data.frame", { expect_equal(names(rdf)[1], "age") expect_equal(nrow(rdf), 3) expect_equal(ncol(rdf), 2) + + # collect() returns data correctly from a DataFrame with 0 row + df0 <- limit(df, 0) + rdf <- collect(df0) + expect_true(is.data.frame(rdf)) + expect_equal(names(rdf)[1], "age") + expect_equal(nrow(rdf), 0) + expect_equal(ncol(rdf), 2) }) test_that("limit() returns DataFrame with the correct number of rows", { @@ -492,6 +500,18 @@ test_that("head() and first() return the correct data", { testFirst <- first(df) expect_equal(nrow(testFirst), 1) + + # head() and first() return the correct data on + # a DataFrame with 0 row + df0 <- limit(df, 0) + + testHead <- head(df0) + expect_equal(nrow(testHead), 0) + expect_equal(ncol(testHead), 2) + + testFirst <- first(df0) + expect_equal(nrow(testFirst), 0) + expect_equal(ncol(testFirst), 2) }) test_that("distinct() and unique on DataFrames", { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8844] [SPARKR] head/collect is broken in SparkR.
Repository: spark Updated Branches: refs/heads/branch-1.5 881baf100 -> 4f75ce2e1 [SPARK-8844] [SPARKR] head/collect is broken in SparkR. This is a WIP patch for SPARK-8844 for collecting reviews. This bug is about reading an empty DataFrame. in readCol(), lapply(1:numRows, function(x) { does not take into consideration the case where numRows = 0. Will add unit test case. Author: Sun Rui Closes #7419 from sun-rui/SPARK-8844. (cherry picked from commit 5f9ce738fe6bab3f0caffad0df1d3876178cf469) Signed-off-by: Shivaram Venkataraman Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f75ce2e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f75ce2e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f75ce2e Branch: refs/heads/branch-1.5 Commit: 4f75ce2e193c813f4e3ad067749b6e7b4f0ee135 Parents: 881baf1 Author: Sun Rui Authored: Sun Aug 16 00:30:02 2015 -0700 Committer: Shivaram Venkataraman Committed: Sun Aug 16 00:30:10 2015 -0700 -- R/pkg/R/deserialize.R| 16 ++-- R/pkg/inst/tests/test_sparkSQL.R | 20 2 files changed, 30 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4f75ce2e/R/pkg/R/deserialize.R -- diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index 6d364f7..33bf13e 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -176,10 +176,14 @@ readRow <- function(inputCon) { # Take a single column as Array[Byte] and deserialize it into an atomic vector readCol <- function(inputCon, numRows) { - # sapply can not work with POSIXlt - do.call(c, lapply(1:numRows, function(x) { -value <- readObject(inputCon) -# Replace NULL with NA so we can coerce to vectors -if (is.null(value)) NA else value - })) + if (numRows > 0) { +# sapply can not work with POSIXlt +do.call(c, lapply(1:numRows, function(x) { + value <- readObject(inputCon) + # Replace NULL with NA so we can coerce to vectors + if (is.null(value)) NA else value +})) + } else { +vector() + } } http://git-wip-us.apache.org/repos/asf/spark/blob/4f75ce2e/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index e6d3b21..c77f633 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -408,6 +408,14 @@ test_that("collect() returns a data.frame", { expect_equal(names(rdf)[1], "age") expect_equal(nrow(rdf), 3) expect_equal(ncol(rdf), 2) + + # collect() returns data correctly from a DataFrame with 0 row + df0 <- limit(df, 0) + rdf <- collect(df0) + expect_true(is.data.frame(rdf)) + expect_equal(names(rdf)[1], "age") + expect_equal(nrow(rdf), 0) + expect_equal(ncol(rdf), 2) }) test_that("limit() returns DataFrame with the correct number of rows", { @@ -492,6 +500,18 @@ test_that("head() and first() return the correct data", { testFirst <- first(df) expect_equal(nrow(testFirst), 1) + + # head() and first() return the correct data on + # a DataFrame with 0 row + df0 <- limit(df, 0) + + testHead <- head(df0) + expect_equal(nrow(testHead), 0) + expect_equal(ncol(testHead), 2) + + testFirst <- first(df0) + expect_equal(nrow(testFirst), 0) + expect_equal(ncol(testFirst), 2) }) test_that("distinct() and unique on DataFrames", { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org