spark git commit: [SPARK-8844] [SPARKR] head/collect is broken in SparkR.

2015-08-16 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/master 182f9b7a6 -> 5f9ce738f


[SPARK-8844] [SPARKR] head/collect is broken in SparkR.

This is a WIP patch for SPARK-8844  for collecting reviews.

This bug is about reading an empty DataFrame. In readCol(),
  lapply(1:numRows, function(x) {
does not take into consideration the case where numRows = 0.

Will add unit test case.

Author: Sun Rui 

Closes #7419 from sun-rui/SPARK-8844.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5f9ce738
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5f9ce738
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5f9ce738

Branch: refs/heads/master
Commit: 5f9ce738fe6bab3f0caffad0df1d3876178cf469
Parents: 182f9b7
Author: Sun Rui 
Authored: Sun Aug 16 00:30:02 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Sun Aug 16 00:30:02 2015 -0700

--
 R/pkg/R/deserialize.R| 16 ++--
 R/pkg/inst/tests/test_sparkSQL.R | 20 
 2 files changed, 30 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5f9ce738/R/pkg/R/deserialize.R
--
diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R
index 6d364f7..33bf13e 100644
--- a/R/pkg/R/deserialize.R
+++ b/R/pkg/R/deserialize.R
@@ -176,10 +176,14 @@ readRow <- function(inputCon) {
 
 # Take a single column as Array[Byte] and deserialize it into an atomic vector
 readCol <- function(inputCon, numRows) {
-  # sapply can not work with POSIXlt
-  do.call(c, lapply(1:numRows, function(x) {
-value <- readObject(inputCon)
-# Replace NULL with NA so we can coerce to vectors
-if (is.null(value)) NA else value
-  }))
+  if (numRows > 0) {
+# sapply can not work with POSIXlt
+do.call(c, lapply(1:numRows, function(x) {
+  value <- readObject(inputCon)
+  # Replace NULL with NA so we can coerce to vectors
+  if (is.null(value)) NA else value
+}))
+  } else {
+vector()
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/5f9ce738/R/pkg/inst/tests/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index e6d3b21..c77f633 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -408,6 +408,14 @@ test_that("collect() returns a data.frame", {
   expect_equal(names(rdf)[1], "age")
   expect_equal(nrow(rdf), 3)
   expect_equal(ncol(rdf), 2)
+
+  # collect() returns data correctly from a DataFrame with 0 row
+  df0 <- limit(df, 0)
+  rdf <- collect(df0)
+  expect_true(is.data.frame(rdf))
+  expect_equal(names(rdf)[1], "age")
+  expect_equal(nrow(rdf), 0)
+  expect_equal(ncol(rdf), 2)
 })
 
 test_that("limit() returns DataFrame with the correct number of rows", {
@@ -492,6 +500,18 @@ test_that("head() and first() return the correct data", {
 
   testFirst <- first(df)
   expect_equal(nrow(testFirst), 1)
+
+  # head() and first() return the correct data on
+  # a DataFrame with 0 row
+  df0 <- limit(df, 0)
+
+  testHead <- head(df0)
+  expect_equal(nrow(testHead), 0)
+  expect_equal(ncol(testHead), 2)
+
+  testFirst <- first(df0)
+  expect_equal(nrow(testFirst), 0)
+  expect_equal(ncol(testFirst), 2)
 })
 
 test_that("distinct() and unique on DataFrames", {


-
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org



spark git commit: [SPARK-8844] [SPARKR] head/collect is broken in SparkR.

2015-08-16 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 881baf100 -> 4f75ce2e1


[SPARK-8844] [SPARKR] head/collect is broken in SparkR.

This is a WIP patch for SPARK-8844  for collecting reviews.

This bug is about reading an empty DataFrame. In readCol(),
  lapply(1:numRows, function(x) {
does not take into consideration the case where numRows = 0.

Will add unit test case.

Author: Sun Rui 

Closes #7419 from sun-rui/SPARK-8844.

(cherry picked from commit 5f9ce738fe6bab3f0caffad0df1d3876178cf469)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f75ce2e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f75ce2e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f75ce2e

Branch: refs/heads/branch-1.5
Commit: 4f75ce2e193c813f4e3ad067749b6e7b4f0ee135
Parents: 881baf1
Author: Sun Rui 
Authored: Sun Aug 16 00:30:02 2015 -0700
Committer: Shivaram Venkataraman 
Committed: Sun Aug 16 00:30:10 2015 -0700

--
 R/pkg/R/deserialize.R| 16 ++--
 R/pkg/inst/tests/test_sparkSQL.R | 20 
 2 files changed, 30 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/4f75ce2e/R/pkg/R/deserialize.R
--
diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R
index 6d364f7..33bf13e 100644
--- a/R/pkg/R/deserialize.R
+++ b/R/pkg/R/deserialize.R
@@ -176,10 +176,14 @@ readRow <- function(inputCon) {
 
 # Take a single column as Array[Byte] and deserialize it into an atomic vector
 readCol <- function(inputCon, numRows) {
-  # sapply can not work with POSIXlt
-  do.call(c, lapply(1:numRows, function(x) {
-value <- readObject(inputCon)
-# Replace NULL with NA so we can coerce to vectors
-if (is.null(value)) NA else value
-  }))
+  if (numRows > 0) {
+# sapply can not work with POSIXlt
+do.call(c, lapply(1:numRows, function(x) {
+  value <- readObject(inputCon)
+  # Replace NULL with NA so we can coerce to vectors
+  if (is.null(value)) NA else value
+}))
+  } else {
+vector()
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/4f75ce2e/R/pkg/inst/tests/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R
index e6d3b21..c77f633 100644
--- a/R/pkg/inst/tests/test_sparkSQL.R
+++ b/R/pkg/inst/tests/test_sparkSQL.R
@@ -408,6 +408,14 @@ test_that("collect() returns a data.frame", {
   expect_equal(names(rdf)[1], "age")
   expect_equal(nrow(rdf), 3)
   expect_equal(ncol(rdf), 2)
+
+  # collect() returns data correctly from a DataFrame with 0 row
+  df0 <- limit(df, 0)
+  rdf <- collect(df0)
+  expect_true(is.data.frame(rdf))
+  expect_equal(names(rdf)[1], "age")
+  expect_equal(nrow(rdf), 0)
+  expect_equal(ncol(rdf), 2)
 })
 
 test_that("limit() returns DataFrame with the correct number of rows", {
@@ -492,6 +500,18 @@ test_that("head() and first() return the correct data", {
 
   testFirst <- first(df)
   expect_equal(nrow(testFirst), 1)
+
+  # head() and first() return the correct data on
+  # a DataFrame with 0 row
+  df0 <- limit(df, 0)
+
+  testHead <- head(df0)
+  expect_equal(nrow(testHead), 0)
+  expect_equal(ncol(testHead), 2)
+
+  testFirst <- first(df0)
+  expect_equal(nrow(testFirst), 0)
+  expect_equal(ncol(testFirst), 2)
 })
 
 test_that("distinct() and unique on DataFrames", {


-
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org