This is an automated email from the ASF dual-hosted git repository.

npr pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new 1d7907e ARROW-9544: [R] Fix version argument of write_parquet() 1d7907e is described below commit 1d7907ee349b96c9cf453b0c69df24a14bba467b Author: Matthias <matthias.gomo...@posteo.de> AuthorDate: Tue Jul 28 08:09:13 2020 -0700 ARROW-9544: [R] Fix version argument of write_parquet() Setting the version argument in `write_parquet()` did not work due to an incorrect function name. This PR fixes the bug, adds tests and amends the documentation. Closes #7831 from Plebejer/master Authored-by: Matthias <matthias.gomo...@posteo.de> Signed-off-by: Neal Richardson <neal.p.richard...@gmail.com> --- r/NEWS.md | 4 ++++ r/R/parquet.R | 5 +++-- r/man/write_parquet.Rd | 3 ++- r/tests/testthat/test-parquet.R | 14 ++++++++++++++ 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/r/NEWS.md b/r/NEWS.md index be78cac..3af512e 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -19,6 +19,10 @@ # arrow 1.0.0.9000 +## Bug fixes + +* The `version` option to `write_parquet()` is now correctly implemented. + # arrow 1.0.0 ## Arrow format conversion diff --git a/r/R/parquet.R b/r/R/parquet.R index e504a5c..b44cf18 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -62,7 +62,8 @@ read_parquet <- function(file, #' @param sink an [arrow::io::OutputStream][OutputStream] or a string which is interpreted as a file path #' @param chunk_size chunk size in number of rows. If NULL, the total number of rows is used. #' -#' @param version parquet version, "1.0" or "2.0". Default "1.0" +#' @param version parquet version, "1.0" or "2.0". Default "1.0". Numeric values +#' are coerced to character. #' @param compression compression algorithm. Default "snappy". See details. #' @param compression_level compression level. Meaning depends on compression algorithm #' @param use_dictionary Specify if we should use dictionary encoding. 
Default `TRUE` @@ -257,7 +258,7 @@ ParquetWriterProperties <- R6Class("ParquetWriterProperties", inherit = ArrowObj ParquetWriterPropertiesBuilder <- R6Class("ParquetWriterPropertiesBuilder", inherit = ArrowObject, public = list( set_version = function(version) { - parquet___ArrowWriterProperties___Builder__version(self, make_valid_version(version)) + parquet___WriterProperties___Builder__version(self, make_valid_version(version)) }, set_compression = function(table, compression) { compression <- compression_from_name(compression) diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd index 3ff60df..e005dfb 100644 --- a/r/man/write_parquet.Rd +++ b/r/man/write_parquet.Rd @@ -33,7 +33,8 @@ write_parquet( \item{chunk_size}{chunk size in number of rows. If NULL, the total number of rows is used.} -\item{version}{parquet version, "1.0" or "2.0". Default "1.0"} +\item{version}{parquet version, "1.0" or "2.0". Default "1.0". Numeric values +are coerced to character.} \item{compression}{compression algorithm. Default "snappy". See details.} diff --git a/r/tests/testthat/test-parquet.R b/r/tests/testthat/test-parquet.R index 71484db..7ddf4cc 100644 --- a/r/tests/testthat/test-parquet.R +++ b/r/tests/testthat/test-parquet.R @@ -177,3 +177,17 @@ test_that("write_parquet() returns its input", { df_out <- write_parquet(df, tf) expect_equivalent(df, df_out) }) + +test_that("write_parquet() handles version argument", { + df <- tibble::tibble(x = 1:5) + tf <- tempfile() + on.exit(unlink(tf)) + + purrr::walk(list("1.0", "2.0", 1.0, 2.0, 1L, 2L), ~ { + write_parquet(df, tf, version = .x) + expect_identical(read_parquet(tf), df) + }) + purrr::walk(list("3.0", 3.0, 3L, "A"), ~ { + expect_error(write_parquet(df, tf, version = .x)) + }) +})