Github user HyukjinKwon commented on a diff in the pull request:

https://github.com/apache/spark/pull/22954#discussion_r232473669

--- Diff: R/pkg/R/SQLContext.R ---
@@ -147,6 +147,55 @@ getDefaultSqlSource <- function() {
   l[["spark.sql.sources.default"]]
 }
 
+writeToTempFileInArrow <- function(rdf, numPartitions) {
+  # The R API for Arrow has not been released yet. CRAN requires the package to be declared
+  # in DESCRIPTION before it can be passed to requireNamespace, and then checks whether the
+  # package is available. Work around this by avoiding a direct requireNamespace call.
+  requireNamespace1 <- requireNamespace
+  if (requireNamespace1("arrow", quietly = TRUE)) {
+    record_batch <- get("record_batch", envir = asNamespace("arrow"), inherits = FALSE)
+    record_batch_stream_writer <- get(
+      "record_batch_stream_writer", envir = asNamespace("arrow"), inherits = FALSE)
+    file_output_stream <- get(
+      "file_output_stream", envir = asNamespace("arrow"), inherits = FALSE)
+    write_record_batch <- get(
+      "write_record_batch", envir = asNamespace("arrow"), inherits = FALSE)
+
+    # Currently arrow requires withr; otherwise, the write APIs don't work.
+    # A direct 'require' is not recommended by CRAN, so the same workaround is used here.
+    require1 <- require
+    if (require1("withr", quietly = TRUE)) {
+      numPartitions <- if (!is.null(numPartitions)) {
+        numToInt(numPartitions)
+      } else {
+        1
--- End diff --

We should; however, this follows the behaviour of the original code path. I matched it so that we can compare performance under the same conditions. If you don't mind, I will fix both in a separate PR.
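
As an aside, here is a minimal standalone sketch (not from the PR) of the indirection trick the diff relies on: calling requireNamespace through a local alias so CRAN's static checks do not flag an undeclared dependency, then resolving the function from the namespace at run time with get(). The helper name getArrowRecordBatchFn is made up for illustration; "arrow" and record_batch come from the diff above.

    # Sketch only: alias requireNamespace so the "arrow" dependency is not
    # detected by CRAN checks, then look the function up at run time.
    getArrowRecordBatchFn <- function() {      # hypothetical helper name
      requireNamespace1 <- requireNamespace    # alias avoids a direct call
      if (!requireNamespace1("arrow", quietly = TRUE)) {
        return(NULL)                           # arrow not installed; caller falls back
      }
      get("record_batch", envir = asNamespace("arrow"), inherits = FALSE)
    }

A caller would test the result for NULL and fall back to the non-Arrow code path, mirroring how writeToTempFileInArrow is only used when arrow is available.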