Github user felixcheung commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22954#discussion_r232172546

--- Diff: R/pkg/R/SQLContext.R ---
@@ -147,6 +147,55 @@ getDefaultSqlSource <- function() {
   l[["spark.sql.sources.default"]]
 }
 
+writeToTempFileInArrow <- function(rdf, numPartitions) {
+  # The R API for Arrow has not been released yet. CRAN requires any package referenced by
+  # a direct requireNamespace() call to be declared in DESCRIPTION, and then checks whether
+  # that package is available. Work around this by avoiding a direct requireNamespace() call.
+  requireNamespace1 <- requireNamespace
+  if (requireNamespace1("arrow", quietly = TRUE)) {
+    record_batch <- get("record_batch", envir = asNamespace("arrow"), inherits = FALSE)
+    record_batch_stream_writer <- get(
+      "record_batch_stream_writer", envir = asNamespace("arrow"), inherits = FALSE)
+    file_output_stream <- get(
+      "file_output_stream", envir = asNamespace("arrow"), inherits = FALSE)
+    write_record_batch <- get(
+      "write_record_batch", envir = asNamespace("arrow"), inherits = FALSE)
+
+    # Currently, arrow requires withr; otherwise, its write APIs don't work.
+    # A direct 'require' call is not recommended by CRAN, so the same workaround is used here.
+    require1 <- require
+    if (require1("withr", quietly = TRUE)) {
+      numPartitions <- if (!is.null(numPartitions)) {
+        numToInt(numPartitions)
+      } else {
+        1
+      }
+      fileName <- tempfile(pattern = "spark-arrow", fileext = ".tmp")
+      chunk <- as.integer(ceiling(nrow(rdf) / numPartitions))
+      rdf_slices <- split(rdf, rep(1:ceiling(nrow(rdf) / chunk), each = chunk)[1:nrow(rdf)])
+      stream_writer <- NULL
+      for (rdf_slice in rdf_slices) {
+        batch <- record_batch(rdf_slice)
+        if (is.null(stream_writer)) {
+          # We should avoid private calls like 'close_on_exit' (CRAN disallows them), but it
+          # looks like there is no exposed API for it. This is a workaround, but ideally it
+          # should be removed.
+          close_on_exit <- get("close_on_exit", envir = asNamespace("arrow"), inherits = FALSE)
--- End diff ---

So is this an API missing in Arrow?
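(Aside for readers following the diff: below is a minimal, self-contained sketch of the
row-slicing logic quoted above, using only base R so it can be run without Arrow.
`sliceDataFrame` is a hypothetical helper name and `mtcars` is just an illustrative
built-in data.frame; neither appears in the PR.)

    sliceDataFrame <- function(rdf, numPartitions) {
      # Rows per slice, rounded up, so at most numPartitions groups are produced.
      chunk <- as.integer(ceiling(nrow(rdf) / numPartitions))
      # Group labels 1, 1, ..., 2, 2, ..., truncated to exactly nrow(rdf) entries.
      groups <- rep(1:ceiling(nrow(rdf) / chunk), each = chunk)[1:nrow(rdf)]
      split(rdf, groups)
    }

    slices <- sliceDataFrame(mtcars, 3)
    sapply(slices, nrow)  # 11 11 10 -- mtcars' 32 rows in three roughly equal slices

(The last slice simply absorbs the remainder when nrow(rdf) is not divisible by
numPartitions, which matches how the diff partitions rdf before writing record batches.)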