Github user felixcheung commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22954#discussion_r232172546
  
    --- Diff: R/pkg/R/SQLContext.R ---
    @@ -147,6 +147,55 @@ getDefaultSqlSource <- function() {
       l[["spark.sql.sources.default"]]
     }
     
    +writeToTempFileInArrow <- function(rdf, numPartitions) {
    +  # The R API for Arrow is not released yet. CRAN requires every package referenced via
    +  # requireNamespace to be declared in DESCRIPTION, and then checks whether it is available.
    +  # Work around this by avoiding a direct requireNamespace call.
    +  requireNamespace1 <- requireNamespace
    +  if (requireNamespace1("arrow", quietly = TRUE)) {
    +    record_batch <- get("record_batch", envir = asNamespace("arrow"), inherits = FALSE)
    +    record_batch_stream_writer <- get(
    +      "record_batch_stream_writer", envir = asNamespace("arrow"), inherits = FALSE)
    +    file_output_stream <- get(
    +      "file_output_stream", envir = asNamespace("arrow"), inherits = FALSE)
    +    write_record_batch <- get(
    +      "write_record_batch", envir = asNamespace("arrow"), inherits = FALSE)
    +
    +    # Currently arrow requires withr; otherwise, write APIs don't work.
    +    # Direct 'require' is not recommended by CRAN. Here's a workaround.
    +    require1 <- require
    +    if (require1("withr", quietly = TRUE)) {
    +      numPartitions <- if (!is.null(numPartitions)) {
    +        numToInt(numPartitions)
    +      } else {
    +        1
    +      }
    +      fileName <- tempfile(pattern = "spark-arrow", fileext = ".tmp")
    +      chunk <- as.integer(ceiling(nrow(rdf) / numPartitions))
    +      rdf_slices <- split(rdf, rep(1:ceiling(nrow(rdf) / chunk), each = chunk)[1:nrow(rdf)])
    +      stream_writer <- NULL
    +      for (rdf_slice in rdf_slices) {
    +        batch <- record_batch(rdf_slice)
    +        if (is.null(stream_writer)) {
    +          # We should avoid private calls like 'close_on_exit' (CRAN disallows them), but
    +          # there seems to be no exposed API for this. Here's a workaround; ideally it should
    +          # be removed.
    +          close_on_exit <- get("close_on_exit", envir = asNamespace("arrow"), inherits = FALSE)
    --- End diff --
    
    so is this an API missing in Arrow?
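
    For reference, a minimal standalone sketch of the CRAN-friendly indirection the diff relies on: requireNamespace is aliased and the function is pulled out of the namespace with get(), so static dependency checks never see a literal reference to the optional package. 'tools' and 'file_ext' are stand-in names here, chosen only because they ship with base R; the diff applies the same pattern to 'arrow'.
    
        requireNamespace1 <- requireNamespace
        if (requireNamespace1("tools", quietly = TRUE)) {
          # Look the function up dynamically instead of writing tools::file_ext directly.
          file_ext <- get("file_ext", envir = asNamespace("tools"), inherits = FALSE)
          file_ext("data.parquet")  # returns "parquet"
        }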

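    The slicing arithmetic in the diff can also be checked with base R alone; a small sketch with hypothetical sizes (10 rows, 3 partitions) showing how chunk and rdf_slices come out:
    
        rdf <- data.frame(x = 1:10, y = letters[1:10])  # hypothetical local data frame
        numPartitions <- 3
    
        # ceiling(10 / 3) = 4 rows per slice
        chunk <- as.integer(ceiling(nrow(rdf) / numPartitions))
        rdf_slices <- split(rdf, rep(1:ceiling(nrow(rdf) / chunk), each = chunk)[1:nrow(rdf)])
    
        sapply(rdf_slices, nrow)  # slices of 4, 4 and 2 rows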
