Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/22954#discussion_r232895848 --- Diff: R/pkg/R/SQLContext.R --- @@ -172,36 +257,72 @@ getDefaultSqlSource <- function() { createDataFrame <- function(data, schema = NULL, samplingRatio = 1.0, numPartitions = NULL) { sparkSession <- getSparkSession() - + arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.enabled")[[1]] == "true" + shouldUseArrow <- FALSE + firstRow <- NULL if (is.data.frame(data)) { - # Convert data into a list of rows. Each row is a list. - - # get the names of columns, they will be put into RDD - if (is.null(schema)) { - schema <- names(data) - } + # get the names of columns, they will be put into RDD + if (is.null(schema)) { + schema <- names(data) + } - # get rid of factor type - cleanCols <- function(x) { - if (is.factor(x)) { - as.character(x) - } else { - x - } + # get rid of factor type + cleanCols <- function(x) { + if (is.factor(x)) { + as.character(x) + } else { + x } + } + data[] <- lapply(data, cleanCols) + + args <- list(FUN = list, SIMPLIFY = FALSE, USE.NAMES = FALSE) + if (arrowEnabled) { + shouldUseArrow <- tryCatch({ --- End diff -- Yup, correct. Let me address other comments as well.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org