nealrichardson commented on a change in pull request #10269:
URL: https://github.com/apache/arrow/pull/10269#discussion_r635572831



##########
File path: r/R/table.R
##########
@@ -175,12 +175,17 @@ Table$create <- function(..., schema = NULL) {
     return(dplyr::group_by(out, !!!dplyr::groups(dots[[1]])))

Review comment:
       This if block needs to be handled--as it currently stands, if you have a 
grouped_df you won't get scalar recycling. (In general I'd like to see this 
code refactored so that there's only one `Table__from_dots` call.)

##########
File path: r/R/util.R
##########
@@ -110,3 +110,36 @@ handle_embedded_nul_error <- function(e) {
   }
   stop(e)
 }
+
+#' Take an object of length 1 and repeat it.
+#' 
+#' @param object Object of length 1 to be repeated - vector, `Scalar`, 
`Array`, or `ChunkedArray`
+#' @param n Number of repetitions
+#' 
+#' @return `Array` of length `n`
+#' 
+#' @keywords internal
+repeat_value_as_array <- function(object, n) {
+  if (inherits(object, "ChunkedArray")) {
+    return(MakeArrayFromScalar(Scalar$create(object$chunks[[1]]), n))
+  }
+  return(MakeArrayFromScalar(Scalar$create(object), n))
+}
+
+#' Recycle scalar values in a list of arrays
+#' 
+#' @param arrays List of arrays
+#' @return List of arrays with any vector/Scalar/Array/ChunkedArray values of 
length 1 recycled 
+#' @keywords internal
+recycle_scalars <- function(arrays){
+  # Get lengths of items in arrays
+  is_df <- map_lgl(arrays, ~inherits(.x, "data.frame"))
+  arr_lens <- lengths(arrays)
+  arr_lens[is_df] <- map_int(arrays[is_df], nrow)

Review comment:
       ```suggestion
     arr_lens <- map_int(arrays, NROW)
   ```

##########
File path: r/R/table.R
##########
@@ -175,12 +175,17 @@ Table$create <- function(..., schema = NULL) {
     return(dplyr::group_by(out, !!!dplyr::groups(dots[[1]])))
   }
   
+  # If any arrays are length 1, recycle them  
+  dots <- recycle_scalars(dots)

Review comment:
       This probably belongs inside the "else" block below

##########
File path: r/R/util.R
##########
@@ -110,3 +110,36 @@ handle_embedded_nul_error <- function(e) {
   }
   stop(e)
 }
+
+#' Take an object of length 1 and repeat it.
+#' 
+#' @param object Object of length 1 to be repeated - vector, `Scalar`, 
`Array`, or `ChunkedArray`
+#' @param n Number of repetitions
+#' 
+#' @return `Array` of length `n`
+#' 
+#' @keywords internal
+repeat_value_as_array <- function(object, n) {
+  if (inherits(object, "ChunkedArray")) {
+    return(MakeArrayFromScalar(Scalar$create(object$chunks[[1]]), n))
+  }
+  return(MakeArrayFromScalar(Scalar$create(object), n))
+}
+
+#' Recycle scalar values in a list of arrays
+#' 
+#' @param arrays List of arrays
+#' @return List of arrays with any vector/Scalar/Array/ChunkedArray values of 
length 1 recycled 
+#' @keywords internal
+recycle_scalars <- function(arrays){
+  # Get lengths of items in arrays
+  is_df <- map_lgl(arrays, ~inherits(.x, "data.frame"))
+  arr_lens <- lengths(arrays)
+  arr_lens[is_df] <- map_int(arrays[is_df], nrow)
+  
+  if (length(arrays) > 1 && any(arr_lens == 1) && !all(arr_lens==1)) {
+    max_array_len <- max(arr_lens)
+    arrays[arr_lens == 1 & !is_df] <- lapply(arrays[arr_lens == 1 & !is_df], 
repeat_value_as_array, max_array_len)

Review comment:
       Why not data frames? they turn into struct arrays:
   
   ```
   > Scalar$create(tibble::tibble(a=1))$as_array()
   StructArray
   <struct<a: int32>>
   -- is_valid: all not null
   -- child 0 type: int32
     [
       1
     ]
   ``

##########
File path: r/R/util.R
##########
@@ -110,3 +110,36 @@ handle_embedded_nul_error <- function(e) {
   }
   stop(e)
 }
+
+#' Take an object of length 1 and repeat it.
+#' 
+#' @param object Object of length 1 to be repeated - vector, `Scalar`, 
`Array`, or `ChunkedArray`
+#' @param n Number of repetitions
+#' 
+#' @return `Array` of length `n`
+#' 
+#' @keywords internal
+repeat_value_as_array <- function(object, n) {
+  if (inherits(object, "ChunkedArray")) {
+    return(MakeArrayFromScalar(Scalar$create(object$chunks[[1]]), n))
+  }
+  return(MakeArrayFromScalar(Scalar$create(object), n))

Review comment:
       Since this is wired up as the as_array method on Scalar, we should use it
   
   ```suggestion
       return(Scalar$create(object$chunks[[1]])$as_array(n))
     }
     return(Scalar$create(object)$as_array(n))
   ```
   
   Also, how might the ChunkedArray case go wrong here?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to