paleolimbot commented on code in PR #12751:
URL: https://github.com/apache/arrow/pull/12751#discussion_r848349565


##########
r/R/table.R:
##########
@@ -149,6 +149,75 @@ Table$create <- function(..., schema = NULL) {
 #' @export
 names.Table <- function(x) x$ColumnNames()
 
+#' @export
+rbind.Table <- function(...) {
+  tables <- list(...)
+
+  # assert they have same schema
+  schema <- tables[[1]]$schema
+  unequal_schema_idx <- which.min(lapply(tables, function(x) x$schema == 
schema))
+  if (unequal_schema_idx != 1) {
+    abort(c(
+      sprintf("Schema at index %i does not match the first schema", 
unequal_schema_idx),
+      i = paste0("Schema 1:\n", schema$ToString()),
+      i = paste0(sprintf("Schema %d:\n", unequal_schema_idx),
+                 tables[[unequal_schema_idx]]$schema$ToString())
+    ))
+  }
+
+  # create chunked array for each column
+  columns <- map(seq_len(tables[[1]]$num_columns), function(i) {
+    do.call(c, map(tables, ~ .[[i]]))
+  })
+
+  Table$create(!!!set_names(columns, names(schema)), schema = schema)
+}
+
+#' @export
+cbind.Table <- function(..., call = caller_env()) {

Review Comment:
   I think you want to move `call <- caller_env()` to inside the 
function...it's not clear to me that anybody would ever pass it as an argument. 
If you do need it as an argument, use `.call` instead of `call` for the name 
(in the off chance anybody uses this to add a column named `call`...).



##########
r/R/record-batch.R:
##########
@@ -189,3 +189,61 @@ record_batch <- RecordBatch$create
 
 #' @export
 names.RecordBatch <- function(x) x$names()
+
+#' @export
+rbind.RecordBatch <- function(...) {
+  abort("Use `Table$create()` to combine record batches")
+}
+
+cbind_check_length <- function(target_length, length, idx) {
+  if (length != target_length) {
+    abort(
+      sprintf(
+        "Non-scalar inputs must have an equal number of rows. ..1 has %d, ..%d 
has %d",

Review Comment:
   Not at all necessary, but you make good use of the rlang `c(..., "i" = ...)` 
trick below and the second sentence here might look pretty as an `"i"` bullet.



##########
r/R/record-batch.R:
##########
@@ -189,3 +189,52 @@ record_batch <- RecordBatch$create
 
 #' @export
 names.RecordBatch <- function(x) x$names()
+
+#' @export
+rbind.RecordBatch <- function(...) {
+  abort("Use Table$create to combine record batches")
+}
+
+cbind_check_length <- function(target_length, length, idx) {
+  if (length != target_length) {
+    abort(
+      sprintf(
+        "Non-scalar inputs must have an equal number of rows. ..1 has %i, ..%i 
has %i",
+        target_length,
+        idx,
+        length
+      )
+    )
+  }
+}
+
+#' @export
+cbind.RecordBatch <- function(...) {
+  inputs <- list(...)
+  num_rows <- inputs[[1]]$num_rows
+
+  batches <- imap(inputs, function(input, idx) {
+    if (inherits(input, "RecordBatch")) {
+      cbind_check_length(num_rows, input$num_rows, idx)
+      input
+    } else if (is.atomic(input) && length(input) == 1) {
+      RecordBatch$create("{idx}" := rep(input, num_rows))
+    } else {
+      tryCatch(
+        {
+          cbind_check_length(num_rows, length(input), idx)
+          RecordBatch$create("{idx}" := input)
+        },
+        error = function(err) {
+          abort(sprintf("Input ..%i cannot be converted to an Arrow Array: 
%s", idx, err))
+        }
+      )
+    }

Review Comment:
   Nice!



##########
r/R/record-batch.R:
##########
@@ -189,3 +189,61 @@ record_batch <- RecordBatch$create
 
 #' @export
 names.RecordBatch <- function(x) x$names()
+
+#' @export
+rbind.RecordBatch <- function(...) {
+  abort("Use `Table$create()` to combine record batches")
+}
+
+cbind_check_length <- function(target_length, length, idx) {
+  if (length != target_length) {
+    abort(
+      sprintf(
+        "Non-scalar inputs must have an equal number of rows. ..1 has %d, ..%d 
has %d",
+        target_length,
+        idx,
+        length
+      )
+    )
+  }
+}
+
+#' @export
+cbind.RecordBatch <- function(..., call = caller_env()) {
+  inputs <- list(...)
+  num_rows <- inputs[[1]]$num_rows
+
+  batches <- map(seq_along(inputs), function(i) {
+    input <- inputs[[i]]
+    name <- names(inputs)[i]
+    name <- ifelse(name == "", paste0("X", as.character(i)), name)

Review Comment:
   The "name repair" operation is actually pretty complicated and I think you 
probably want to avoid it (maybe error for unnamed Array/R vector?). If you do 
want to support unnamed array-like things, you could use `inputs <- 
rlang::enquos(..., .named = TRUE)`, `base::make.names()`, or 
`vctrs::vec_as_names()` to avoid rolling your own logic.



##########
r/R/table.R:
##########
@@ -149,6 +149,75 @@ Table$create <- function(..., schema = NULL) {
 #' @export
 names.Table <- function(x) x$ColumnNames()
 
+#' @export
+rbind.Table <- function(...) {
+  tables <- list(...)
+
+  # assert they have same schema
+  schema <- tables[[1]]$schema
+  unequal_schema_idx <- which.min(lapply(tables, function(x) x$schema == 
schema))
+  if (unequal_schema_idx != 1) {
+    abort(c(
+      sprintf("Schema at index %i does not match the first schema", 
unequal_schema_idx),
+      i = paste0("Schema 1:\n", schema$ToString()),
+      i = paste0(sprintf("Schema %d:\n", unequal_schema_idx),
+                 tables[[unequal_schema_idx]]$schema$ToString())
+    ))
+  }
+
+  # create chunked array for each column
+  columns <- map(seq_len(tables[[1]]$num_columns), function(i) {
+    do.call(c, map(tables, ~ .[[i]]))
+  })
+
+  Table$create(!!!set_names(columns, names(schema)), schema = schema)
+}
+
+#' @export
+cbind.Table <- function(..., call = caller_env()) {

Review Comment:
   Actually, on re-reading where you use this, I think you can avoid it 
entirely (I *think* rlang is smart enough to jump over the tryCatch when 
signaling an error condition with a parent but definitely check). If you do 
need it, I think you want `sys.call(1)` rather than `caller_env()`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to