Dewey Dunnington created ARROW-16503:
----------------------------------------

             Summary: [C++] Can't concatenate extension arrays
                 Key: ARROW-16503
                 URL: https://issues.apache.org/jira/browse/ARROW-16503
             Project: Apache Arrow
          Issue Type: Improvement
          Components: C++
            Reporter: Dewey Dunnington


It looks like Arrays with an extension type can't be concatenated. For example, from the R bindings:

{code:R}
library(arrow, warn.conflicts = FALSE)

arr <- vctrs_extension_array(1:10)
concat_arrays(arr, arr)
#> Error: NotImplemented: concatenation of integer(0)
#> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/array/concatenate.cc:195  VisitTypeInline(*out_->type, this)
#> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/array/concatenate.cc:590  ConcatenateImpl(data, pool).Concatenate(&out_data)
{code}

This shows up more practically when using the query engine:

{code:R}
library(arrow, warn.conflicts = FALSE)

table <- arrow_table(
  group = rep(c("a", "b"), 5),
  col1 = 1:10,
  col2 = vctrs_extension_array(1:10)
)

tf <- tempfile()
table |> dplyr::group_by(group) |> write_dataset(tf)
open_dataset(tf) |>
  dplyr::arrange(col1) |> 
  dplyr::collect()
#> Error in `dplyr::collect()`:
#> ! NotImplemented: concatenation of extension<arrow.r.vctrs>
#> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/array/concatenate.cc:195  VisitTypeInline(*out_->type, this)
#> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/array/concatenate.cc:590  ConcatenateImpl(data, pool).Concatenate(&out_data)
#> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/compute/kernels/vector_selection.cc:2025  Concatenate(values.chunks(), ctx->memory_pool())
#> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/compute/kernels/vector_selection.cc:2084  TakeCA(*table.column(j), indices, options, ctx)
#> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/compute/exec/sink_node.cc:527  impl_->DoFinish()
#> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/compute/exec/exec_plan.cc:467  iterator_.Next()
#> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/record_batch.cc:337  ReadNext(&batch)
#> /Users/deweydunnington/Desktop/rscratch/arrow/cpp/src/arrow/record_batch.cc:351  ToRecordBatches()
{code}




--
This message was sent by Atlassian Jira
(v8.20.7#820007)

Reply via email to