[ 
https://issues.apache.org/jira/browse/ARROW-16649?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Nicola Crane updated ARROW-16649:
---------------------------------
    Description: 
The streaming execution engine supports sorting (I believe, as a sink node
option?), but the Substrait consumer does not currently consume sort relations.
Please can we have support for this?

Here's the example code/plan I tested with (in R, using the in-development 
[substrait|https://github.com/voltrondata/substrait-r] package):

 
{code:java}
library(dplyr)
library(substrait)

# create a basic table and order it
out <- tibble::tibble(a = 1, b = 2) %>%
  arrow_substrait_compiler() %>%
  arrange(a)

# take a look at the plan created
out$plan()
#> message of type 'substrait.Plan' with 2 fields set
#> extension_uris {
#>   extension_uri_anchor: 1
#> }
#> relations {
#>   root {
#>     input {
#>       sort {
#>         input {
#>           read {
#>             base_schema {
#>               names: "a"
#>               names: "b"
#>               struct_ {
#>                 types {
#>                   fp64 {
#>                   }
#>                 }
#>                 types {
#>                   fp64 {
#>                   }
#>                 }
#>               }
#>             }
#>             named_table {
#>               names: "named_table_1"
#>             }
#>           }
#>         }
#>         sorts {
#>           expr {
#>             selection {
#>               direct_reference {
#>                 struct_field {
#>                 }
#>               }
#>             }
#>           }
#>           direction: SORT_DIRECTION_ASC_NULLS_LAST
#>         }
#>       }
#>     }
#>     names: "a"
#>     names: "b"
#>   }
#> }

# try to run the plan
collect(out)
#> Error: NotImplemented: conversion to arrow::compute::Declaration from Substrait relation sort {
...
#> /home/nic2/arrow/cpp/src/arrow/engine/substrait/serde.cc:73  FromProto(plan_rel.rel(), ext_set)
{code}

  was:
The streaming execution engine supports sorting (I believe, as a sink node
option?), but the Substrait consumer does not currently consume sort relations.
Please can we have support for this?

Here's the example code/plan I tested with:

 
{code:java}
library(dplyr)
library(substrait)

# create a basic table and order it
out <- tibble::tibble(a = 1, b = 2) %>%
  arrow_substrait_compiler() %>%
  arrange(a)

# take a look at the plan created
out$plan()
#> message of type 'substrait.Plan' with 2 fields set
#> extension_uris {
#>   extension_uri_anchor: 1
#> }
#> relations {
#>   root {
#>     input {
#>       sort {
#>         input {
#>           read {
#>             base_schema {
#>               names: "a"
#>               names: "b"
#>               struct_ {
#>                 types {
#>                   fp64 {
#>                   }
#>                 }
#>                 types {
#>                   fp64 {
#>                   }
#>                 }
#>               }
#>             }
#>             named_table {
#>               names: "named_table_1"
#>             }
#>           }
#>         }
#>         sorts {
#>           expr {
#>             selection {
#>               direct_reference {
#>                 struct_field {
#>                 }
#>               }
#>             }
#>           }
#>           direction: SORT_DIRECTION_ASC_NULLS_LAST
#>         }
#>       }
#>     }
#>     names: "a"
#>     names: "b"
#>   }
#> }

# try to run the plan
collect(out)
#> Error: NotImplemented: conversion to arrow::compute::Declaration from Substrait relation sort {
...
#> /home/nic2/arrow/cpp/src/arrow/engine/substrait/serde.cc:73  FromProto(plan_rel.rel(), ext_set)
{code}


> [C++] Add support for sorting to the Substrait consumer
> -------------------------------------------------------
>
>                 Key: ARROW-16649
>                 URL: https://issues.apache.org/jira/browse/ARROW-16649
>             Project: Apache Arrow
>          Issue Type: Improvement
>          Components: C++
>            Reporter: Nicola Crane
>            Priority: Major
>              Labels: substrait
>
> The streaming execution engine supports sorting (I believe, as a sink node 
> option?), but the Substrait consumer does not currently consume sort 
> relations.  Please can we have support for this?
> Here's the example code/plan I tested with (in R, using the in-development 
> [substrait|https://github.com/voltrondata/substrait-r] package):
>  
> {code:java}
> library(dplyr)
> library(substrait)
> # create a basic table and order it
> out <- tibble::tibble(a = 1, b = 2) %>%
>   arrow_substrait_compiler() %>%
>   arrange(a)
> # take a look at the plan created
> out$plan()
> #> message of type 'substrait.Plan' with 2 fields set
> #> extension_uris {
> #>   extension_uri_anchor: 1
> #> }
> #> relations {
> #>   root {
> #>     input {
> #>       sort {
> #>         input {
> #>           read {
> #>             base_schema {
> #>               names: "a"
> #>               names: "b"
> #>               struct_ {
> #>                 types {
> #>                   fp64 {
> #>                   }
> #>                 }
> #>                 types {
> #>                   fp64 {
> #>                   }
> #>                 }
> #>               }
> #>             }
> #>             named_table {
> #>               names: "named_table_1"
> #>             }
> #>           }
> #>         }
> #>         sorts {
> #>           expr {
> #>             selection {
> #>               direct_reference {
> #>                 struct_field {
> #>                 }
> #>               }
> #>             }
> #>           }
> #>           direction: SORT_DIRECTION_ASC_NULLS_LAST
> #>         }
> #>       }
> #>     }
> #>     names: "a"
> #>     names: "b"
> #>   }
> #> }
> # try to run the plan
> collect(out)
> #> Error: NotImplemented: conversion to arrow::compute::Declaration from Substrait relation sort {
> ...
> #> /home/nic2/arrow/cpp/src/arrow/engine/substrait/serde.cc:73  FromProto(plan_rel.rel(), ext_set)
> {code}



--
This message was sent by Atlassian Jira
(v8.20.7#820007)

Reply via email to