thisisnic commented on PR #12826:
URL: https://github.com/apache/arrow/pull/12826#issuecomment-1189078502

   OK, more work is still needed here than I thought.  I've now added it as an
NSE function, but I get an error when trying to print things (likely because
we're looking at the schema, to which it hasn't been added) - as
@nealrichardson said above, "we assume that the schema contains all possible
valid field refs".  I'm not sure whether there's a better way to get this
information from something other than the schema.
   
   ``` r
   library(arrow)
   library(dplyr)
   
   tf <- tempfile()
   dir.create(tf)
   write_dataset(mtcars, tf, partitioning = "cyl")
   
   # this works and returns the dataset with the augmented filename column added
   open_dataset(tf) %>%
     mutate(filename = add_filename()) %>%
     collect()
   #>     mpg  disp  hp drat    wt  qsec vs am gear carb cyl
   #> 1  18.7 360.0 175 3.15 3.440 17.02  0  0    3    2   8
   #> 2  14.3 360.0 245 3.21 3.570 15.84  0  0    3    4   8
   #> 3  16.4 275.8 180 3.07 4.070 17.40  0  0    3    3   8
   #> 4  17.3 275.8 180 3.07 3.730 17.60  0  0    3    3   8
   #> 5  15.2 275.8 180 3.07 3.780 18.00  0  0    3    3   8
   #> 6  10.4 472.0 205 2.93 5.250 17.98  0  0    3    4   8
   #> 7  10.4 460.0 215 3.00 5.424 17.82  0  0    3    4   8
   #> 8  14.7 440.0 230 3.23 5.345 17.42  0  0    3    4   8
   #> 9  15.5 318.0 150 2.76 3.520 16.87  0  0    3    2   8
   #> 10 15.2 304.0 150 3.15 3.435 17.30  0  0    3    2   8
   #> 11 13.3 350.0 245 3.73 3.840 15.41  0  0    3    4   8
   #> 12 19.2 400.0 175 3.08 3.845 17.05  0  0    3    2   8
   #> 13 15.8 351.0 264 4.22 3.170 14.50  0  1    5    4   8
   #> 14 15.0 301.0 335 3.54 3.570 14.60  0  1    5    8   8
   #> 15 22.8 108.0  93 3.85 2.320 18.61  1  1    4    1   4
   #> 16 24.4 146.7  62 3.69 3.190 20.00  1  0    4    2   4
   #> 17 22.8 140.8  95 3.92 3.150 22.90  1  0    4    2   4
   #> 18 32.4  78.7  66 4.08 2.200 19.47  1  1    4    1   4
   #> 19 30.4  75.7  52 4.93 1.615 18.52  1  1    4    2   4
   #> 20 33.9  71.1  65 4.22 1.835 19.90  1  1    4    1   4
   #> 21 21.5 120.1  97 3.70 2.465 20.01  1  0    3    1   4
   #> 22 27.3  79.0  66 4.08 1.935 18.90  1  1    4    1   4
   #> 23 26.0 120.3  91 4.43 2.140 16.70  0  1    5    2   4
   #> 24 30.4  95.1 113 3.77 1.513 16.90  1  1    5    2   4
   #> 25 21.4 121.0 109 4.11 2.780 18.60  1  1    4    2   4
   #> 26 21.0 160.0 110 3.90 2.620 16.46  0  1    4    4   6
   #> 27 21.0 160.0 110 3.90 2.875 17.02  0  1    4    4   6
   #> 28 21.4 258.0 110 3.08 3.215 19.44  1  0    3    1   6
   #> 29 18.1 225.0 105 2.76 3.460 20.22  1  0    3    1   6
   #> 30 19.2 167.6 123 3.92 3.440 18.30  1  0    4    4   6
   #> 31 17.8 167.6 123 3.92 3.440 18.90  1  0    4    4   6
   #> 32 19.7 145.0 175 3.62 2.770 15.50  0  1    5    6   6
   #>                                                  filename
   #> 1  /tmp/RtmpzzDK4v/file32c352040ed57/cyl=8/part-0.parquet
   #> 2  /tmp/RtmpzzDK4v/file32c352040ed57/cyl=8/part-0.parquet
   #> 3  /tmp/RtmpzzDK4v/file32c352040ed57/cyl=8/part-0.parquet
   #> 4  /tmp/RtmpzzDK4v/file32c352040ed57/cyl=8/part-0.parquet
   #> 5  /tmp/RtmpzzDK4v/file32c352040ed57/cyl=8/part-0.parquet
   #> 6  /tmp/RtmpzzDK4v/file32c352040ed57/cyl=8/part-0.parquet
   #> 7  /tmp/RtmpzzDK4v/file32c352040ed57/cyl=8/part-0.parquet
   #> 8  /tmp/RtmpzzDK4v/file32c352040ed57/cyl=8/part-0.parquet
   #> 9  /tmp/RtmpzzDK4v/file32c352040ed57/cyl=8/part-0.parquet
   #> 10 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=8/part-0.parquet
   #> 11 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=8/part-0.parquet
   #> 12 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=8/part-0.parquet
   #> 13 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=8/part-0.parquet
   #> 14 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=8/part-0.parquet
   #> 15 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=4/part-0.parquet
   #> 16 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=4/part-0.parquet
   #> 17 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=4/part-0.parquet
   #> 18 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=4/part-0.parquet
   #> 19 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=4/part-0.parquet
   #> 20 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=4/part-0.parquet
   #> 21 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=4/part-0.parquet
   #> 22 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=4/part-0.parquet
   #> 23 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=4/part-0.parquet
   #> 24 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=4/part-0.parquet
   #> 25 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=4/part-0.parquet
   #> 26 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=6/part-0.parquet
   #> 27 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=6/part-0.parquet
   #> 28 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=6/part-0.parquet
   #> 29 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=6/part-0.parquet
   #> 30 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=6/part-0.parquet
   #> 31 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=6/part-0.parquet
   #> 32 /tmp/RtmpzzDK4v/file32c352040ed57/cyl=6/part-0.parquet
   
   # this doesn't work - without collect(), the query is printed, which fails
   open_dataset(tf) %>%
     mutate(filename = add_filename())
   #> Error in schm$GetFieldByName(name)$type$ToString(): attempt to apply non-function
   
   # if we try it on a table, we get an error message - 
   # I can look to catch this and raise an error with more context
   arrow_table(mtcars) %>% mutate(filename = add_filename()) %>% collect()
   #> Error in `collect()`:
   #> ! Invalid: No match for FieldRef.Name(__filename) in mpg: double
   #> cyl: double
   #> disp: double
   #> hp: double
   #> drat: double
   #> wt: double
   #> qsec: double
   #> vs: double
   #> am: double
   #> gear: double
   #> carb: double
   #> /home/nic2/arrow/cpp/src/arrow/type.h:1800  CheckNonEmpty(matches, root)
   #> /home/nic2/arrow/cpp/src/arrow/compute/exec/expression.cc:429  ref->FindOne(in)
   #> /home/nic2/arrow/cpp/src/arrow/compute/exec/project_node.cc:67  expr.Bind(*inputs[0]->output_schema(), plan->exec_context())
   
   # same error as with the dataset - "attempt to apply non-function"
   arrow_table(mtcars) %>% mutate(filename = add_filename())
   #> Error in schm$GetFieldByName(name)$type$ToString(): attempt to apply non-function
   ```
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to