[ 
https://issues.apache.org/jira/browse/ARROW-16421?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17531402#comment-17531402
 ] 

Will Jones commented on ARROW-16421:
------------------------------------

It seems like this issue isn't resolved by {{rm()}} and {{gc()}} if there is a 
large number of partitions:
{code:r}
library(arrow)
#> Warning: package 'arrow' was built under R version 4.1.3
#> 
#> Attaching package: 'arrow'
#> The following object is masked from 'package:utils':
#> 
#>     timestamp
library(readr)

# warning: 5 MB
test_data <- read_csv("https://cn.dataone.org/cn/v2/resolve/knb.92098.1")
#> Rows: 104497 Columns: 9
#> -- Column specification --------------------------------------------------------
#> Delimiter: ","
#> chr (7): Y_CNAREA, CITY, A_RESID, local, Y_Rural, P_FSHY, P_TYPE
#> dbl (2): YEAR, Permit_Count
#> 
#> i Use `spec()` to retrieve the full column specification for this data.
#> i Specify the column types or set `show_col_types = FALSE` to quiet this message.

write_dataset(test_data, "test_data", partitioning = "CITY")

# Original example was with DuckDB, but that's not necessarily the issue
# con <- open_dataset("test_dataset") |> to_duckdb()
con <- open_dataset("test_data")$NewScan()$Finish()$ToRecordBatchReader()

files <- dir("test_data", full.names = TRUE, recursive = TRUE)

rm(con)
gc()
#>           used (Mb) gc trigger (Mb) max used (Mb)
#> Ncells 1052314 56.2    2227298  119  1220182 65.2
#> Vcells 2795421 21.4    8388608   64  3736313 28.6

lapply(files, file.remove)
#> Warning in FUN(X[[i]], ...): cannot remove file 'test_data/CITY=CORNER BAY/
#> part-0.parquet', reason 'Permission denied'
#> Warning in FUN(X[[i]], ...): cannot remove file 'test_data/CITY=COUNCIL/
#> part-0.parquet', reason 'Permission denied'
#> Warning in FUN(X[[i]], ...): cannot remove file 'test_data/CITY=CRAIG/
#> part-0.parquet', reason 'Permission denied'
#> Warning in FUN(X[[i]], ...): cannot remove file 'test_data/CITY=CROOKED CREEK/
#> part-0.parquet', reason 'Permission denied'
#> Warning in FUN(X[[i]], ...): cannot remove file 'test_data/CITY=DEEP BAY/
#> part-0.parquet', reason 'Permission denied'
#> Warning in FUN(X[[i]], ...): cannot remove file 'test_data/CITY=DEERING/
#> part-0.parquet', reason 'Permission denied'
#> Warning in FUN(X[[i]], ...): cannot remove file 'test_data/CITY=DELTA JUNCTION/
#> part-0.parquet', reason 'Permission denied'
#> Warning in FUN(X[[i]], ...): cannot remove file 'test_data/CITY=DENALI PARK/
#> part-0.parquet', reason 'Permission denied'
#> [[1]]
#> [1] TRUE
#> 
#> [[2]]
#> [1] TRUE
#> 
#> [[3]]
#> [1] TRUE
#> 
#> ...
#> 
#> [[343]]
#> [1] TRUE
#> 
#> [[344]]
#> [1] TRUE
{code}

> [R] Permission error on Windows when deleting file in dataset
> -------------------------------------------------------------
>
>                 Key: ARROW-16421
>                 URL: https://issues.apache.org/jira/browse/ARROW-16421
>             Project: Apache Arrow
>          Issue Type: Improvement
>          Components: R
>    Affects Versions: 7.0.0
>            Reporter: Will Jones
>            Assignee: Will Jones
>            Priority: Major
>
> On Windows this fails:
> {code:R}
> library(arrow)
> write_dataset(iris, "test_dataset")
> # Original example was with DuckDB, but that's not necessarily the issue
> # con <- open_dataset("test_dataset") |> to_duckdb()
> con <- open_dataset("test_dataset")$NewScan()$Finish()$ToRecordBatchReader()
> file.remove("test_dataset/part-0.parquet")
> #> Warning in file.remove("test_dataset/part-0.parquet"): cannot remove file
> #> 'test_dataset/part-0.parquet', reason 'Permission denied'
> #> [1] FALSE
> {code}
> But on MacOS it does not:
> {code:r}
> library(arrow)
> write_dataset(iris, "test_dataset")
> # Original example was with DuckDB, but that's not necessarily the issue
> # con <- open_dataset("test_dataset") |> to_duckdb()
> con <- open_dataset("test_dataset")$NewScan()$Finish()$ToRecordBatchReader()
> file.remove("test_dataset/part-0.parquet")
> #> [1] TRUE
> {code}



--
This message was sent by Atlassian Jira
(v8.20.7#820007)

Reply via email to