[ https://issues.apache.org/jira/browse/ARROW-16421?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17531402#comment-17531402 ]
Will Jones commented on ARROW-16421: ------------------------------------ It seems like this issue isn't resolved by {{rm()}} and {{gc()}} if there is a large number of partitions: {code:r} library(arrow) #> Warning: package 'arrow' was built under R version 4.1.3 #> #> Attaching package: 'arrow' #> The following object is masked from 'package:utils': #> #> timestamp library(readr) # warning: 5 MB test_data <- read_csv("https://cn.dataone.org/cn/v2/resolve/knb.92098.1") #> Rows: 104497 Columns: 9 #> -- Column specification -------------------------------------------------------- #> Delimiter: "," #> chr (7): Y_CNAREA, CITY, A_RESID, local, Y_Rural, P_FSHY, P_TYPE #> dbl (2): YEAR, Permit_Count #> #> i Use `spec()` to retrieve the full column specification for this data. #> i Specify the column types or set `show_col_types = FALSE` to quiet this message. write_dataset(test_data, "test_data", partitioning = "CITY") # Original example was with DuckDB, but that's not necessarily the issue # con <- open_dataset("test_dataset") |> to_duckdb() con <- open_dataset("test_data")$NewScan()$Finish()$ToRecordBatchReader() files <- dir("test_data", full.names = TRUE, recursive = TRUE) rm(con) gc() #> used (Mb) gc trigger (Mb) max used (Mb) #> Ncells 1052314 56.2 2227298 119 1220182 65.2 #> Vcells 2795421 21.4 8388608 64 3736313 28.6 lapply(files, file.remove) #> Warning in FUN(X[[i]], ...): cannot remove file 'test_data/CITY=CORNER BAY/ #> part-0.parquet', reason 'Permission denied' #> Warning in FUN(X[[i]], ...): cannot remove file 'test_data/CITY=COUNCIL/ #> part-0.parquet', reason 'Permission denied' #> Warning in FUN(X[[i]], ...): cannot remove file 'test_data/CITY=CRAIG/ #> part-0.parquet', reason 'Permission denied' #> Warning in FUN(X[[i]], ...): cannot remove file 'test_data/CITY=CROOKED CREEK/ #> part-0.parquet', reason 'Permission denied' #> Warning in FUN(X[[i]], ...): cannot remove file 'test_data/CITY=DEEP BAY/ #> part-0.parquet', reason 'Permission denied' #> 
Warning in FUN(X[[i]], ...): cannot remove file 'test_data/CITY=DEERING/ #> part-0.parquet', reason 'Permission denied' #> Warning in FUN(X[[i]], ...): cannot remove file 'test_data/CITY=DELTA JUNCTION/ #> part-0.parquet', reason 'Permission denied' #> Warning in FUN(X[[i]], ...): cannot remove file 'test_data/CITY=DENALI PARK/ #> part-0.parquet', reason 'Permission denied' #> [[1]] #> [1] TRUE #> #> [[2]] #> [1] TRUE #> #> [[3]] #> [1] TRUE #> #> ... #> #> [[343]] #> [1] TRUE #> #> [[344]] #> [1] TRUE {code} > [R] Permission error on Windows when deleting file in dataset > ------------------------------------------------------------- > > Key: ARROW-16421 > URL: https://issues.apache.org/jira/browse/ARROW-16421 > Project: Apache Arrow > Issue Type: Improvement > Components: R > Affects Versions: 7.0.0 > Reporter: Will Jones > Assignee: Will Jones > Priority: Major > > On Windows this fails: > {code:R} > library(arrow) > write_dataset(iris, "test_dataset") > # Original example was with DuckDB, but that's not necessarily the issue > # con <- open_dataset("test_dataset") |> to_duckdb() > con <- open_dataset("test_dataset")$NewScan()$Finish()$ToRecordBatchReader() > file.remove("test_dataset/part-0.parquet") > #> Warning in file.remove("test_dataset/part-0.parquet"): cannot remove file > #> 'test_dataset/part-0.parquet', reason 'Permission denied' > #> [1] FALSE > {code} > But on macOS it does not: > {code:r} > library(arrow) > write_dataset(iris, "test_dataset") > # Original example was with DuckDB, but that's not necessarily the issue > # con <- open_dataset("test_dataset") |> to_duckdb() > con <- open_dataset("test_dataset")$NewScan()$Finish()$ToRecordBatchReader() > file.remove("test_dataset/part-0.parquet") > #> [1] TRUE > {code} -- This message was sent by Atlassian Jira (v8.20.7#820007)