This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 9a0afd9e95 GH-40742: [R] fix max_rows_per_group must be a positive
number (#49709)
9a0afd9e95 is described below
commit 9a0afd9e9545444b492237cc9db7eb4549ce09f5
Author: Nic Crane <[email protected]>
AuthorDate: Tue May 5 10:24:54 2026 -0400
GH-40742: [R] fix max_rows_per_group must be a positive number (#49709)
### Rationale for this change
Error message due to faulty checking
### What changes are included in this PR?
Change checking circumstances
### Are these changes tested?
Yes
### Are there any user-facing changes?
No
### AI usage
Used Claude to generate this - will double check before marking ready for
review
* GitHub Issue: #40742
Authored-by: Nic Crane <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
---
r/R/dataset-write.R | 28 ++++++++++++++++++++++++----
r/tests/testthat/test-dataset-write.R | 10 ++++++++++
2 files changed, 34 insertions(+), 4 deletions(-)
diff --git a/r/R/dataset-write.R b/r/R/dataset-write.R
index 663e1b8f08..dd675b40d0 100644
--- a/r/R/dataset-write.R
+++ b/r/R/dataset-write.R
@@ -218,7 +218,12 @@ write_dataset <- function(
existing_data_behavior_opts <- c("delete_matching", "overwrite", "error")
existing_data_behavior <- match(match.arg(existing_data_behavior),
existing_data_behavior_opts) - 1L
- if (!missing(max_rows_per_file) && missing(max_rows_per_group) &&
max_rows_per_group > max_rows_per_file) {
+ if (
+ !missing(max_rows_per_file) &&
+ missing(max_rows_per_group) &&
+ max_rows_per_file > 0 &&
+ max_rows_per_group > max_rows_per_file
+ ) {
max_rows_per_group <- max_rows_per_file
}
@@ -290,7 +295,12 @@ write_delim_dataset <- function(
quote = c("needed", "all", "none"),
preserve_order = FALSE
) {
- if (!missing(max_rows_per_file) && missing(max_rows_per_group) &&
max_rows_per_group > max_rows_per_file) {
+ if (
+ !missing(max_rows_per_file) &&
+ missing(max_rows_per_group) &&
+ max_rows_per_file > 0 &&
+ max_rows_per_group > max_rows_per_file
+ ) {
max_rows_per_group <- max_rows_per_file
}
@@ -343,7 +353,12 @@ write_csv_dataset <- function(
quote = c("needed", "all", "none"),
preserve_order = FALSE
) {
- if (!missing(max_rows_per_file) && missing(max_rows_per_group) &&
max_rows_per_group > max_rows_per_file) {
+ if (
+ !missing(max_rows_per_file) &&
+ missing(max_rows_per_group) &&
+ max_rows_per_file > 0 &&
+ max_rows_per_group > max_rows_per_file
+ ) {
max_rows_per_group <- max_rows_per_file
}
@@ -395,7 +410,12 @@ write_tsv_dataset <- function(
quote = c("needed", "all", "none"),
preserve_order = FALSE
) {
- if (!missing(max_rows_per_file) && missing(max_rows_per_group) &&
max_rows_per_group > max_rows_per_file) {
+ if (
+ !missing(max_rows_per_file) &&
+ missing(max_rows_per_group) &&
+ max_rows_per_file > 0 &&
+ max_rows_per_group > max_rows_per_file
+ ) {
max_rows_per_group <- max_rows_per_file
}
diff --git a/r/tests/testthat/test-dataset-write.R
b/r/tests/testthat/test-dataset-write.R
index d62b888163..8fed358dc3 100644
--- a/r/tests/testthat/test-dataset-write.R
+++ b/r/tests/testthat/test-dataset-write.R
@@ -576,6 +576,16 @@ test_that("max_rows_per_group is adjusted if at odds with
max_rows_per_file", {
)
})
+test_that("max_rows_per_file = 0 does not trigger max_rows_per_group
adjustment (ARROW-40742)", {
+ skip_if_not_available("parquet")
+
+ # max_rows_per_file = 0 means "no limit" and should not error
+ dst_dir <- make_temp_dir()
+ expect_no_error(
+ write_dataset(df1, dst_dir, max_rows_per_file = 0L)
+ )
+})
+
test_that("write_dataset checks for format-specific arguments", {
df <- tibble::tibble(