This is an automated email from the ASF dual-hosted git repository.

thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 1a428c902f GH-50163: [R] Bug: Partial matching on $metadata$r causes 
errors with schema metadata keys starting with "r" (#50178)
1a428c902f is described below

commit 1a428c902f0d1d6b9924d6fd55fad2e242fc5b93
Author: Nic Crane <[email protected]>
AuthorDate: Wed Jun 17 10:40:44 2026 +0100

    GH-50163: [R] Bug: Partial matching on $metadata$r causes errors with 
schema metadata keys starting with "r" (#50178)
    
    ### Rationale for this change
    
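    Because `$` partially matches list names, accessing `$metadata$r` also
    matched other schema metadata keys beginning with "r" (e.g. `rachel`),
    which was then treated as the R metadata and triggered errors.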
    
    ### What changes are included in this PR?
    
    Access the `r` metadata field via `[["r"]]` rather than `$r`, since `[[`
    matches names exactly.
    
    ### Are these changes tested?
    
    Yes
    
    ### Are there any user-facing changes?
    
    No
    * GitHub Issue: #50163
    
    Authored-by: Nic Crane <[email protected]>
    Signed-off-by: Nic Crane <[email protected]>
---
 r/R/arrow-tabular.R              |  6 +++---
 r/R/arrowExports.R               | 16 +++++++--------
 r/R/dataset-write.R              |  2 +-
 r/R/dplyr-collect.R              |  2 +-
 r/R/dplyr-group-by.R             |  8 ++++----
 r/R/feather.R                    |  2 +-
 r/R/metadata.R                   |  6 +++---
 r/R/schema.R                     | 10 ++++-----
 r/extra-tests/test-read-files.R  |  2 +-
 r/man/Schema-class.Rd            |  2 +-
 r/tests/testthat/test-metadata.R | 44 ++++++++++++++++++++++++----------------
 r/vignettes/metadata.Rmd         |  2 +-
 12 files changed, 55 insertions(+), 47 deletions(-)

diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R
index b15a25075f..a83226a728 100644
--- a/r/R/arrow-tabular.R
+++ b/r/R/arrow-tabular.R
@@ -82,10 +82,10 @@ ArrowTabular <- R6Class(
       # Helper for the R metadata that handles the serialization
       # See also method on Schema
       if (missing(new)) {
-        self$metadata$r
+        self$metadata[["r"]]
       } else {
         # Set the R metadata
-        self$metadata$r <- new
+        self$metadata[["r"]] <- new
         self
       }
     }
@@ -95,7 +95,7 @@ ArrowTabular <- R6Class(
 #' @export
 as.data.frame.ArrowTabular <- function(x, row.names = NULL, optional = FALSE, 
...) {
   df <- x$to_data_frame()
-  out <- apply_arrow_r_metadata(df, x$metadata$r)
+  out <- apply_arrow_r_metadata(df, x$metadata[["r"]])
   as.data.frame(out, row.names = row.names, optional = optional, ...)
 }
 
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 52274d29f0..22e66e2243 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1248,14 +1248,6 @@ Field__Equals <- function(field, other, check_metadata) {
   .Call(`_arrow_Field__Equals`, field, other, check_metadata)
 }
 
-Field__nullable <- function(field) {
-  .Call(`_arrow_Field__nullable`, field)
-}
-
-Field__type <- function(field) {
-  .Call(`_arrow_Field__type`, field)
-}
-
 Field__HasMetadata <- function(field) {
   .Call(`_arrow_Field__HasMetadata`, field)
 }
@@ -1272,6 +1264,14 @@ Field__RemoveMetadata <- function(field) {
   .Call(`_arrow_Field__RemoveMetadata`, field)
 }
 
+Field__nullable <- function(field) {
+  .Call(`_arrow_Field__nullable`, field)
+}
+
+Field__type <- function(field) {
+  .Call(`_arrow_Field__type`, field)
+}
+
 fs___FileInfo__type <- function(x) {
   .Call(`_arrow_fs___FileInfo__type`, x)
 }
diff --git a/r/R/dataset-write.R b/r/R/dataset-write.R
index dd675b40d0..dac3ee8798 100644
--- a/r/R/dataset-write.R
+++ b/r/R/dataset-write.R
@@ -154,7 +154,7 @@ write_dataset <- function(
     if (inherits(dataset, "grouped_df")) {
       force(partitioning)
       # Drop the grouping metadata before writing; we've already consumed it
-      # now to construct `partitioning` and don't want it in the metadata$r
+      # now to construct `partitioning` and don't want it in the 
metadata[["r"]]
       dataset <- dplyr::ungroup(dataset)
     }
     dataset <- as_adq(dataset)
diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R
index d109b15a62..b9461df20d 100644
--- a/r/R/dplyr-collect.R
+++ b/r/R/dplyr-collect.R
@@ -24,7 +24,7 @@ collect.arrow_dplyr_query <- function(x, as_data_frame = 
TRUE, ...) {
 collect.ArrowTabular <- function(x, as_data_frame = TRUE, ...) {
   if (as_data_frame) {
     df <- x$to_data_frame()
-    apply_arrow_r_metadata(df, x$metadata$r)
+    apply_arrow_r_metadata(df, x$metadata[["r"]])
   } else {
     x
   }
diff --git a/r/R/dplyr-group-by.R b/r/R/dplyr-group-by.R
index 268bf064af..52ea38f123 100644
--- a/r/R/dplyr-group-by.R
+++ b/r/R/dplyr-group-by.R
@@ -53,7 +53,7 @@ group_vars.arrow_dplyr_query <- function(x) x$group_by_vars
 group_vars.Dataset <- function(x) character()
 group_vars.RecordBatchReader <- function(x) character()
 group_vars.ArrowTabular <- function(x) {
-  x$metadata$r$attributes$.group_vars %||% character()
+  x$metadata[["r"]]$attributes$.group_vars %||% character()
 }
 
 # the logical literal in the two functions below controls the default value of
@@ -62,7 +62,7 @@ group_by_drop_default.arrow_dplyr_query <- function(.tbl) {
   .tbl$drop_empty_groups %||% TRUE
 }
 group_by_drop_default.ArrowTabular <- function(.tbl) {
-  .tbl$metadata$r$attributes$.group_by_drop %||% TRUE
+  .tbl$metadata[["r"]]$attributes$.group_by_drop %||% TRUE
 }
 group_by_drop_default.Dataset <- group_by_drop_default.RecordBatchReader <-
   function(.tbl) TRUE
@@ -84,11 +84,11 @@ set_group_attributes <- function(tab, group_vars, .drop) {
   # so passing NULL means unset (ungroup)
   if (is.null(group_vars) || length(group_vars)) {
     # Since accessing schema metadata does some work, only overwrite if needed
-    new_atts <- old_atts <- tab$metadata$r$attributes %||% list()
+    new_atts <- old_atts <- tab$metadata[["r"]]$attributes %||% list()
     new_atts[[".group_vars"]] <- group_vars
     new_atts[[".group_by_drop"]] <- .drop
     if (!identical(new_atts, old_atts)) {
-      tab$metadata$r$attributes <- new_atts
+      tab$metadata[["r"]]$attributes <- new_atts
     }
   }
   tab
diff --git a/r/R/feather.R b/r/R/feather.R
index 4008041106..23d5e4ed20 100644
--- a/r/R/feather.R
+++ b/r/R/feather.R
@@ -205,7 +205,7 @@ read_feather <- function(file, col_select = NULL, 
as_data_frame = TRUE, mmap = T
 
   if (isTRUE(as_data_frame)) {
     df <- out$to_data_frame()
-    out <- apply_arrow_r_metadata(df, out$metadata$r)
+    out <- apply_arrow_r_metadata(df, out$metadata[["r"]])
   }
   out
 }
diff --git a/r/R/metadata.R b/r/R/metadata.R
index dbc8c6b639..b7d7609e23 100644
--- a/r/R/metadata.R
+++ b/r/R/metadata.R
@@ -48,7 +48,7 @@
     if (getOption("arrow.debug", FALSE)) {
       print(conditionMessage(e))
     }
-    warning("Invalid metadata$r", call. = FALSE)
+    warning('Invalid metadata$[["r"]]', call. = FALSE)
     NULL
   })
 }
@@ -228,7 +228,7 @@ apply_arrow_r_metadata <- function(x, r_metadata) {
       }
     },
     error = function(e) {
-      warning("Invalid metadata$r", call. = FALSE)
+      warning('Invalid metadata$[["r"]]', call. = FALSE)
     }
   )
   x
@@ -323,7 +323,7 @@ arrow_attributes <- function(x, only_top_level = FALSE) {
 get_r_metadata_from_old_schema <- function(new_schema, old_schema) {
   # TODO: do we care about other (non-R) metadata preservation?
   # How would we know if it were meaningful?
-  r_meta <- old_schema$metadata$r
+  r_meta <- old_schema$metadata[["r"]]
   if (!is.null(r_meta)) {
     # Filter r_metadata$columns on columns with name _and_ type match
     common_names <- intersect(names(r_meta$columns), names(new_schema))
diff --git a/r/R/schema.R b/r/R/schema.R
index 7a6e38c95e..a02753fac5 100644
--- a/r/R/schema.R
+++ b/r/R/schema.R
@@ -65,7 +65,7 @@
 #'   schema (column names and types) which is compatible with other Arrow
 #'   clients. The R metadata is only read by R and is ignored by other clients
 #'   (e.g. Pandas has its own custom metadata). This metadata is stored in
-#'   `$metadata$r`.
+#'   `$metadata[["r"]]`.
 #'
 #'   Since Schema metadata keys and values must be strings, this metadata is
 #'   saved by serializing R's attribute list structure to a string. If the
@@ -138,8 +138,8 @@ Schema <- R6Class(
       renamed_schema <- Schema__WithNames(self, names)
 
       # if we have R metadata containing column names, update names there too
-      if (!is.null(existing_metadata$r$columns)) {
-        names(existing_metadata$r$columns) <- names
+      if (!is.null(existing_metadata[["r"]]$columns)) {
+        names(existing_metadata[["r"]]$columns) <- names
       }
       renamed_schema$WithMetadata(existing_metadata)
     }
@@ -176,10 +176,10 @@ Schema <- R6Class(
       # Helper for the R metadata that handles the serialization
       # See also method on ArrowTabular
       if (missing(new)) {
-        self$metadata$r
+        self$metadata[["r"]]
       } else {
         # Set the R metadata
-        self$metadata$r <- new
+        self$metadata[["r"]] <- new
         self
       }
     }
diff --git a/r/extra-tests/test-read-files.R b/r/extra-tests/test-read-files.R
index 4d4ecf85fa..ced366d2f5 100644
--- a/r/extra-tests/test-read-files.R
+++ b/r/extra-tests/test-read-files.R
@@ -183,7 +183,7 @@ test_that("Can see the extra metadata (parquet)", {
   if (if_version_less_than("3.0.0")) {
     expect_warning(
       df <- read_parquet(pq_file),
-      "Invalid metadata$r",
+      "Invalid metadata",
       fixed = TRUE
     )
     expect_s3_class(df, "tbl")
diff --git a/r/man/Schema-class.Rd b/r/man/Schema-class.Rd
index ecd216af07..df4d7d7f75 100644
--- a/r/man/Schema-class.Rd
+++ b/r/man/Schema-class.Rd
@@ -56,7 +56,7 @@ re-create them when pulled back into R. This metadata is 
separate from the
 schema (column names and types) which is compatible with other Arrow
 clients. The R metadata is only read by R and is ignored by other clients
 (e.g. Pandas has its own custom metadata). This metadata is stored in
-\verb{$metadata$r}.
+\verb{$metadata[["r"]]}.
 
 Since Schema metadata keys and values must be strings, this metadata is
 saved by serializing R's attribute list structure to a string. If the
diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R
index 90b9f599ec..fea4578635 100644
--- a/r/tests/testthat/test-metadata.R
+++ b/r/tests/testthat/test-metadata.R
@@ -62,16 +62,16 @@ test_that("Table R metadata", {
 
 test_that("R metadata is not stored for types that map to Arrow types (factor, 
Date, etc.)", {
   tab <- Table$create(example_data[1:6])
-  expect_null(tab$metadata$r)
+  expect_null(tab$metadata[["r"]])
 
-  expect_null(Table$create(example_with_times[1:3])$metadata$r)
+  expect_null(Table$create(example_with_times[1:3])$metadata[["r"]])
 })
 
 test_that("R metadata is not stored for ExtensionType columns", {
   tab <- Table$create(
     x = vctrs::new_vctr(1:5, class = "special_integer")
   )
-  expect_null(tab$metadata$r)
+  expect_null(tab$metadata[["r"]])
 })
 
 test_that("classes are not stored for 
arrow_binary/arrow_large_binary/arrow_fixed_size_binary (ARROW-14140)", {
@@ -81,13 +81,13 @@ test_that("classes are not stored for 
arrow_binary/arrow_large_binary/arrow_fixe
   large_binary <- Array$create(list(raws), large_binary())
   fixed_size_binary <- Array$create(list(raws), fixed_size_binary(7L))
 
-  expect_null(RecordBatch$create(b = binary)$metadata$r)
-  expect_null(RecordBatch$create(b = large_binary)$metadata$r)
-  expect_null(RecordBatch$create(b = fixed_size_binary)$metadata$r)
+  expect_null(RecordBatch$create(b = binary)$metadata[["r"]])
+  expect_null(RecordBatch$create(b = large_binary)$metadata[["r"]])
+  expect_null(RecordBatch$create(b = fixed_size_binary)$metadata[["r"]])
 
-  expect_null(Table$create(b = binary)$metadata$r)
-  expect_null(Table$create(b = large_binary)$metadata$r)
-  expect_null(Table$create(b = fixed_size_binary)$metadata$r)
+  expect_null(Table$create(b = binary)$metadata[["r"]])
+  expect_null(Table$create(b = large_binary)$metadata[["r"]])
+  expect_null(Table$create(b = fixed_size_binary)$metadata[["r"]])
 })
 
 test_that("Garbage R metadata doesn't break things", {
@@ -95,7 +95,7 @@ test_that("Garbage R metadata doesn't break things", {
   tab$metadata$r <- "garbage"
   expect_warning(
     as.data.frame(tab),
-    "Invalid metadata$r",
+    'Invalid metadata$[["r"]]',
     fixed = TRUE
   )
   # serialize data like .serialize_arrow_r_metadata does, but don't call that
@@ -104,7 +104,7 @@ test_that("Garbage R metadata doesn't break things", {
   tab$metadata$r <- rawToChar(serialize("garbage", NULL, ascii = TRUE))
   expect_warning(
     as.data.frame(tab),
-    "Invalid metadata$r",
+    'Invalid metadata$[["r"]]',
     fixed = TRUE
   )
 
@@ -113,7 +113,7 @@ test_that("Garbage R metadata doesn't break things", {
   tab$metadata <- list(r = rawToChar(serialize(bad, NULL, ascii = TRUE)))
   expect_warning(
     as.data.frame(tab),
-    "Invalid metadata$r",
+    'Invalid metadata$[["r"]]',
     fixed = TRUE
   )
 
@@ -144,7 +144,7 @@ arbitrary\040code\040was\040just\040executed
   expect_message(
     expect_warning(
       as.data.frame(tab),
-      "Invalid metadata$r",
+      'Invalid metadata$[["r"]]',
       fixed = TRUE
     ),
     NA
@@ -465,7 +465,7 @@ test_that("grouped_df metadata is recorded (efficiently)", {
   expect_s3_class(grouped, "grouped_df")
   grouped_tab <- Table$create(grouped)
   expect_r6_class(grouped_tab, "Table")
-  expect_equal(grouped_tab$metadata$r$attributes$.group_vars, "a")
+  expect_equal(grouped_tab$metadata[["r"]]$attributes$.group_vars, "a")
 })
 
 test_that("grouped_df non-arrow metadata is preserved", {
@@ -496,10 +496,10 @@ test_that("apply_arrow_r_metadata doesn't add in metadata 
from plain data.frame
   plain_df <- data.frame(x = 1:5)
   plain_df_arrow <- arrow_table(plain_df)
 
-  expect_equal(plain_df_arrow$metadata$r$columns, list(x = NULL))
+  expect_equal(plain_df_arrow$metadata[["r"]]$columns, list(x = NULL))
 
   plain_df_no_metadata <- plain_df_arrow$to_data_frame()
-  plain_df_with_metadata <- apply_arrow_r_metadata(plain_df_no_metadata, 
plain_df_arrow$metadata$r)
+  plain_df_with_metadata <- apply_arrow_r_metadata(plain_df_no_metadata, 
plain_df_arrow$metadata[["r"]])
 
   expect_identical(plain_df_no_metadata, plain_df_with_metadata)
 
@@ -507,13 +507,21 @@ test_that("apply_arrow_r_metadata doesn't add in metadata 
from plain data.frame
   spicy_df_arrow <- arrow_table(haven_data)
 
   expect_equal(
-    spicy_df_arrow$metadata$r$columns,
+    spicy_df_arrow$metadata[["r"]]$columns,
     list(num = list(attributes = list(format.spss = "F8.2"), columns = NULL), 
cat_int = NULL, cat_chr = NULL)
   )
 
   spicy_df_no_metadata <- spicy_df_arrow$to_data_frame()
-  spicy_df_with_metadata <- apply_arrow_r_metadata(spicy_df_no_metadata, 
spicy_df_arrow$metadata$r)
+  spicy_df_with_metadata <- apply_arrow_r_metadata(spicy_df_no_metadata, 
spicy_df_arrow$metadata[["r"]])
 
   expect_null(attr(spicy_df_no_metadata$num, "format.spss"))
   expect_equal(attr(spicy_df_with_metadata$num, "format.spss"), "F8.2")
 })
+
+test_that("metadata keys starting with 'r' don't cause partial matching - 
GH-50163", {
+  tbl <- arrow_table(x = 1:3)
+  tbl <- tbl$cast(tbl$schema$WithMetadata(list(rachel = "some_value")))
+
+  expect_no_warning(as.data.frame(tbl))
+  expect_no_warning(collect.ArrowTabular(tbl))
+})
diff --git a/r/vignettes/metadata.Rmd b/r/vignettes/metadata.Rmd
index 813b1075f2..3c1cd7315b 100644
--- a/r/vignettes/metadata.Rmd
+++ b/r/vignettes/metadata.Rmd
@@ -74,7 +74,7 @@ tb$metadata$new_key <- "new value"
 
 Metadata attached to a Schema is preserved when writing the Table to 
Arrow/Feather or Parquet formats. When reading those files into R, or when 
calling `as.data.frame()` on a Table or RecordBatch, the column attributes are 
restored to the columns of the resulting `data.frame`. This means that custom 
data types, including `haven::labelled`, `vctrs` annotations, and others, are 
preserved when doing a round-trip through Arrow.
 
-Note that the attributes stored in `$metadata$r` are only understood by R. If 
you write a `data.frame` with `haven` columns to a Feather file and read that 
in Pandas, the `haven` metadata won't be recognized there. Similarly, Pandas 
writes its own custom metadata, which the R package does not consume. You are 
free, however, to define custom metadata conventions for your application and 
assign any (string) values you want to other metadata keys. 
+Note that the attributes stored in `$metadata[["r"]]` are only understood by 
R. If you write a `data.frame` with `haven` columns to a Feather file and read 
that in Pandas, the `haven` metadata won't be recognized there. Similarly, 
Pandas writes its own custom metadata, which the R package does not consume. 
You are free, however, to define custom metadata conventions for your 
application and assign any (string) values you want to other metadata keys. 
 
 ## Further reading
 

Reply via email to