This is an automated email from the ASF dual-hosted git repository.

thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new b738178e57 GH-33390: [R] Field-level metadata (#49631)
b738178e57 is described below

commit b738178e571eee87f69867d97bbe0769d1fb6dfb
Author: Max Romagnoli <[email protected]>
AuthorDate: Fri Apr 10 15:59:14 2026 +0100

    GH-33390: [R] Field-level metadata (#49631)
    
    ### Rationale for this change
    - `field()` in R unlike Python does not support field-level metadata.
    
    ### What changes are included in this PR?
    - New active bindings on `Field`: `$HasMetadata`, `$metadata`
    - New methods on Field: `$WithMetadata()`, `$RemoveMetadata()`
    - New `check_metadata` parameter in `Field$Equals()`, defaulting to FALSE
    - Tests for the above
    
    ### Are these changes tested?
    - Yes, unit tests have been added, the functionality was also tested 
locally in R
    
    ### Are there any user-facing changes?
    - Yes, metadata= now works without throwing an error.
    - No breaking changes, since this was already included but errored before 
and parameter order was not changed in the implementation.
    
    * GitHub Issue: #33390
    
    Lead-authored-by: Max Romagnoli <[email protected]>
    Co-authored-by: Nic Crane <[email protected]>
    Signed-off-by: Nic Crane <[email protected]>
---
 r/R/arrowExports.R            | 20 ++++++++++++++--
 r/R/field.R                   | 47 ++++++++++++++++++++++++++++++------
 r/src/arrowExports.cpp        | 46 ++++++++++++++++++++++++++++++++----
 r/src/field.cpp               | 43 +++++++++++++++++++++++++++++++--
 r/tests/testthat/test-field.R | 55 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 196 insertions(+), 15 deletions(-)

diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 778d242023..fe99b91b38 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1240,8 +1240,8 @@ Field__name <- function(field) {
   .Call(`_arrow_Field__name`, field)
 }
 
-Field__Equals <- function(field, other) {
-  .Call(`_arrow_Field__Equals`, field, other)
+Field__Equals <- function(field, other, check_metadata) {
+  .Call(`_arrow_Field__Equals`, field, other, check_metadata)
 }
 
 Field__nullable <- function(field) {
@@ -1252,6 +1252,22 @@ Field__type <- function(field) {
   .Call(`_arrow_Field__type`, field)
 }
 
+Field__HasMetadata <- function(field) {
+  .Call(`_arrow_Field__HasMetadata`, field)
+}
+
+Field__metadata <- function(field) {
+  .Call(`_arrow_Field__metadata`, field)
+}
+
+Field__WithMetadata <- function(field, metadata) {
+  .Call(`_arrow_Field__WithMetadata`, field, metadata)
+}
+
+Field__RemoveMetadata <- function(field) {
+  .Call(`_arrow_Field__RemoveMetadata`, field)
+}
+
 fs___FileInfo__type <- function(x) {
   .Call(`_arrow_fs___FileInfo__type`, x)
 }
diff --git a/r/R/field.R b/r/R/field.R
index 6ae18e06c1..f1a9c8a2ea 100644
--- a/r/R/field.R
+++ b/r/R/field.R
@@ -26,7 +26,18 @@
 #' @section Methods:
 #'
 #' - `f$ToString()`: convert to a string
-#' - `f$Equals(other)`: test for equality. More naturally called as `f == 
other`
+#' - `f$Equals(other, check_metadata = FALSE)`: test for equality.
+#'    More naturally called as `f == other`
+#' - `f$WithMetadata(metadata)`: returns a new `Field` with the key-value
+#'    `metadata` set. Note that all list elements in `metadata` will be coerced
+#'    to `character`.
+#' - `f$RemoveMetadata()`: returns a new `Field` without metadata.
+#'
+#' @section Active bindings:
+#'
+#' - `$HasMetadata`: logical: does this `Field` have extra metadata?
+#' - `$metadata`: returns the key-value metadata as a named list, or `NULL`
+#'    if no metadata is set.
 #'
 #' @name Field
 #' @rdname Field-class
@@ -38,8 +49,15 @@ Field <- R6Class(
     ToString = function() {
       prettier_dictionary_type(Field__ToString(self))
     },
-    Equals = function(other, ...) {
-      inherits(other, "Field") && Field__Equals(self, other)
+    Equals = function(other, check_metadata = FALSE, ...) {
+      inherits(other, "Field") && Field__Equals(self, other, 
isTRUE(check_metadata))
+    },
+    WithMetadata = function(metadata = NULL) {
+      metadata <- prepare_key_value_metadata(metadata)
+      Field__WithMetadata(self, metadata)
+    },
+    RemoveMetadata = function() {
+      Field__RemoveMetadata(self)
     },
     export_to_c = function(ptr) ExportField(self, ptr)
   ),
@@ -52,14 +70,27 @@ Field <- R6Class(
     },
     type = function() {
       Field__type(self)
+    },
+    HasMetadata = function() {
+      Field__HasMetadata(self)
+    },
+    metadata = function() {
+      if (self$HasMetadata) {
+        as.list(Field__metadata(self))
+      } else {
+        NULL
+      }
     }
   )
 )
-Field$create <- function(name, type, metadata, nullable = TRUE) {
+Field$create <- function(name, type, metadata = NULL, nullable = TRUE) {
   assert_that(inherits(name, "character"), length(name) == 1L)
   type <- as_type(type, name)
-  assert_that(missing(metadata), msg = "metadata= is currently ignored")
-  Field__initialize(enc2utf8(name), type, nullable)
+  f <- Field__initialize(enc2utf8(name), type, nullable)
+  if (!is.null(metadata)) {
+    f <- f$WithMetadata(metadata)
+  }
+  f
 }
 #' @include arrowExports.R
 Field$import_from_c <- ImportField
@@ -68,11 +99,13 @@ Field$import_from_c <- ImportField
 #'
 #' @param name field name
 #' @param type logical type, instance of [DataType]
-#' @param metadata currently ignored
+#' @param metadata a named character vector or list to attach as field 
metadata.
+#'   All values will be coerced to `character`.
 #' @param nullable TRUE if field is nullable
 #'
 #' @examples
 #' field("x", int32())
+#' field("x", int32(), metadata = list(key = "value"))
 #' @rdname Field
 #' @seealso [Field]
 #' @export
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 560466495e..242a1632ed 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -3219,12 +3219,13 @@ BEGIN_CPP11
 END_CPP11
 }
 // field.cpp
-bool Field__Equals(const std::shared_ptr<arrow::Field>& field, const 
std::shared_ptr<arrow::Field>& other);
-extern "C" SEXP _arrow_Field__Equals(SEXP field_sexp, SEXP other_sexp){
+bool Field__Equals(const std::shared_ptr<arrow::Field>& field, const 
std::shared_ptr<arrow::Field>& other, bool check_metadata);
+extern "C" SEXP _arrow_Field__Equals(SEXP field_sexp, SEXP other_sexp, SEXP 
check_metadata_sexp){
 BEGIN_CPP11
        arrow::r::Input<const std::shared_ptr<arrow::Field>&>::type 
field(field_sexp);
        arrow::r::Input<const std::shared_ptr<arrow::Field>&>::type 
other(other_sexp);
-       return cpp11::as_sexp(Field__Equals(field, other));
+       arrow::r::Input<bool>::type check_metadata(check_metadata_sexp);
+       return cpp11::as_sexp(Field__Equals(field, other, check_metadata));
 END_CPP11
 }
 // field.cpp
@@ -3243,6 +3244,39 @@ BEGIN_CPP11
        return cpp11::as_sexp(Field__type(field));
 END_CPP11
 }
+// field.cpp
+bool Field__HasMetadata(const std::shared_ptr<arrow::Field>& field);
+extern "C" SEXP _arrow_Field__HasMetadata(SEXP field_sexp){
+BEGIN_CPP11
+       arrow::r::Input<const std::shared_ptr<arrow::Field>&>::type 
field(field_sexp);
+       return cpp11::as_sexp(Field__HasMetadata(field));
+END_CPP11
+}
+// field.cpp
+cpp11::writable::list Field__metadata(const std::shared_ptr<arrow::Field>& 
field);
+extern "C" SEXP _arrow_Field__metadata(SEXP field_sexp){
+BEGIN_CPP11
+       arrow::r::Input<const std::shared_ptr<arrow::Field>&>::type 
field(field_sexp);
+       return cpp11::as_sexp(Field__metadata(field));
+END_CPP11
+}
+// field.cpp
+std::shared_ptr<arrow::Field> Field__WithMetadata(const 
std::shared_ptr<arrow::Field>& field, cpp11::strings metadata);
+extern "C" SEXP _arrow_Field__WithMetadata(SEXP field_sexp, SEXP 
metadata_sexp){
+BEGIN_CPP11
+       arrow::r::Input<const std::shared_ptr<arrow::Field>&>::type 
field(field_sexp);
+       arrow::r::Input<cpp11::strings>::type metadata(metadata_sexp);
+       return cpp11::as_sexp(Field__WithMetadata(field, metadata));
+END_CPP11
+}
+// field.cpp
+std::shared_ptr<arrow::Field> Field__RemoveMetadata(const 
std::shared_ptr<arrow::Field>& field);
+extern "C" SEXP _arrow_Field__RemoveMetadata(SEXP field_sexp){
+BEGIN_CPP11
+       arrow::r::Input<const std::shared_ptr<arrow::Field>&>::type 
field(field_sexp);
+       return cpp11::as_sexp(Field__RemoveMetadata(field));
+END_CPP11
+}
 // filesystem.cpp
 fs::FileType fs___FileInfo__type(const std::shared_ptr<fs::FileInfo>& x);
 extern "C" SEXP _arrow_fs___FileInfo__type(SEXP x_sexp){
@@ -6009,9 +6043,13 @@ static const R_CallMethodDef CallEntries[] = {
                { "_arrow_Field__initialize", (DL_FUNC) 
&_arrow_Field__initialize, 3}, 
                { "_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 
1}, 
                { "_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1}, 
-               { "_arrow_Field__Equals", (DL_FUNC) &_arrow_Field__Equals, 2}, 
+               { "_arrow_Field__Equals", (DL_FUNC) &_arrow_Field__Equals, 3}, 
                { "_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 
1}, 
                { "_arrow_Field__type", (DL_FUNC) &_arrow_Field__type, 1}, 
+               { "_arrow_Field__HasMetadata", (DL_FUNC) 
&_arrow_Field__HasMetadata, 1}, 
+               { "_arrow_Field__metadata", (DL_FUNC) &_arrow_Field__metadata, 
1}, 
+               { "_arrow_Field__WithMetadata", (DL_FUNC) 
&_arrow_Field__WithMetadata, 2}, 
+               { "_arrow_Field__RemoveMetadata", (DL_FUNC) 
&_arrow_Field__RemoveMetadata, 1}, 
                { "_arrow_fs___FileInfo__type", (DL_FUNC) 
&_arrow_fs___FileInfo__type, 1}, 
                { "_arrow_fs___FileInfo__set_type", (DL_FUNC) 
&_arrow_fs___FileInfo__set_type, 2}, 
                { "_arrow_fs___FileInfo__path", (DL_FUNC) 
&_arrow_fs___FileInfo__path, 1}, 
diff --git a/r/src/field.cpp b/r/src/field.cpp
index 87c9f3e2f3..22a87bfbf2 100644
--- a/r/src/field.cpp
+++ b/r/src/field.cpp
@@ -18,6 +18,7 @@
 #include "./arrow_types.h"
 
 #include <arrow/type.h>
+#include <arrow/util/key_value_metadata.h>
 
 // [[arrow::export]]
 std::shared_ptr<arrow::Field> Field__initialize(
@@ -38,8 +39,46 @@ std::string Field__name(const std::shared_ptr<arrow::Field>& 
field) {
 
 // [[arrow::export]]
 bool Field__Equals(const std::shared_ptr<arrow::Field>& field,
-                   const std::shared_ptr<arrow::Field>& other) {
-  return field->Equals(other);
+                   const std::shared_ptr<arrow::Field>& other, bool 
check_metadata) {
+  return field->Equals(other, check_metadata);
+}
+
+// [[arrow::export]]
+bool Field__HasMetadata(const std::shared_ptr<arrow::Field>& field) {
+  return field->HasMetadata();
+}
+
+// [[arrow::export]]
+cpp11::writable::list Field__metadata(const std::shared_ptr<arrow::Field>& 
field) {
+  auto meta = field->metadata();
+  int64_t n = 0;
+  if (field->HasMetadata()) {
+    n = meta->size();
+  }
+  cpp11::writable::list out(n);
+  std::vector<std::string> names_out(n);
+  for (int i = 0; i < n; i++) {
+    out[i] = cpp11::as_sexp(meta->value(i));
+    names_out[i] = meta->key(i);
+  }
+  out.names() = names_out;
+  return out;
+}
+
+// [[arrow::export]]
+std::shared_ptr<arrow::Field> Field__WithMetadata(
+    const std::shared_ptr<arrow::Field>& field, cpp11::strings metadata) {
+  auto values = cpp11::as_cpp<std::vector<std::string>>(metadata);
+  auto names = cpp11::as_cpp<std::vector<std::string>>(metadata.attr("names"));
+  auto kv =
+      std::make_shared<arrow::KeyValueMetadata>(std::move(names), 
std::move(values));
+  return field->WithMetadata(std::move(kv));
+}
+
+// [[arrow::export]]
+std::shared_ptr<arrow::Field> Field__RemoveMetadata(
+    const std::shared_ptr<arrow::Field>& field) {
+  return field->RemoveMetadata();
 }
 
 // [[arrow::export]]
diff --git a/r/tests/testthat/test-field.R b/r/tests/testthat/test-field.R
index 6cef48ad54..e1972180fc 100644
--- a/r/tests/testthat/test-field.R
+++ b/r/tests/testthat/test-field.R
@@ -63,3 +63,58 @@ test_that("Field to C-interface", {
   # must clean up the pointer or we leak
   delete_arrow_schema(ptr)
 })
+
+test_that("Field metadata", {
+  x <- field("x", int32())
+  expect_false(x$HasMetadata)
+  expect_null(x$metadata)
+
+  x_meta <- field("x", int32(), metadata = list(key = "value"))
+  expect_true(x_meta$HasMetadata)
+  expect_identical(x_meta$metadata, list(key = "value"))
+
+  x_meta2 <- x$WithMetadata(list(key = "value"))
+  expect_true(x_meta2$HasMetadata)
+  expect_identical(x_meta2$metadata, list(key = "value"))
+
+  x_no_meta <- x_meta$RemoveMetadata()
+  expect_false(x_no_meta$HasMetadata)
+  expect_null(x_no_meta$metadata)
+})
+
+test_that("Field$Equals with check_metadata", {
+  x <- field("x", int32())
+  x_meta <- field("x", int32(), metadata = list(key = "value"))
+
+  expect_true(x$Equals(x_meta))
+  expect_false(x$Equals(x_meta, check_metadata = TRUE))
+  expect_true(x == x_meta)
+})
+
+test_that("Field WithMetadata(NULL) removes metadata", {
+  x <- field("x", int32(), metadata = list(key = "value"))
+  x_empty <- x$WithMetadata(NULL)
+  expect_false(x_empty$HasMetadata)
+  expect_null(x_empty$metadata)
+})
+
+test_that("Field metadata IPC roundtrip", {
+  x <- field("x", int32(), metadata = list(key = "value"))
+  tab <- Table$create(x = 1L, schema = schema(x))
+  bytes <- write_to_raw(tab)
+  roundtripped <- read_ipc_stream(bytes, as_data_frame = FALSE)
+  expect_true(roundtripped$schema$GetFieldByName("x")$Equals(x, check_metadata 
= TRUE))
+})
+
+test_that("Field metadata with duplicate keys", {
+  x <- field("x", int32(), metadata = list(a = "1", a = "2"))
+  expect_true(x$HasMetadata)
+  expect_length(x$metadata, 2)
+  expect_equal(x$metadata, list(a = "1", a = "2"))
+})
+
+test_that("Field metadata on nested struct child fields", {
+  inner <- field("y", int32(), metadata = list(inner_key = "inner_value"))
+  outer <- field("outer", struct__(list(inner)))
+  expect_true(outer$type$GetFieldByName("y")$Equals(inner, check_metadata = 
TRUE))
+})

Reply via email to