This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new b738178e57 GH-33390: [R] Field-level metadata (#49631)
b738178e57 is described below
commit b738178e571eee87f69867d97bbe0769d1fb6dfb
Author: Max Romagnoli <[email protected]>
AuthorDate: Fri Apr 10 15:59:14 2026 +0100
GH-33390: [R] Field-level metadata (#49631)
### Rationale for this change
- Unlike Python, `field()` in R does not support field-level metadata.
### What changes are included in this PR?
- New active bindings on `Field`: `$HasMetadata`, `$metadata`
- New methods on Field: `$WithMetadata()`, `$RemoveMetadata()`
- New `check_metadata` parameter in `Field$Equals()`, defaulting to FALSE
- Tests for the above
### Are these changes tested?
- Yes, unit tests have been added; the functionality was also tested
locally in R.
### Are there any user-facing changes?
- Yes, the `metadata=` argument of `field()` now works instead of throwing an error.
- No breaking changes: the `metadata` parameter already existed (it errored
when supplied), and the parameter order was not changed in the implementation.
* GitHub Issue: #33390
Lead-authored-by: Max Romagnoli <[email protected]>
Co-authored-by: Nic Crane <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
---
r/R/arrowExports.R | 20 ++++++++++++++--
r/R/field.R | 47 ++++++++++++++++++++++++++++++------
r/src/arrowExports.cpp | 46 ++++++++++++++++++++++++++++++++----
r/src/field.cpp | 43 +++++++++++++++++++++++++++++++--
r/tests/testthat/test-field.R | 55 +++++++++++++++++++++++++++++++++++++++++++
5 files changed, 196 insertions(+), 15 deletions(-)
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 778d242023..fe99b91b38 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1240,8 +1240,8 @@ Field__name <- function(field) {
.Call(`_arrow_Field__name`, field)
}
-Field__Equals <- function(field, other) {
- .Call(`_arrow_Field__Equals`, field, other)
+Field__Equals <- function(field, other, check_metadata) {
+ .Call(`_arrow_Field__Equals`, field, other, check_metadata)
}
Field__nullable <- function(field) {
@@ -1252,6 +1252,22 @@ Field__type <- function(field) {
.Call(`_arrow_Field__type`, field)
}
+Field__HasMetadata <- function(field) {
+ .Call(`_arrow_Field__HasMetadata`, field)
+}
+
+Field__metadata <- function(field) {
+ .Call(`_arrow_Field__metadata`, field)
+}
+
+Field__WithMetadata <- function(field, metadata) {
+ .Call(`_arrow_Field__WithMetadata`, field, metadata)
+}
+
+Field__RemoveMetadata <- function(field) {
+ .Call(`_arrow_Field__RemoveMetadata`, field)
+}
+
fs___FileInfo__type <- function(x) {
.Call(`_arrow_fs___FileInfo__type`, x)
}
diff --git a/r/R/field.R b/r/R/field.R
index 6ae18e06c1..f1a9c8a2ea 100644
--- a/r/R/field.R
+++ b/r/R/field.R
@@ -26,7 +26,18 @@
#' @section Methods:
#'
#' - `f$ToString()`: convert to a string
-#' - `f$Equals(other)`: test for equality. More naturally called as `f ==
other`
+#' - `f$Equals(other, check_metadata = FALSE)`: test for equality.
+#' More naturally called as `f == other`
+#' - `f$WithMetadata(metadata)`: returns a new `Field` with the key-value
+#' `metadata` set. Note that all list elements in `metadata` will be coerced
+#' to `character`.
+#' - `f$RemoveMetadata()`: returns a new `Field` without metadata.
+#'
+#' @section Active bindings:
+#'
+#' - `$HasMetadata`: logical: does this `Field` have extra metadata?
+#' - `$metadata`: returns the key-value metadata as a named list, or `NULL`
+#' if no metadata is set.
#'
#' @name Field
#' @rdname Field-class
@@ -38,8 +49,15 @@ Field <- R6Class(
ToString = function() {
prettier_dictionary_type(Field__ToString(self))
},
- Equals = function(other, ...) {
- inherits(other, "Field") && Field__Equals(self, other)
+ Equals = function(other, check_metadata = FALSE, ...) {
+ inherits(other, "Field") && Field__Equals(self, other,
isTRUE(check_metadata))
+ },
+ WithMetadata = function(metadata = NULL) {
+ metadata <- prepare_key_value_metadata(metadata)
+ Field__WithMetadata(self, metadata)
+ },
+ RemoveMetadata = function() {
+ Field__RemoveMetadata(self)
},
export_to_c = function(ptr) ExportField(self, ptr)
),
@@ -52,14 +70,27 @@ Field <- R6Class(
},
type = function() {
Field__type(self)
+ },
+ HasMetadata = function() {
+ Field__HasMetadata(self)
+ },
+ metadata = function() {
+ if (self$HasMetadata) {
+ as.list(Field__metadata(self))
+ } else {
+ NULL
+ }
}
)
)
-Field$create <- function(name, type, metadata, nullable = TRUE) {
+Field$create <- function(name, type, metadata = NULL, nullable = TRUE) {
assert_that(inherits(name, "character"), length(name) == 1L)
type <- as_type(type, name)
- assert_that(missing(metadata), msg = "metadata= is currently ignored")
- Field__initialize(enc2utf8(name), type, nullable)
+ f <- Field__initialize(enc2utf8(name), type, nullable)
+ if (!is.null(metadata)) {
+ f <- f$WithMetadata(metadata)
+ }
+ f
}
#' @include arrowExports.R
Field$import_from_c <- ImportField
@@ -68,11 +99,13 @@ Field$import_from_c <- ImportField
#'
#' @param name field name
#' @param type logical type, instance of [DataType]
-#' @param metadata currently ignored
+#' @param metadata a named character vector or list to attach as field
metadata.
+#' All values will be coerced to `character`.
#' @param nullable TRUE if field is nullable
#'
#' @examples
#' field("x", int32())
+#' field("x", int32(), metadata = list(key = "value"))
#' @rdname Field
#' @seealso [Field]
#' @export
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 560466495e..242a1632ed 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -3219,12 +3219,13 @@ BEGIN_CPP11
END_CPP11
}
// field.cpp
-bool Field__Equals(const std::shared_ptr<arrow::Field>& field, const
std::shared_ptr<arrow::Field>& other);
-extern "C" SEXP _arrow_Field__Equals(SEXP field_sexp, SEXP other_sexp){
+bool Field__Equals(const std::shared_ptr<arrow::Field>& field, const
std::shared_ptr<arrow::Field>& other, bool check_metadata);
+extern "C" SEXP _arrow_Field__Equals(SEXP field_sexp, SEXP other_sexp, SEXP
check_metadata_sexp){
BEGIN_CPP11
arrow::r::Input<const std::shared_ptr<arrow::Field>&>::type
field(field_sexp);
arrow::r::Input<const std::shared_ptr<arrow::Field>&>::type
other(other_sexp);
- return cpp11::as_sexp(Field__Equals(field, other));
+ arrow::r::Input<bool>::type check_metadata(check_metadata_sexp);
+ return cpp11::as_sexp(Field__Equals(field, other, check_metadata));
END_CPP11
}
// field.cpp
@@ -3243,6 +3244,39 @@ BEGIN_CPP11
return cpp11::as_sexp(Field__type(field));
END_CPP11
}
+// field.cpp
+bool Field__HasMetadata(const std::shared_ptr<arrow::Field>& field);
+extern "C" SEXP _arrow_Field__HasMetadata(SEXP field_sexp){
+BEGIN_CPP11
+ arrow::r::Input<const std::shared_ptr<arrow::Field>&>::type
field(field_sexp);
+ return cpp11::as_sexp(Field__HasMetadata(field));
+END_CPP11
+}
+// field.cpp
+cpp11::writable::list Field__metadata(const std::shared_ptr<arrow::Field>&
field);
+extern "C" SEXP _arrow_Field__metadata(SEXP field_sexp){
+BEGIN_CPP11
+ arrow::r::Input<const std::shared_ptr<arrow::Field>&>::type
field(field_sexp);
+ return cpp11::as_sexp(Field__metadata(field));
+END_CPP11
+}
+// field.cpp
+std::shared_ptr<arrow::Field> Field__WithMetadata(const
std::shared_ptr<arrow::Field>& field, cpp11::strings metadata);
+extern "C" SEXP _arrow_Field__WithMetadata(SEXP field_sexp, SEXP
metadata_sexp){
+BEGIN_CPP11
+ arrow::r::Input<const std::shared_ptr<arrow::Field>&>::type
field(field_sexp);
+ arrow::r::Input<cpp11::strings>::type metadata(metadata_sexp);
+ return cpp11::as_sexp(Field__WithMetadata(field, metadata));
+END_CPP11
+}
+// field.cpp
+std::shared_ptr<arrow::Field> Field__RemoveMetadata(const
std::shared_ptr<arrow::Field>& field);
+extern "C" SEXP _arrow_Field__RemoveMetadata(SEXP field_sexp){
+BEGIN_CPP11
+ arrow::r::Input<const std::shared_ptr<arrow::Field>&>::type
field(field_sexp);
+ return cpp11::as_sexp(Field__RemoveMetadata(field));
+END_CPP11
+}
// filesystem.cpp
fs::FileType fs___FileInfo__type(const std::shared_ptr<fs::FileInfo>& x);
extern "C" SEXP _arrow_fs___FileInfo__type(SEXP x_sexp){
@@ -6009,9 +6043,13 @@ static const R_CallMethodDef CallEntries[] = {
{ "_arrow_Field__initialize", (DL_FUNC)
&_arrow_Field__initialize, 3},
{ "_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString,
1},
{ "_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1},
- { "_arrow_Field__Equals", (DL_FUNC) &_arrow_Field__Equals, 2},
+ { "_arrow_Field__Equals", (DL_FUNC) &_arrow_Field__Equals, 3},
{ "_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable,
1},
{ "_arrow_Field__type", (DL_FUNC) &_arrow_Field__type, 1},
+ { "_arrow_Field__HasMetadata", (DL_FUNC)
&_arrow_Field__HasMetadata, 1},
+ { "_arrow_Field__metadata", (DL_FUNC) &_arrow_Field__metadata,
1},
+ { "_arrow_Field__WithMetadata", (DL_FUNC)
&_arrow_Field__WithMetadata, 2},
+ { "_arrow_Field__RemoveMetadata", (DL_FUNC)
&_arrow_Field__RemoveMetadata, 1},
{ "_arrow_fs___FileInfo__type", (DL_FUNC)
&_arrow_fs___FileInfo__type, 1},
{ "_arrow_fs___FileInfo__set_type", (DL_FUNC)
&_arrow_fs___FileInfo__set_type, 2},
{ "_arrow_fs___FileInfo__path", (DL_FUNC)
&_arrow_fs___FileInfo__path, 1},
diff --git a/r/src/field.cpp b/r/src/field.cpp
index 87c9f3e2f3..22a87bfbf2 100644
--- a/r/src/field.cpp
+++ b/r/src/field.cpp
@@ -18,6 +18,7 @@
#include "./arrow_types.h"
#include <arrow/type.h>
+#include <arrow/util/key_value_metadata.h>
// [[arrow::export]]
std::shared_ptr<arrow::Field> Field__initialize(
@@ -38,8 +39,46 @@ std::string Field__name(const std::shared_ptr<arrow::Field>&
field) {
// [[arrow::export]]
bool Field__Equals(const std::shared_ptr<arrow::Field>& field,
- const std::shared_ptr<arrow::Field>& other) {
- return field->Equals(other);
+ const std::shared_ptr<arrow::Field>& other, bool
check_metadata) {
+ return field->Equals(other, check_metadata);
+}
+
+// [[arrow::export]]
+bool Field__HasMetadata(const std::shared_ptr<arrow::Field>& field) {
+ return field->HasMetadata();
+}
+
+// [[arrow::export]]
+cpp11::writable::list Field__metadata(const std::shared_ptr<arrow::Field>&
field) {
+ auto meta = field->metadata();
+ int64_t n = 0;
+ if (field->HasMetadata()) {
+ n = meta->size();
+ }
+ cpp11::writable::list out(n);
+ std::vector<std::string> names_out(n);
+ for (int i = 0; i < n; i++) {
+ out[i] = cpp11::as_sexp(meta->value(i));
+ names_out[i] = meta->key(i);
+ }
+ out.names() = names_out;
+ return out;
+}
+
+// [[arrow::export]]
+std::shared_ptr<arrow::Field> Field__WithMetadata(
+ const std::shared_ptr<arrow::Field>& field, cpp11::strings metadata) {
+ auto values = cpp11::as_cpp<std::vector<std::string>>(metadata);
+ auto names = cpp11::as_cpp<std::vector<std::string>>(metadata.attr("names"));
+ auto kv =
+ std::make_shared<arrow::KeyValueMetadata>(std::move(names),
std::move(values));
+ return field->WithMetadata(std::move(kv));
+}
+
+// [[arrow::export]]
+std::shared_ptr<arrow::Field> Field__RemoveMetadata(
+ const std::shared_ptr<arrow::Field>& field) {
+ return field->RemoveMetadata();
}
// [[arrow::export]]
diff --git a/r/tests/testthat/test-field.R b/r/tests/testthat/test-field.R
index 6cef48ad54..e1972180fc 100644
--- a/r/tests/testthat/test-field.R
+++ b/r/tests/testthat/test-field.R
@@ -63,3 +63,58 @@ test_that("Field to C-interface", {
# must clean up the pointer or we leak
delete_arrow_schema(ptr)
})
+
+test_that("Field metadata", {
+ x <- field("x", int32())
+ expect_false(x$HasMetadata)
+ expect_null(x$metadata)
+
+ x_meta <- field("x", int32(), metadata = list(key = "value"))
+ expect_true(x_meta$HasMetadata)
+ expect_identical(x_meta$metadata, list(key = "value"))
+
+ x_meta2 <- x$WithMetadata(list(key = "value"))
+ expect_true(x_meta2$HasMetadata)
+ expect_identical(x_meta2$metadata, list(key = "value"))
+
+ x_no_meta <- x_meta$RemoveMetadata()
+ expect_false(x_no_meta$HasMetadata)
+ expect_null(x_no_meta$metadata)
+})
+
+test_that("Field$Equals with check_metadata", {
+ x <- field("x", int32())
+ x_meta <- field("x", int32(), metadata = list(key = "value"))
+
+ expect_true(x$Equals(x_meta))
+ expect_false(x$Equals(x_meta, check_metadata = TRUE))
+ expect_true(x == x_meta)
+})
+
+test_that("Field WithMetadata(NULL) removes metadata", {
+ x <- field("x", int32(), metadata = list(key = "value"))
+ x_empty <- x$WithMetadata(NULL)
+ expect_false(x_empty$HasMetadata)
+ expect_null(x_empty$metadata)
+})
+
+test_that("Field metadata IPC roundtrip", {
+ x <- field("x", int32(), metadata = list(key = "value"))
+ tab <- Table$create(x = 1L, schema = schema(x))
+ bytes <- write_to_raw(tab)
+ roundtripped <- read_ipc_stream(bytes, as_data_frame = FALSE)
+ expect_true(roundtripped$schema$GetFieldByName("x")$Equals(x, check_metadata
= TRUE))
+})
+
+test_that("Field metadata with duplicate keys", {
+ x <- field("x", int32(), metadata = list(a = "1", a = "2"))
+ expect_true(x$HasMetadata)
+ expect_length(x$metadata, 2)
+ expect_equal(x$metadata, list(a = "1", a = "2"))
+})
+
+test_that("Field metadata on nested struct child fields", {
+ inner <- field("y", int32(), metadata = list(inner_key = "inner_value"))
+ outer <- field("outer", struct__(list(inner)))
+ expect_true(outer$type$GetFieldByName("y")$Equals(inner, check_metadata =
TRUE))
+})