nealrichardson commented on code in PR #41969:
URL: https://github.com/apache/arrow/pull/41969#discussion_r1626863986
##########
r/R/metadata.R:
##########
@@ -44,23 +44,96 @@
}
.deserialize_arrow_r_metadata <- function(x) {
- tryCatch(
- expr = {
- out <- unserialize(charToRaw(x))
-
- # if this is still raw, try decompressing
- if (is.raw(out)) {
- out <- unserialize(memDecompress(out, type = "gzip"))
- }
- out
- },
+ tryCatch(unserialize_r_metadata(x),
error = function(e) {
+ if (getOption("arrow.debug", FALSE)) {
+ print(conditionMessage(e))
+ }
warning("Invalid metadata$r", call. = FALSE)
NULL
}
)
}
+unserialize_r_metadata <- function(x) {
+ # Check that this is ASCII serialized data (as in, what we wrote)
+ if (!identical(substr(unclass(x), 1, 1), "A")) {
+ stop("Invalid serialized data")
+ }
+ out <- safe_unserialize(charToRaw(x))
+ # If it's still raw, check for the gzip magic number and uncompress
+ if (is.raw(out) && identical(out[1:2], as.raw(c(31, 139)))) {
+ decompressed <- memDecompress(out, type = "gzip")
+ if (!identical(substr(decompressed, 1, 1), "A")) {
+ stop("Invalid serialized data")
+ }
+ out <- safe_unserialize(decompressed)
+ }
+ if (!is.list(out)) {
+ stop("Invalid serialized data")
+ }
+ safe_r_metadata(out)
+}
+
+safe_unserialize <- function(x) {
+ # By capturing the data in a list, we can inspect it for promises without
+ # triggering their evaluation.
+ out <- list(unserialize(x))
+ if (typeof(out[[1]]) == "promise") {
+ stop("Serialized data contains a promise object")
Review Comment:
Currently it doesn't matter because this error gets swallowed in
https://github.com/apache/arrow/pull/41969/files#diff-659e9fa6b66e5a72b4e3f9ac79ffddf08f92d9ea3d7aa45bd8c73b9a022fa2e5R52
and in the end the user sees an opaque "Invalid metadata$r" warning. This is a
holdover from how we're currently doing the deserialization, any errors are
just trapped if it fails to deserialize and we return `NULL` with that warning.
Happy to revisit that though if there's interest.
##########
r/R/metadata.R:
##########
@@ -44,23 +44,96 @@
}
.deserialize_arrow_r_metadata <- function(x) {
- tryCatch(
- expr = {
- out <- unserialize(charToRaw(x))
-
- # if this is still raw, try decompressing
- if (is.raw(out)) {
- out <- unserialize(memDecompress(out, type = "gzip"))
- }
- out
- },
+ tryCatch(unserialize_r_metadata(x),
error = function(e) {
+ if (getOption("arrow.debug", FALSE)) {
+ print(conditionMessage(e))
+ }
warning("Invalid metadata$r", call. = FALSE)
NULL
}
)
}
+unserialize_r_metadata <- function(x) {
+ # Check that this is ASCII serialized data (as in, what we wrote)
+ if (!identical(substr(unclass(x), 1, 1), "A")) {
+ stop("Invalid serialized data")
+ }
+ out <- safe_unserialize(charToRaw(x))
+ # If it's still raw, check for the gzip magic number and uncompress
+ if (is.raw(out) && identical(out[1:2], as.raw(c(31, 139)))) {
+ decompressed <- memDecompress(out, type = "gzip")
+ if (!identical(substr(decompressed, 1, 1), "A")) {
+ stop("Invalid serialized data")
+ }
+ out <- safe_unserialize(decompressed)
+ }
+ if (!is.list(out)) {
+ stop("Invalid serialized data")
+ }
+ safe_r_metadata(out)
+}
+
+safe_unserialize <- function(x) {
+ # By capturing the data in a list, we can inspect it for promises without
+ # triggering their evaluation.
+ out <- list(unserialize(x))
+ if (typeof(out[[1]]) == "promise") {
+ stop("Serialized data contains a promise object")
+ }
+ out[[1]]
+}
+
+safe_r_metadata <- function(metadata, on_save = FALSE) {
Review Comment:
Sure, can do.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]