nealrichardson commented on code in PR #41969:
URL: https://github.com/apache/arrow/pull/41969#discussion_r1626867280
##########
r/R/metadata.R:
##########
@@ -44,23 +44,96 @@
}
.deserialize_arrow_r_metadata <- function(x) {
- tryCatch(
- expr = {
- out <- unserialize(charToRaw(x))
-
- # if this is still raw, try decompressing
- if (is.raw(out)) {
- out <- unserialize(memDecompress(out, type = "gzip"))
- }
- out
- },
+ tryCatch(unserialize_r_metadata(x),
error = function(e) {
+ if (getOption("arrow.debug", FALSE)) {
+ print(conditionMessage(e))
+ }
warning("Invalid metadata$r", call. = FALSE)
NULL
}
)
}
+unserialize_r_metadata <- function(x) {
+ # Check that this is ASCII serialized data (as in, what we wrote)
+ if (!identical(substr(unclass(x), 1, 1), "A")) {
+ stop("Invalid serialized data")
+ }
+ out <- safe_unserialize(charToRaw(x))
+ # If it's still raw, check for the gzip magic number and uncompress
+ if (is.raw(out) && identical(out[1:2], as.raw(c(31, 139)))) {
+ decompressed <- memDecompress(out, type = "gzip")
+ if (!identical(substr(decompressed, 1, 1), "A")) {
+ stop("Invalid serialized data")
+ }
+ out <- safe_unserialize(decompressed)
+ }
+ if (!is.list(out)) {
+ stop("Invalid serialized data")
+ }
+ safe_r_metadata(out)
+}
+
+safe_unserialize <- function(x) {
+ # By capturing the data in a list, we can inspect it for promises without
+ # triggering their evaluation.
+ out <- list(unserialize(x))
+ if (typeof(out[[1]]) == "promise") {
+ stop("Serialized data contains a promise object")
+ }
+ out[[1]]
+}
+
+safe_r_metadata <- function(metadata, on_save = FALSE) {
Review Comment:
Done in
https://github.com/apache/arrow/pull/41969/commits/1057b7823e4a0ce7cf5b54d59e9706dc3e966c31
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]