This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 17f416f80f GH-34422: [R] Expose GcsFileSystem$options (#34477)
17f416f80f is described below
commit 17f416f80f0bccd58173308d8e0aa326363bd388
Author: Bryce Mecum <[email protected]>
AuthorDate: Wed Mar 8 17:34:40 2023 -0900
GH-34422: [R] Expose GcsFileSystem$options (#34477)
Closes https://github.com/apache/arrow/issues/34422
### Rationale for this change
Exposing options on GcsFileSystem objects is useful for testing but may
also be useful to R arrow users who are using GCS (gs_bucket). The
implementation here is one-way (options can be read but not set), which might
be a good point for discussion in code review.
### Are these changes tested?
I've added two tests which pass.
### Are there any user-facing changes?
Yes, though I wasn't sure if it made sense to document it. With this
change, a user can run the following to get the options:
```r
> bucket <- gs_bucket("voltrondata-labs-datasets")
> bucket$base_fs$options
$anonymous
[1] FALSE
$scheme
[1] "https"
$retry_limit_seconds
[1] 15
```
* Closes: #34422
Lead-authored-by: Bryce Mecum <[email protected]>
Co-authored-by: Nic Crane <[email protected]>
Signed-off-by: Dewey Dunnington <[email protected]>
---
r/R/arrowExports.R | 4 +++
r/R/filesystem.R | 29 ++++++++++++++++++---
r/man/FileSystem.Rd | 4 +--
r/src/arrowExports.cpp | 16 ++++++++++++
r/src/filesystem.cpp | 61 +++++++++++++++++++++++++++++++++++++++++++++
r/tests/testthat/test-gcs.R | 50 ++++++++++++++++++++++++++++++-------
6 files changed, 150 insertions(+), 14 deletions(-)
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 5e807fbab1..a318c7a4f3 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1344,6 +1344,10 @@ fs___GcsFileSystem__Make <- function(anonymous, options)
{
.Call(`_arrow_fs___GcsFileSystem__Make`, anonymous, options)
}
+fs___GcsFileSystem__options <- function(fs) {
+ .Call(`_arrow_fs___GcsFileSystem__options`, fs)
+}
+
io___Readable__Read <- function(x, nbytes) {
.Call(`_arrow_io___Readable__Read`, x, nbytes)
}
diff --git a/r/R/filesystem.R b/r/R/filesystem.R
index dc95fda91f..d6554239f6 100644
--- a/r/R/filesystem.R
+++ b/r/R/filesystem.R
@@ -165,8 +165,8 @@ FileSelector$create <- function(base_dir, allow_not_found =
FALSE, recursive = F
#' credentials using standard GCS configuration methods.
#' - `access_token`: optional string for authentication. Should be provided
along
#' with `expiration`
-#' - `expiration`: optional date representing point at which `access_token`
will
-#' expire.
+#' - `expiration`: `POSIXct`. optional datetime representing point at which
+#' `access_token` will expire.
#' - `json_credentials`: optional string for authentication. Point to a JSON
#' credentials file downloaded from GCS.
#' - `endpoint_override`: if non-empty, will connect to provided host name /
port,
@@ -503,7 +503,21 @@ gs_bucket <- function(bucket, ...) {
#' @rdname FileSystem
#' @export
GcsFileSystem <- R6Class("GcsFileSystem",
- inherit = FileSystem
+ inherit = FileSystem,
+ active = list(
+ options = function() {
+ out <- fs___GcsFileSystem__options(self)
+
+ # Convert from nanoseconds to POSIXct w/ UTC tz
+ if ("expiration" %in% names(out)) {
+ out$expiration <- as.POSIXct(
+ out$expiration / 1000000000, origin = "1970-01-01", tz = "UTC"
+ )
+ }
+
+ out
+ }
+ )
)
GcsFileSystem$create <- function(anonymous = FALSE, retry_limit_seconds = 15,
...) {
# The default retry limit in C++ is 15 minutes, but that is experienced as
@@ -547,6 +561,15 @@ GcsFileSystem$create <- function(anonymous = FALSE,
retry_limit_seconds = 15, ..
)
}
+ # Stop if expiration isn't a POSIXct
+ if ("expiration" %in% names(options) && !inherits(options$expiration,
"POSIXct")) {
+ stop(
+ paste(
+ "Option 'expiration' must be of class POSIXct, not",
+ class(options$expiration)[[1]]),
+ call. = FALSE)
+ }
+
options$retry_limit_seconds <- retry_limit_seconds
fs___GcsFileSystem__Make(anonymous, options)
diff --git a/r/man/FileSystem.Rd b/r/man/FileSystem.Rd
index f0b9cea05a..c9586f70e7 100644
--- a/r/man/FileSystem.Rd
+++ b/r/man/FileSystem.Rd
@@ -67,8 +67,8 @@ SDK default is used (typically 1 second).
credentials using standard GCS configuration methods.
\item \code{access_token}: optional string for authentication. Should be
provided along
with \code{expiration}
-\item \code{expiration}: optional date representing point at which
\code{access_token} will
-expire.
+\item \code{expiration}: \code{POSIXct}. optional datetime representing point
at which
+\code{access_token} will expire.
\item \code{json_credentials}: optional string for authentication. Point to a
JSON
credentials file downloaded from GCS.
\item \code{endpoint_override}: if non-empty, will connect to provided host
name / port,
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index dade762683..3122361571 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -3347,6 +3347,21 @@ extern "C" SEXP _arrow_fs___GcsFileSystem__Make(SEXP
anonymous_sexp, SEXP option
}
#endif
+// filesystem.cpp
+#if defined(ARROW_R_WITH_GCS)
+cpp11::list fs___GcsFileSystem__options(const
std::shared_ptr<fs::GcsFileSystem>& fs);
+extern "C" SEXP _arrow_fs___GcsFileSystem__options(SEXP fs_sexp){
+BEGIN_CPP11
+ arrow::r::Input<const std::shared_ptr<fs::GcsFileSystem>&>::type
fs(fs_sexp);
+ return cpp11::as_sexp(fs___GcsFileSystem__options(fs));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_fs___GcsFileSystem__options(SEXP fs_sexp){
+ Rf_error("Cannot call fs___GcsFileSystem__options(). See
https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow
C++ libraries. ");
+}
+#endif
+
// io.cpp
std::shared_ptr<arrow::Buffer> io___Readable__Read(const
std::shared_ptr<arrow::io::Readable>& x, int64_t nbytes);
extern "C" SEXP _arrow_io___Readable__Read(SEXP x_sexp, SEXP nbytes_sexp){
@@ -5643,6 +5658,7 @@ static const R_CallMethodDef CallEntries[] = {
{ "_arrow_fs___S3FileSystem__create", (DL_FUNC)
&_arrow_fs___S3FileSystem__create, 17},
{ "_arrow_fs___S3FileSystem__region", (DL_FUNC)
&_arrow_fs___S3FileSystem__region, 1},
{ "_arrow_fs___GcsFileSystem__Make", (DL_FUNC)
&_arrow_fs___GcsFileSystem__Make, 2},
+ { "_arrow_fs___GcsFileSystem__options", (DL_FUNC)
&_arrow_fs___GcsFileSystem__options, 1},
{ "_arrow_io___Readable__Read", (DL_FUNC)
&_arrow_io___Readable__Read, 2},
{ "_arrow_io___InputStream__Close", (DL_FUNC)
&_arrow_io___InputStream__Close, 1},
{ "_arrow_io___OutputStream__Close", (DL_FUNC)
&_arrow_io___OutputStream__Close, 1},
diff --git a/r/src/filesystem.cpp b/r/src/filesystem.cpp
index 2875cbd429..cd795c0f80 100644
--- a/r/src/filesystem.cpp
+++ b/r/src/filesystem.cpp
@@ -20,6 +20,7 @@
#include <arrow/filesystem/filesystem.h>
#include <arrow/filesystem/localfs.h>
+#include <arrow/util/key_value_metadata.h>
namespace fs = ::arrow::fs;
namespace io = ::arrow::io;
@@ -422,4 +423,64 @@ std::shared_ptr<fs::GcsFileSystem>
fs___GcsFileSystem__Make(bool anonymous,
return fs::GcsFileSystem::Make(gcs_opts, io_context);
}
+// [[gcs::export]]
+cpp11::list fs___GcsFileSystem__options(const
std::shared_ptr<fs::GcsFileSystem>& fs) {
+ using cpp11::literals::operator"" _nm;
+
+ cpp11::writable::list out;
+
+ fs::GcsOptions opts = fs->options();
+
+ // GcsCredentials
+ out.push_back({"anonymous"_nm = opts.credentials.anonymous()});
+
+ if (opts.credentials.access_token() != "") {
+ out.push_back({"access_token"_nm = opts.credentials.access_token()});
+ }
+
+ if (opts.credentials.expiration().time_since_epoch().count() != 0) {
+ out.push_back({"expiration"_nm = cpp11::as_sexp<double>(
+
opts.credentials.expiration().time_since_epoch().count())});
+ }
+
+ if (opts.credentials.target_service_account() != "") {
+ out.push_back(
+ {"target_service_account"_nm =
opts.credentials.target_service_account()});
+ }
+
+ if (opts.credentials.json_credentials() != "") {
+ out.push_back({"json_credentials"_nm =
opts.credentials.json_credentials()});
+ }
+
+ // GcsOptions direct members
+ if (opts.endpoint_override != "") {
+ out.push_back({"endpoint_override"_nm = opts.endpoint_override});
+ }
+
+ if (opts.scheme != "") {
+ out.push_back({"scheme"_nm = opts.scheme});
+ }
+
+ if (opts.default_bucket_location != "") {
+ out.push_back({"default_bucket_location"_nm =
opts.default_bucket_location});
+ }
+
+ out.push_back({"retry_limit_seconds"_nm = opts.retry_limit_seconds.value()});
+
+ // default_metadata
+ if (opts.default_metadata != nullptr && opts.default_metadata->size() > 0) {
+ cpp11::writable::strings metadata(opts.default_metadata->size());
+
+ metadata.names() = opts.default_metadata->keys();
+
+ for (int64_t i = 0; i < opts.default_metadata->size(); i++) {
+ metadata[static_cast<size_t>(i)] = opts.default_metadata->value(i);
+ }
+
+ out.push_back({"default_metadata"_nm = metadata});
+ }
+
+ return out;
+}
+
#endif
diff --git a/r/tests/testthat/test-gcs.R b/r/tests/testthat/test-gcs.R
index c0a02193c5..e284beb225 100644
--- a/r/tests/testthat/test-gcs.R
+++ b/r/tests/testthat/test-gcs.R
@@ -24,20 +24,52 @@ test_that("FileSystem$from_uri with gs://", {
})
test_that("GcsFileSystem$create() options", {
- # TODO: expose options as a list so we can confirm they are set?
expect_r6_class(GcsFileSystem$create(), "GcsFileSystem")
expect_r6_class(GcsFileSystem$create(anonymous = TRUE), "GcsFileSystem")
+
+ # Verify default options
+ expect_equal(GcsFileSystem$create()$options, list(
+ anonymous = FALSE,
+ scheme = "https",
+ retry_limit_seconds = 15
+ ))
+
+ # Verify a more complete set of options round-trips
+ options <- list(
+ anonymous = TRUE,
+ endpoint_override = "localhost:8888",
+ scheme = "http",
+ default_bucket_location = "here",
+ retry_limit_seconds = 30,
+ default_metadata = c(a = "list", of = "stuff")
+ )
+
+ fs <- do.call(GcsFileSystem$create, options)
+
expect_r6_class(
- GcsFileSystem$create(
- anonymous = TRUE,
- scheme = "http",
- endpoint_override = "localhost:8888",
- default_bucket_location = "here",
- retry_limit_seconds = 30,
- default_metadata = c(a = "list", of = "stuff")
- ),
+ fs,
"GcsFileSystem"
)
+
+ expect_equal(
+ fs$options,
+ options
+ )
+
+ # Expiration round-trips
+ options <- list(
+ expiration = as.POSIXct("2030-01-01", tz = "UTC"),
+ access_token = "MY_TOKEN"
+ )
+ fs <- do.call(GcsFileSystem$create, options)
+
+ expect_equal(fs$options$expiration, options$expiration)
+
+ # Verify create fails if expiration isn't a POSIXct
+ expect_error(
+ GcsFileSystem$create(access_token = "", expiration = ""),
+ "must be of class POSIXct, not"
+ )
})
test_that("GcsFileSystem$create() input validation", {