This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 17f416f80f GH-34422: [R] Expose GcsFileSystem$options (#34477)
17f416f80f is described below

commit 17f416f80f0bccd58173308d8e0aa326363bd388
Author: Bryce Mecum <[email protected]>
AuthorDate: Wed Mar 8 17:34:40 2023 -0900

    GH-34422: [R] Expose GcsFileSystem$options (#34477)
    
    Closes https://github.com/apache/arrow/issues/34422
    
    ### Rationale for this change
    
    Exposing options on GcsFileSystem objects is useful for testing but may 
also be useful to R arrow users who are using GCS (gs_bucket). The 
implementation here is one-way, which might be a good point for discussion in 
code review.
    
    ### Are these changes tested?
    
    I've added two tests which pass.
    
    ### Are there any user-facing changes?
    
    Yes, though I wasn't sure if it made sense to document it. With this 
change, a user can run the following to get the options:
    
    ```r
    > bucket <- gs_bucket("voltrondata-labs-datasets")
    > bucket$base_fs$options
    $anonymous
    [1] FALSE
    
    $scheme
    [1] "https"
    
    $retry_limit_seconds
    [1] 15
    ```
    * Closes: #34422
    
    Lead-authored-by: Bryce Mecum <[email protected]>
    Co-authored-by: Nic Crane <[email protected]>
    Signed-off-by: Dewey Dunnington <[email protected]>
---
 r/R/arrowExports.R          |  4 +++
 r/R/filesystem.R            | 29 ++++++++++++++++++---
 r/man/FileSystem.Rd         |  4 +--
 r/src/arrowExports.cpp      | 16 ++++++++++++
 r/src/filesystem.cpp        | 61 +++++++++++++++++++++++++++++++++++++++++++++
 r/tests/testthat/test-gcs.R | 50 ++++++++++++++++++++++++++++++-------
 6 files changed, 150 insertions(+), 14 deletions(-)

diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 5e807fbab1..a318c7a4f3 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1344,6 +1344,10 @@ fs___GcsFileSystem__Make <- function(anonymous, options) 
{
   .Call(`_arrow_fs___GcsFileSystem__Make`, anonymous, options)
 }
 
+fs___GcsFileSystem__options <- function(fs) {
+  .Call(`_arrow_fs___GcsFileSystem__options`, fs)
+}
+
 io___Readable__Read <- function(x, nbytes) {
   .Call(`_arrow_io___Readable__Read`, x, nbytes)
 }
diff --git a/r/R/filesystem.R b/r/R/filesystem.R
index dc95fda91f..d6554239f6 100644
--- a/r/R/filesystem.R
+++ b/r/R/filesystem.R
@@ -165,8 +165,8 @@ FileSelector$create <- function(base_dir, allow_not_found = 
FALSE, recursive = F
 #'    credentials using standard GCS configuration methods.
 #' - `access_token`: optional string for authentication. Should be provided 
along
 #'   with `expiration`
-#' - `expiration`: optional date representing point at which `access_token` 
will
-#'   expire.
+#' - `expiration`: `POSIXct`. optional datetime representing point at which
+#'   `access_token` will expire.
 #' - `json_credentials`: optional string for authentication. Point to a JSON
 #'   credentials file downloaded from GCS.
 #' - `endpoint_override`: if non-empty, will connect to provided host name / 
port,
@@ -503,7 +503,21 @@ gs_bucket <- function(bucket, ...) {
 #' @rdname FileSystem
 #' @export
 GcsFileSystem <- R6Class("GcsFileSystem",
-  inherit = FileSystem
+  inherit = FileSystem,
+  active = list(
+    options = function() {
+      out <- fs___GcsFileSystem__options(self)
+
+      # Convert from nanoseconds to POSIXct w/ UTC tz
+      if ("expiration" %in% names(out)) {
+        out$expiration <- as.POSIXct(
+          out$expiration / 1000000000, origin = "1970-01-01", tz = "UTC"
+        )
+      }
+
+      out
+    }
+  )
 )
 GcsFileSystem$create <- function(anonymous = FALSE, retry_limit_seconds = 15, 
...) {
   # The default retry limit in C++ is 15 minutes, but that is experienced as
@@ -547,6 +561,15 @@ GcsFileSystem$create <- function(anonymous = FALSE, 
retry_limit_seconds = 15, ..
     )
   }
 
+  # Stop if expiration isn't a POSIXct
+  if ("expiration" %in% names(options) && !inherits(options$expiration, 
"POSIXct")) {
+    stop(
+      paste(
+        "Option 'expiration' must be of class POSIXct, not",
+        class(options$expiration)[[1]]),
+      call. = FALSE)
+  }
+
   options$retry_limit_seconds <- retry_limit_seconds
 
   fs___GcsFileSystem__Make(anonymous, options)
diff --git a/r/man/FileSystem.Rd b/r/man/FileSystem.Rd
index f0b9cea05a..c9586f70e7 100644
--- a/r/man/FileSystem.Rd
+++ b/r/man/FileSystem.Rd
@@ -67,8 +67,8 @@ SDK default is used (typically 1 second).
 credentials using standard GCS configuration methods.
 \item \code{access_token}: optional string for authentication. Should be 
provided along
 with \code{expiration}
-\item \code{expiration}: optional date representing point at which 
\code{access_token} will
-expire.
+\item \code{expiration}: \code{POSIXct}. optional datetime representing point 
at which
+\code{access_token} will expire.
 \item \code{json_credentials}: optional string for authentication. Point to a 
JSON
 credentials file downloaded from GCS.
 \item \code{endpoint_override}: if non-empty, will connect to provided host 
name / port,
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index dade762683..3122361571 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -3347,6 +3347,21 @@ extern "C" SEXP _arrow_fs___GcsFileSystem__Make(SEXP 
anonymous_sexp, SEXP option
 }
 #endif
 
+// filesystem.cpp
+#if defined(ARROW_R_WITH_GCS)
+cpp11::list fs___GcsFileSystem__options(const 
std::shared_ptr<fs::GcsFileSystem>& fs);
+extern "C" SEXP _arrow_fs___GcsFileSystem__options(SEXP fs_sexp){
+BEGIN_CPP11
+       arrow::r::Input<const std::shared_ptr<fs::GcsFileSystem>&>::type 
fs(fs_sexp);
+       return cpp11::as_sexp(fs___GcsFileSystem__options(fs));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_fs___GcsFileSystem__options(SEXP fs_sexp){
+       Rf_error("Cannot call fs___GcsFileSystem__options(). See 
https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow 
C++ libraries. ");
+}
+#endif
+
 // io.cpp
 std::shared_ptr<arrow::Buffer> io___Readable__Read(const 
std::shared_ptr<arrow::io::Readable>& x, int64_t nbytes);
 extern "C" SEXP _arrow_io___Readable__Read(SEXP x_sexp, SEXP nbytes_sexp){
@@ -5643,6 +5658,7 @@ static const R_CallMethodDef CallEntries[] = {
                { "_arrow_fs___S3FileSystem__create", (DL_FUNC) 
&_arrow_fs___S3FileSystem__create, 17}, 
                { "_arrow_fs___S3FileSystem__region", (DL_FUNC) 
&_arrow_fs___S3FileSystem__region, 1}, 
                { "_arrow_fs___GcsFileSystem__Make", (DL_FUNC) 
&_arrow_fs___GcsFileSystem__Make, 2}, 
+               { "_arrow_fs___GcsFileSystem__options", (DL_FUNC) 
&_arrow_fs___GcsFileSystem__options, 1}, 
                { "_arrow_io___Readable__Read", (DL_FUNC) 
&_arrow_io___Readable__Read, 2}, 
                { "_arrow_io___InputStream__Close", (DL_FUNC) 
&_arrow_io___InputStream__Close, 1}, 
                { "_arrow_io___OutputStream__Close", (DL_FUNC) 
&_arrow_io___OutputStream__Close, 1}, 
diff --git a/r/src/filesystem.cpp b/r/src/filesystem.cpp
index 2875cbd429..cd795c0f80 100644
--- a/r/src/filesystem.cpp
+++ b/r/src/filesystem.cpp
@@ -20,6 +20,7 @@
 
 #include <arrow/filesystem/filesystem.h>
 #include <arrow/filesystem/localfs.h>
+#include <arrow/util/key_value_metadata.h>
 
 namespace fs = ::arrow::fs;
 namespace io = ::arrow::io;
@@ -422,4 +423,64 @@ std::shared_ptr<fs::GcsFileSystem> 
fs___GcsFileSystem__Make(bool anonymous,
   return fs::GcsFileSystem::Make(gcs_opts, io_context);
 }
 
+// [[gcs::export]]
+cpp11::list fs___GcsFileSystem__options(const 
std::shared_ptr<fs::GcsFileSystem>& fs) {
+  using cpp11::literals::operator"" _nm;
+
+  cpp11::writable::list out;
+
+  fs::GcsOptions opts = fs->options();
+
+  // GcsCredentials
+  out.push_back({"anonymous"_nm = opts.credentials.anonymous()});
+
+  if (opts.credentials.access_token() != "") {
+    out.push_back({"access_token"_nm = opts.credentials.access_token()});
+  }
+
+  if (opts.credentials.expiration().time_since_epoch().count() != 0) {
+    out.push_back({"expiration"_nm = cpp11::as_sexp<double>(
+                       
opts.credentials.expiration().time_since_epoch().count())});
+  }
+
+  if (opts.credentials.target_service_account() != "") {
+    out.push_back(
+        {"target_service_account"_nm = 
opts.credentials.target_service_account()});
+  }
+
+  if (opts.credentials.json_credentials() != "") {
+    out.push_back({"json_credentials"_nm = 
opts.credentials.json_credentials()});
+  }
+
+  // GcsOptions direct members
+  if (opts.endpoint_override != "") {
+    out.push_back({"endpoint_override"_nm = opts.endpoint_override});
+  }
+
+  if (opts.scheme != "") {
+    out.push_back({"scheme"_nm = opts.scheme});
+  }
+
+  if (opts.default_bucket_location != "") {
+    out.push_back({"default_bucket_location"_nm = 
opts.default_bucket_location});
+  }
+
+  out.push_back({"retry_limit_seconds"_nm = opts.retry_limit_seconds.value()});
+
+  // default_metadata
+  if (opts.default_metadata != nullptr && opts.default_metadata->size() > 0) {
+    cpp11::writable::strings metadata(opts.default_metadata->size());
+
+    metadata.names() = opts.default_metadata->keys();
+
+    for (int64_t i = 0; i < opts.default_metadata->size(); i++) {
+      metadata[static_cast<size_t>(i)] = opts.default_metadata->value(i);
+    }
+
+    out.push_back({"default_metadata"_nm = metadata});
+  }
+
+  return out;
+}
+
 #endif
diff --git a/r/tests/testthat/test-gcs.R b/r/tests/testthat/test-gcs.R
index c0a02193c5..e284beb225 100644
--- a/r/tests/testthat/test-gcs.R
+++ b/r/tests/testthat/test-gcs.R
@@ -24,20 +24,52 @@ test_that("FileSystem$from_uri with gs://", {
 })
 
 test_that("GcsFileSystem$create() options", {
-  # TODO: expose options as a list so we can confirm they are set?
   expect_r6_class(GcsFileSystem$create(), "GcsFileSystem")
   expect_r6_class(GcsFileSystem$create(anonymous = TRUE), "GcsFileSystem")
+
+  # Verify default options
+  expect_equal(GcsFileSystem$create()$options, list(
+    anonymous = FALSE,
+    scheme = "https",
+    retry_limit_seconds = 15
+  ))
+
+  # Verify a more complete set of options round-trips
+  options <- list(
+    anonymous = TRUE,
+    endpoint_override = "localhost:8888",
+    scheme = "http",
+    default_bucket_location = "here",
+    retry_limit_seconds = 30,
+    default_metadata = c(a = "list", of = "stuff")
+  )
+
+  fs <- do.call(GcsFileSystem$create, options)
+
   expect_r6_class(
-    GcsFileSystem$create(
-      anonymous = TRUE,
-      scheme = "http",
-      endpoint_override = "localhost:8888",
-      default_bucket_location = "here",
-      retry_limit_seconds = 30,
-      default_metadata = c(a = "list", of = "stuff")
-    ),
+    fs,
     "GcsFileSystem"
   )
+
+  expect_equal(
+    fs$options,
+    options
+  )
+
+  # Expiration round-trips
+  options <- list(
+    expiration = as.POSIXct("2030-01-01", tz = "UTC"),
+    access_token = "MY_TOKEN"
+  )
+  fs <- do.call(GcsFileSystem$create, options)
+
+  expect_equal(fs$options$expiration, options$expiration)
+
+  # Verify create fails if expiration isn't a POSIXct
+  expect_error(
+    GcsFileSystem$create(access_token = "", expiration = ""),
+    "must be of class POSIXct, not"
+  )
 })
 
 test_that("GcsFileSystem$create() input validation", {

Reply via email to