This is an automated email from the ASF dual-hosted git repository.

thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 68fb4644ea GH-50009: [R] FinalizeS3 segfaults for stale connection 
(#50081)
68fb4644ea is described below

commit 68fb4644eabebca683b37ac9500e742da75585df
Author: Nic Crane <[email protected]>
AuthorDate: Mon Jun 8 12:43:29 2026 +0200

    GH-50009: [R] FinalizeS3 segfaults for stale connection (#50081)
    
    ### Rationale for this change
    
    A user reported the R process crashing when reading from or writing to S3.
    The cause appears to be a stale S3 connection triggering SIGPIPE. See also
    #32026
    
    ### What changes are included in this PR?
    
    Install a SIGPIPE handler upon S3 initialisation so that SIGPIPE does not
    kill the process.
    
    ### Are these changes tested?
    
    No - and I'm not sure how I can really test this out.
    
    ### Are there any user-facing changes?
    
    No
    * GitHub Issue: #50009
    
    Authored-by: Nic Crane <[email protected]>
    Signed-off-by: Nic Crane <[email protected]>
---
 r/src/filesystem.cpp | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/r/src/filesystem.cpp b/r/src/filesystem.cpp
index 82cf99514d..9324a13ce0 100644
--- a/r/src/filesystem.cpp
+++ b/r/src/filesystem.cpp
@@ -256,10 +256,23 @@ std::string fs___SubTreeFileSystem__base_path(
   return file_system->base_path();
 }
 
+// Forward declaration - defined in the ARROW_R_WITH_S3 block below.
+#if defined(ARROW_R_WITH_S3)
+void EnsureS3InitializedWithSigpipeHandler();
+#endif
+
 // [[arrow::export]]
 cpp11::writable::list fs___FileSystemFromUri(const std::string& path) {
   using cpp11::literals::operator""_nm;
 
+#if defined(ARROW_R_WITH_S3)
+  // Initialize S3 before FileSystemFromUri so our options (with SIGPIPE handler)
+  // take effect before the C++ library's internal EnsureS3Initialized() call.
+  if (path.substr(0, 5) == "s3://") {
+    EnsureS3InitializedWithSigpipeHandler();
+  }
+#endif
+
   std::string out_path;
   auto io_context = MainRThread::GetInstance().CancellableIOContext();
   return cpp11::writable::list({"fs"_nm = cpp11::to_r6(ValueOrStop(
@@ -281,6 +294,20 @@ void fs___CopyFiles(const std::shared_ptr<fs::FileSystem>& 
source_fs,
 
 #include <arrow/filesystem/s3fs.h>
 
+// Initialize S3 with the SIGPIPE handler enabled. Without it, stale connections
+// in the SDK's connection pool can trigger SIGPIPE during Aws::ShutdownAPI(),
+// which causes R's signal handler to longjmp out of the teardown and segfault
+// (GH-50009, GH-32026).
+void EnsureS3InitializedWithSigpipeHandler() {
+  fs::S3GlobalOptions options = fs::S3GlobalOptions::Defaults();
+  options.install_sigpipe_handler = true;
+  auto status = fs::InitializeS3(options);
+  // InitializeS3 returns Invalid if already initialized - that's fine
+  if (!status.ok() && !fs::IsS3Initialized()) {
+    StopIfNotOk(status);
+  }
+}
+
 // [[s3::export]]
 std::shared_ptr<fs::S3FileSystem> fs___S3FileSystem__create(
     bool anonymous = false, std::string access_key = "", std::string 
secret_key = "",
@@ -291,9 +318,7 @@ std::shared_ptr<fs::S3FileSystem> fs___S3FileSystem__create(
     bool allow_bucket_creation = false, bool allow_bucket_deletion = false,
     bool check_directory_existence_before_creation = false, double 
connect_timeout = -1,
     double request_timeout = -1) {
-  // We need to ensure that S3 is initialized before we start messing with the
-  // options
-  StopIfNotOk(fs::EnsureS3Initialized());
+  EnsureS3InitializedWithSigpipeHandler();
   fs::S3Options s3_opts;
   // Handle auth (anonymous, keys, default)
   // (validation/internal coherence handled in R)

Reply via email to