This is an automated email from the ASF dual-hosted git repository.

pitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 5633a18f99 GH-49433: [C++] Buffer ARROW_LOG output to prevent thread 
interleaving (#49663)
5633a18f99 is described below

commit 5633a18f996eaf39fb6283cb0ec4eaf7956fee7c
Author: Adrián Feito Blázquez <[email protected]>
AuthorDate: Wed Apr 22 14:41:56 2026 +0200

    GH-49433: [C++] Buffer ARROW_LOG output to prevent thread interleaving 
(#49663)
    
    ### Rationale for this change
    
    When using the C++ `ARROW_LOG` macros from multiple threads, messages often 
get mingled together in `stderr` because the `operator<<` writes directly to 
the global stream piece-by-piece, which is not thread-safe.
    
    ### What changes are included in this PR?
    
    This PR introduces an internal `std::ostringstream` buffer to the fallback 
`CerrLog` class.
    Instead of writing to `std::cerr` immediately on every `<<` operation, the 
messages are accumulated locally within the `CerrLog` instance. The completed 
string is then flushed atomically to `std::cerr` upon the object's destruction.
    
    ### Are these changes tested?
    Yes. I built the C++ project locally and ran the test suite (`ctest`) to 
ensure no existing logging behavior or IPC functionality was broken.
    
    ### Are there any user-facing changes?
    No API changes. Users will simply notice that multi-threaded `ARROW_LOG` 
console output is now cleanly separated per line instead of interleaved.
    
    Closes #49433
    * GitHub Issue: #49433
    
    Lead-authored-by: shockp <[email protected]>
    Co-authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/util/logging.cc      | 11 +++++++---
 cpp/src/arrow/util/logging_test.cc | 41 ++++++++++++++++++++++++++++----------
 2 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/cpp/src/arrow/util/logging.cc b/cpp/src/arrow/util/logging.cc
index 993c5306ca..9a61d261ed 100644
--- a/cpp/src/arrow/util/logging.cc
+++ b/cpp/src/arrow/util/logging.cc
@@ -24,6 +24,8 @@
 #endif
 #include <cstdlib>
 #include <iostream>
+#include <mutex>
+#include <sstream>
 
 #ifdef ARROW_USE_GLOG
 
@@ -67,7 +69,9 @@ class CerrLog {
 
   virtual ~CerrLog() {
     if (has_logged_) {
-      std::cerr << std::endl;
+      static std::mutex cerr_mutex;
+      std::lock_guard<std::mutex> lock(cerr_mutex);
+      std::cerr << std::move(buffer_).str() << std::endl;
     }
     if (severity_ == ArrowLogLevel::ARROW_FATAL) {
       PrintBackTrace();
@@ -77,14 +81,14 @@ class CerrLog {
 
   std::ostream& Stream() {
     has_logged_ = true;
-    return std::cerr;
+    return buffer_;
   }
 
   template <class T>
   CerrLog& operator<<(const T& t) {
     if (severity_ != ArrowLogLevel::ARROW_DEBUG) {
       has_logged_ = true;
-      std::cerr << t;
+      buffer_ << t;
     }
     return *this;
   }
@@ -92,6 +96,7 @@ class CerrLog {
  protected:
   const ArrowLogLevel severity_;
   bool has_logged_;
+  std::ostringstream buffer_;
 
   void PrintBackTrace() {
 #ifdef ARROW_WITH_BACKTRACE
diff --git a/cpp/src/arrow/util/logging_test.cc 
b/cpp/src/arrow/util/logging_test.cc
index 280edd6111..2b846afdd6 100644
--- a/cpp/src/arrow/util/logging_test.cc
+++ b/cpp/src/arrow/util/logging_test.cc
@@ -18,6 +18,8 @@
 #include <chrono>
 #include <cstdint>
 #include <iostream>
+#include <thread>
+#include <vector>
 
 #include <gtest/gtest.h>
 
@@ -40,17 +42,11 @@ int64_t current_time_ms() {
 // This file just print some information using the logging macro.
 
 void PrintLog() {
-  ARROW_LOG(DEBUG) << "This is the"
-                   << " DEBUG"
-                   << " message";
-  ARROW_LOG(INFO) << "This is the"
-                  << " INFO message";
-  ARROW_LOG(WARNING) << "This is the"
-                     << " WARNING message";
-  ARROW_LOG(ERROR) << "This is the"
-                   << " ERROR message";
-  ARROW_CHECK(true) << "This is a ARROW_CHECK"
-                    << " message but it won't show up";
+  ARROW_LOG(DEBUG) << "This is the" << " DEBUG" << " message";
+  ARROW_LOG(INFO) << "This is the" << " INFO message";
+  ARROW_LOG(WARNING) << "This is the" << " WARNING message";
+  ARROW_LOG(ERROR) << "This is the" << " ERROR message";
+  ARROW_CHECK(true) << "This is a ARROW_CHECK" << " message but it won't show 
up";
   // The following 2 lines should not run since it will cause program failure.
   // ARROW_LOG(FATAL) << "This is the FATAL message";
   // ARROW_CHECK(false) << "This is a ARROW_CHECK message but it won't show 
up";
@@ -95,6 +91,29 @@ TEST(ArrowCheck, PayloadEvaluatedOnFailure) {
   ASSERT_TRUE(tracer.was_printed);
 }
 
+TEST(ArrowLog, MultiThreadedLogging) {
+  // This is mostly a visual test that logging from multiple threads produces
+  // a clean output without interleaving messages (GH-49433).
+  constexpr int kNumThreads = 10;
+  constexpr int kNumMessges = 10;
+  std::vector<std::thread> threads;
+
+  threads.reserve(kNumThreads);
+
+  for (int i = 0; i < kNumThreads; ++i) {
+    threads.emplace_back([i]() {
+      for (int j = 0; j < kNumMessges; ++j) {
+        ARROW_LOG(INFO) << "Thread " << i << " message " << j
+                        << " - testing thread safety.";
+      }
+    });
+  }
+
+  for (auto& thread : threads) {
+    thread.join();
+  }
+}
+
 }  // namespace util
 
 TEST(DcheckMacros, DoNotEvaluateReleaseMode) {

Reply via email to