This is an automated email from the ASF dual-hosted git repository.
pitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 5633a18f99 GH-49433: [C++] Buffer ARROW_LOG output to prevent thread
interleaving (#49663)
5633a18f99 is described below
commit 5633a18f996eaf39fb6283cb0ec4eaf7956fee7c
Author: Adrián Feito Blázquez <[email protected]>
AuthorDate: Wed Apr 22 14:41:56 2026 +0200
GH-49433: [C++] Buffer ARROW_LOG output to prevent thread interleaving
(#49663)
### Rationale for this change
When using the C++ `ARROW_LOG` macros from multiple threads, messages often
get mingled together in `stderr` because the `operator<<` writes directly to
the global stream piece-by-piece, which is not thread-safe.
### What changes are included in this PR?
This PR introduces an internal `std::ostringstream` buffer to the fallback
`CerrLog` class.
Instead of writing to `std::cerr` immediately on every `<<` operation, the
messages are accumulated locally within the `CerrLog` instance. The completed
string is then flushed atomically to `std::cerr` upon the object's destruction.
### Are these changes tested?
Yes. I built the C++ project locally and ran the test suite (`ctest`) to
ensure no existing logging behavior or IPC functionality was broken.
### Are there any user-facing changes?
No API changes. Users will simply notice that multi-threaded `ARROW_LOG`
console output is now cleanly separated per line instead of interleaved.
Closes #49433
* GitHub Issue: #49433
Lead-authored-by: shockp <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/util/logging.cc | 11 +++++++---
cpp/src/arrow/util/logging_test.cc | 41 ++++++++++++++++++++++++++++----------
2 files changed, 38 insertions(+), 14 deletions(-)
diff --git a/cpp/src/arrow/util/logging.cc b/cpp/src/arrow/util/logging.cc
index 993c5306ca..9a61d261ed 100644
--- a/cpp/src/arrow/util/logging.cc
+++ b/cpp/src/arrow/util/logging.cc
@@ -24,6 +24,8 @@
#endif
#include <cstdlib>
#include <iostream>
+#include <mutex>
+#include <sstream>
#ifdef ARROW_USE_GLOG
@@ -67,7 +69,9 @@ class CerrLog {
virtual ~CerrLog() {
if (has_logged_) {
- std::cerr << std::endl;
+ static std::mutex cerr_mutex;
+ std::lock_guard<std::mutex> lock(cerr_mutex);
+ std::cerr << std::move(buffer_).str() << std::endl;
}
if (severity_ == ArrowLogLevel::ARROW_FATAL) {
PrintBackTrace();
@@ -77,14 +81,14 @@ class CerrLog {
std::ostream& Stream() {
has_logged_ = true;
- return std::cerr;
+ return buffer_;
}
template <class T>
CerrLog& operator<<(const T& t) {
if (severity_ != ArrowLogLevel::ARROW_DEBUG) {
has_logged_ = true;
- std::cerr << t;
+ buffer_ << t;
}
return *this;
}
@@ -92,6 +96,7 @@ class CerrLog {
protected:
const ArrowLogLevel severity_;
bool has_logged_;
+ std::ostringstream buffer_;
void PrintBackTrace() {
#ifdef ARROW_WITH_BACKTRACE
diff --git a/cpp/src/arrow/util/logging_test.cc
b/cpp/src/arrow/util/logging_test.cc
index 280edd6111..2b846afdd6 100644
--- a/cpp/src/arrow/util/logging_test.cc
+++ b/cpp/src/arrow/util/logging_test.cc
@@ -18,6 +18,8 @@
#include <chrono>
#include <cstdint>
#include <iostream>
+#include <thread>
+#include <vector>
#include <gtest/gtest.h>
@@ -40,17 +42,11 @@ int64_t current_time_ms() {
// This file just print some information using the logging macro.
void PrintLog() {
- ARROW_LOG(DEBUG) << "This is the"
- << " DEBUG"
- << " message";
- ARROW_LOG(INFO) << "This is the"
- << " INFO message";
- ARROW_LOG(WARNING) << "This is the"
- << " WARNING message";
- ARROW_LOG(ERROR) << "This is the"
- << " ERROR message";
- ARROW_CHECK(true) << "This is a ARROW_CHECK"
- << " message but it won't show up";
+ ARROW_LOG(DEBUG) << "This is the" << " DEBUG" << " message";
+ ARROW_LOG(INFO) << "This is the" << " INFO message";
+ ARROW_LOG(WARNING) << "This is the" << " WARNING message";
+ ARROW_LOG(ERROR) << "This is the" << " ERROR message";
+ ARROW_CHECK(true) << "This is a ARROW_CHECK" << " message but it won't show
up";
// The following 2 lines should not run since it will cause program failure.
// ARROW_LOG(FATAL) << "This is the FATAL message";
// ARROW_CHECK(false) << "This is a ARROW_CHECK message but it won't show
up";
@@ -95,6 +91,29 @@ TEST(ArrowCheck, PayloadEvaluatedOnFailure) {
ASSERT_TRUE(tracer.was_printed);
}
+TEST(ArrowLog, MultiThreadedLogging) {
+ // This is mostly a visual test that logging from multiple threads produces
+ // a clean output without interleaving messages (GH-49433).
+ constexpr int kNumThreads = 10;
+ constexpr int kNumMessges = 10;
+ std::vector<std::thread> threads;
+
+ threads.reserve(kNumThreads);
+
+ for (int i = 0; i < kNumThreads; ++i) {
+ threads.emplace_back([i]() {
+ for (int j = 0; j < kNumMessges; ++j) {
+ ARROW_LOG(INFO) << "Thread " << i << " message " << j
+ << " - testing thread safety.";
+ }
+ });
+ }
+
+ for (auto& thread : threads) {
+ thread.join();
+ }
+}
+
} // namespace util
TEST(DcheckMacros, DoNotEvaluateReleaseMode) {