common/Common.hpp | 2 +- wsd/DocumentBroker.cpp | 8 +++++++- wsd/LOOLWSD.cpp | 16 ++++++++++++---- 3 files changed, 20 insertions(+), 6 deletions(-)
New commits: commit 0b9bc008178659470d063a42ec93a943a3e6b08f Author: Ashod Nakashian <ashod.nakash...@collabora.co.uk> AuthorDate: Fri Oct 18 08:10:12 2019 -0400 Commit: Ashod Nakashian <ashnak...@gmail.com> CommitDate: Tue Oct 29 02:30:07 2019 +0100 wsd: dynamic child timeout The initial child spawning takes significantly longer than subsequent ones (for obvious reasons) and this led to unit-tests being sensitive to the timeout we use for child spawning. Too short, and we spawn more than we want on startup; too long, and crash-recovery tests fail (we don't recover fast enough, as we wait too long before spawning new children). Dynamically setting the timeout allows us to give a longer timeout at startup, and reduce it afterwards. Reviewed-on: https://gerrit.libreoffice.org/81194 Reviewed-by: Andras Timar <andras.ti...@collabora.com> Tested-by: Andras Timar <andras.ti...@collabora.com> (cherry picked from commit 32fa1d95fc2ec65866d0cb47d619885182db7040) Change-Id: I8423f5c6619e57030ab43d519aaa41d8712c36d3 Reviewed-on: https://gerrit.libreoffice.org/81570 Reviewed-by: Ashod Nakashian <ashnak...@gmail.com> Tested-by: Ashod Nakashian <ashnak...@gmail.com> diff --git a/common/Common.hpp b/common/Common.hpp index f35ca08f8..7a02dfaae 100644 --- a/common/Common.hpp +++ b/common/Common.hpp @@ -16,7 +16,7 @@ constexpr int DEFAULT_CLIENT_PORT_NUMBER = 9980; constexpr int COMMAND_TIMEOUT_MS = 5000; -constexpr int CHILD_TIMEOUT_MS = COMMAND_TIMEOUT_MS * 2; +constexpr int CHILD_TIMEOUT_MS = COMMAND_TIMEOUT_MS; constexpr int CHILD_REBALANCE_INTERVAL_MS = CHILD_TIMEOUT_MS / 10; constexpr int POLL_TIMEOUT_MS = COMMAND_TIMEOUT_MS / 5; constexpr int WS_SEND_TIMEOUT_MS = 1000; diff --git a/wsd/DocumentBroker.cpp b/wsd/DocumentBroker.cpp index 18568b53b..d3b0c654f 100644 --- a/wsd/DocumentBroker.cpp +++ b/wsd/DocumentBroker.cpp @@ -407,7 +407,13 @@ void DocumentBroker::pollThread() } // Flush socket data first. 
- const int flushTimeoutMs = POLL_TIMEOUT_MS * 2; // ~1000ms + constexpr int flushTimeoutMs = POLL_TIMEOUT_MS * 2; // ~1000ms + LOG_INF("Flushing socket for doc [" + << _docKey << "] for " << flushTimeoutMs << " ms. stop: " << _stop + << ", continuePolling: " << _poll->continuePolling() + << ", ShutdownRequestFlag: " << SigUtil::getShutdownRequestFlag() + << ", TerminationFlag: " << SigUtil::getTerminationFlag() + << ". Terminating child with reason: [" << _closeReason << "]."); const auto flushStartTime = std::chrono::steady_clock::now(); while (_poll->getSocketCount()) { diff --git a/wsd/LOOLWSD.cpp b/wsd/LOOLWSD.cpp index d7ebcbb26..e43b827ec 100644 --- a/wsd/LOOLWSD.cpp +++ b/wsd/LOOLWSD.cpp @@ -211,6 +211,8 @@ extern "C" { void dump_state(void); /* easy for gdb */ } static int careerSpanMs = 0; #endif +/// The timeout for a child to spawn, initially high, then reset to the default. +int ChildSpawnTimeoutMs = CHILD_TIMEOUT_MS * 4; bool LOOLWSD::NoCapsForKit = false; std::atomic<unsigned> LOOLWSD::NumConnections; std::set<std::string> LOOLWSD::EditFileExtensions; @@ -425,7 +427,7 @@ static int rebalanceChildren(int balance) const auto duration = (std::chrono::steady_clock::now() - LastForkRequestTime); const std::chrono::milliseconds::rep durationMs = std::chrono::duration_cast<std::chrono::milliseconds>(duration).count(); - if (OutstandingForks != 0 && durationMs >= CHILD_TIMEOUT_MS) + if (OutstandingForks != 0 && durationMs >= ChildSpawnTimeoutMs) { // Children taking too long to spawn. // Forget we had requested any, and request anew. @@ -507,7 +509,7 @@ std::shared_ptr<ChildProcess> getNewChild_Blocks( } // With valgrind we need extended time to spawn kits. 
- const size_t timeoutMs = CHILD_TIMEOUT_MS / 2; + const size_t timeoutMs = ChildSpawnTimeoutMs / 2; LOG_TRC("Waiting for a new child for a max of " << timeoutMs << " ms."); const auto timeout = std::chrono::milliseconds(timeoutMs); #else @@ -1676,7 +1678,10 @@ bool LOOLWSD::createForKit() Admin::instance().setForKitPid(ForKitProcId); Admin::instance().setForKitWritePipe(ForKitWritePipe); - rebalanceChildren(LOOLWSD::NumPreSpawnedChildren - 1); + const int balance = LOOLWSD::NumPreSpawnedChildren - OutstandingForks; + if (balance > 0) + rebalanceChildren(balance); + return ForKitProcId != -1; #endif } @@ -3415,7 +3420,7 @@ int LOOLWSD::innerMain() } else { - const int timeoutMs = CHILD_TIMEOUT_MS * (LOOLWSD::NoCapsForKit ? 150 : 50); + const int timeoutMs = ChildSpawnTimeoutMs * (LOOLWSD::NoCapsForKit ? 150 : 50); const auto timeout = std::chrono::milliseconds(timeoutMs); LOG_TRC("Waiting for a new child for a max of " << timeoutMs << " ms."); if (!NewChildrenCV.wait_for(lock, timeout, []() { return !NewChildren.empty(); })) @@ -3453,6 +3458,9 @@ int LOOLWSD::innerMain() std::cerr << "Ready to accept connections on port " << ClientPortNumber << ".\n" << std::endl; #endif + // Reset the child-spawn timeout to the default, now that we're set. + ChildSpawnTimeoutMs = CHILD_TIMEOUT_MS; + const auto startStamp = std::chrono::steady_clock::now(); while (!SigUtil::getTerminationFlag() && !SigUtil::getShutdownRequestFlag()) _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits