common/Common.hpp      |    2 +-
 wsd/DocumentBroker.cpp |    8 +++++++-
 wsd/LOOLWSD.cpp        |   16 ++++++++++++----
 3 files changed, 20 insertions(+), 6 deletions(-)

New commits:
commit 0b9bc008178659470d063a42ec93a943a3e6b08f
Author:     Ashod Nakashian <ashod.nakash...@collabora.co.uk>
AuthorDate: Fri Oct 18 08:10:12 2019 -0400
Commit:     Ashod Nakashian <ashnak...@gmail.com>
CommitDate: Tue Oct 29 02:30:07 2019 +0100

    wsd: dynamic child timeout
    
    The initial child spawning takes significantly longer
    than subsequent ones (for obvious reasons) and this
    led to unit-tests being sensitive to the timeout
    we use for child spawning. Too short, and we
    spawn more than we want on startup, too long
    and crash-recovery tests fail (we don't
    recover fast enough, as we wait too long before
    spawning new children).
    
    Dynamically setting the timeout allows us to give
    a longer timeout at startup, and reduce it afterwards.
    
    Reviewed-on: https://gerrit.libreoffice.org/81194
    Reviewed-by: Andras Timar <andras.ti...@collabora.com>
    Tested-by: Andras Timar <andras.ti...@collabora.com>
    (cherry picked from commit 32fa1d95fc2ec65866d0cb47d619885182db7040)
    
    Change-Id: I8423f5c6619e57030ab43d519aaa41d8712c36d3
    Reviewed-on: https://gerrit.libreoffice.org/81570
    Reviewed-by: Ashod Nakashian <ashnak...@gmail.com>
    Tested-by: Ashod Nakashian <ashnak...@gmail.com>

diff --git a/common/Common.hpp b/common/Common.hpp
index f35ca08f8..7a02dfaae 100644
--- a/common/Common.hpp
+++ b/common/Common.hpp
@@ -16,7 +16,7 @@
 constexpr int DEFAULT_CLIENT_PORT_NUMBER = 9980;
 
 constexpr int COMMAND_TIMEOUT_MS = 5000;
-constexpr int CHILD_TIMEOUT_MS = COMMAND_TIMEOUT_MS * 2;
+constexpr int CHILD_TIMEOUT_MS = COMMAND_TIMEOUT_MS;
 constexpr int CHILD_REBALANCE_INTERVAL_MS = CHILD_TIMEOUT_MS / 10;
 constexpr int POLL_TIMEOUT_MS = COMMAND_TIMEOUT_MS / 5;
 constexpr int WS_SEND_TIMEOUT_MS = 1000;
diff --git a/wsd/DocumentBroker.cpp b/wsd/DocumentBroker.cpp
index 18568b53b..d3b0c654f 100644
--- a/wsd/DocumentBroker.cpp
+++ b/wsd/DocumentBroker.cpp
@@ -407,7 +407,13 @@ void DocumentBroker::pollThread()
     }
 
     // Flush socket data first.
-    const int flushTimeoutMs = POLL_TIMEOUT_MS * 2; // ~1000ms
+    constexpr int flushTimeoutMs = POLL_TIMEOUT_MS * 2; // ~1000ms
+    LOG_INF("Flushing socket for doc ["
+            << _docKey << "] for " << flushTimeoutMs << " ms. stop: " << _stop
+            << ", continuePolling: " << _poll->continuePolling()
+            << ", ShutdownRequestFlag: " << SigUtil::getShutdownRequestFlag()
+            << ", TerminationFlag: " << SigUtil::getTerminationFlag()
+            << ". Terminating child with reason: [" << _closeReason << "].");
     const auto flushStartTime = std::chrono::steady_clock::now();
     while (_poll->getSocketCount())
     {
diff --git a/wsd/LOOLWSD.cpp b/wsd/LOOLWSD.cpp
index d7ebcbb26..e43b827ec 100644
--- a/wsd/LOOLWSD.cpp
+++ b/wsd/LOOLWSD.cpp
@@ -211,6 +211,8 @@ extern "C" { void dump_state(void); /* easy for gdb */ }
 static int careerSpanMs = 0;
 #endif
 
+/// The timeout for a child to spawn, initially high, then reset to the default.
+int ChildSpawnTimeoutMs = CHILD_TIMEOUT_MS * 4;
 bool LOOLWSD::NoCapsForKit = false;
 std::atomic<unsigned> LOOLWSD::NumConnections;
 std::set<std::string> LOOLWSD::EditFileExtensions;
@@ -425,7 +427,7 @@ static int rebalanceChildren(int balance)
 
     const auto duration = (std::chrono::steady_clock::now() - 
LastForkRequestTime);
     const std::chrono::milliseconds::rep durationMs = 
std::chrono::duration_cast<std::chrono::milliseconds>(duration).count();
-    if (OutstandingForks != 0 && durationMs >= CHILD_TIMEOUT_MS)
+    if (OutstandingForks != 0 && durationMs >= ChildSpawnTimeoutMs)
     {
         // Children taking too long to spawn.
         // Forget we had requested any, and request anew.
@@ -507,7 +509,7 @@ std::shared_ptr<ChildProcess> getNewChild_Blocks(
     }
 
     // With valgrind we need extended time to spawn kits.
-    const size_t timeoutMs = CHILD_TIMEOUT_MS / 2;
+    const size_t timeoutMs = ChildSpawnTimeoutMs / 2;
     LOG_TRC("Waiting for a new child for a max of " << timeoutMs << " ms.");
     const auto timeout = std::chrono::milliseconds(timeoutMs);
 #else
@@ -1676,7 +1678,10 @@ bool LOOLWSD::createForKit()
     Admin::instance().setForKitPid(ForKitProcId);
     Admin::instance().setForKitWritePipe(ForKitWritePipe);
 
-    rebalanceChildren(LOOLWSD::NumPreSpawnedChildren - 1);
+    const int balance = LOOLWSD::NumPreSpawnedChildren - OutstandingForks;
+    if (balance > 0)
+        rebalanceChildren(balance);
+
     return ForKitProcId != -1;
 #endif
 }
@@ -3415,7 +3420,7 @@ int LOOLWSD::innerMain()
         }
         else
         {
-            const int timeoutMs = CHILD_TIMEOUT_MS * (LOOLWSD::NoCapsForKit ? 
150 : 50);
+            const int timeoutMs = ChildSpawnTimeoutMs * (LOOLWSD::NoCapsForKit 
? 150 : 50);
             const auto timeout = std::chrono::milliseconds(timeoutMs);
             LOG_TRC("Waiting for a new child for a max of " << timeoutMs << " 
ms.");
             if (!NewChildrenCV.wait_for(lock, timeout, []() { return 
!NewChildren.empty(); }))
@@ -3453,6 +3458,9 @@ int LOOLWSD::innerMain()
     std::cerr << "Ready to accept connections on port " << ClientPortNumber << 
 ".\n" << std::endl;
 #endif
 
+    // Reset the child-spawn timeout to the default, now that we're set.
+    ChildSpawnTimeoutMs = CHILD_TIMEOUT_MS;
+
     const auto startStamp = std::chrono::steady_clock::now();
 
     while (!SigUtil::getTerminationFlag() && 
!SigUtil::getShutdownRequestFlag())
_______________________________________________
Libreoffice-commits mailing list
libreoffice-comm...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits

Reply via email to