In case a service is restarted while the DP is not ready yet, it gets
restarted again immediately, which means the DP might still not be
ready. The allowed number of restarts is then depleted quickly.

This patch changes the restart mechanism such that the first restart
happens immediately, the second is scheduled after 2 seconds, the third
after 4 seconds, and so on.

https://fedorahosted.org/sssd/ticket/1528
>From f7729a651ecc3de3d082dc2e40e0a1b1c413fef8 Mon Sep 17 00:00:00 2001
From: Jakub Hrozek <jhro...@redhat.com>
Date: Thu, 15 Nov 2012 19:26:18 +0100
Subject: [PATCH] Restart services with a delay in case they are restarted too
 often

In case a service is restarted while the DP is not ready yet, it gets
restarted again immediately, which means the DP might still not be
ready. The allowed number of restarts is then depleted quickly.

This patch changes the restart mechanism such that the first restart
happens immediately, the second is scheduled after 2 seconds, the third
after 4 seconds, and so on.

https://fedorahosted.org/sssd/ticket/1528
---
 src/monitor/monitor.c | 52 ++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 13 deletions(-)

diff --git a/src/monitor/monitor.c b/src/monitor/monitor.c
index caa157138c19d18b61a77b1f9d4b29ac9e1fb64e..41881feb11f4e8e6d06e6976eb418537bceb3200 100644
--- a/src/monitor/monitor.c
+++ b/src/monitor/monitor.c
@@ -2506,11 +2506,43 @@ static void service_startup_handler(struct tevent_context *ev,
     _exit(1);
 }
 
+static void mt_svc_restart(struct tevent_context *ev,
+                           struct tevent_timer *te,
+                           struct timeval t, void *ptr)
+{
+    struct mt_svc *svc;
+
+    svc = talloc_get_type(ptr, struct mt_svc);
+    if (svc == NULL) {
+        return;
+    }
+
+    DEBUG(SSSDBG_TRACE_FUNC,
+          ("Scheduling a service for restart %d\n", svc->restarts));
+
+    if (svc->type == MT_SVC_SERVICE) {
+        add_new_service(svc->mt_ctx, svc->name, svc->restarts + 1);
+    } else if (svc->type == MT_SVC_PROVIDER) {
+        add_new_provider(svc->mt_ctx, svc->name, svc->restarts + 1);
+    } else {
+        /* Invalid type? */
+        DEBUG(SSSDBG_CRIT_FAILURE,
+              ("BUG: Invalid child process type [%d]\n", svc->type));
+    }
+
+    /* Free the old service (which will also remove it
+     * from the child list)
+     */
+    talloc_free(svc);
+}
+
 static void mt_svc_exit_handler(int pid, int wait_status, void *pvt)
 {
     struct mt_svc *svc = talloc_get_type(pvt, struct mt_svc);
     struct mt_ctx *mt_ctx = svc->mt_ctx;
     time_t now = time(NULL);
+    struct tevent_timer *te;
+    struct timeval tv;
 
     if WIFEXITED(wait_status) {
         DEBUG(SSSDBG_OP_FAILURE,
@@ -2546,20 +2578,14 @@ static void mt_svc_exit_handler(int pid, int wait_status, void *pvt)
         return;
     }
 
-    if (svc->type == MT_SVC_SERVICE) {
-        add_new_service(svc->mt_ctx, svc->name, svc->restarts + 1);
-    } else if (svc->type == MT_SVC_PROVIDER) {
-        add_new_provider(svc->mt_ctx, svc->name, svc->restarts + 1);
-    } else {
-        /* Invalid type? */
-        DEBUG(SSSDBG_CRIT_FAILURE,
-              ("BUG: Invalid child process type [%d]\n", svc->type));
+    /* restarts are scheduled after 0, 2, 4, ... seconds */
+    tv = tevent_timeval_current_ofs((svc->restarts << 1), 0);
+    te = tevent_add_timer(svc->mt_ctx->ev, svc, tv, mt_svc_restart, svc);
+    if (!te) {
+        /* Nothing much we can do */
+        DEBUG(SSSDBG_CRIT_FAILURE, ("Out of memory?!\n"));
+        return;
     }
-
-    /* Free the old service (which will also remove it
-     * from the child list)
-     */
-    talloc_free(svc);
 }
 
 int main(int argc, const char *argv[])
-- 
1.8.0

_______________________________________________
sssd-devel mailing list
sssd-devel@lists.fedorahosted.org
https://lists.fedorahosted.org/mailman/listinfo/sssd-devel

Reply via email to