Package: release.debian.org Severity: normal User: release.debian....@packages.debian.org Usertags: unblock
Please unblock package icinga icinga 1.7.1 has a nasty bug that duplicates events (see #686036 for extensive details). 1.7.2 unfortunatly has a little bit too much changes for an unblock request, therefore I decided to upload 1.7.1-3 to unstable. It includes the cherry-picked fix. For yor convinience I attached the interdiff against the package in testing. The diff is fairly small and only touches the problem. I also attached the cherrypicked patch. Thanks for considering it. Alex unblock icinga/1.7.1-3 -- System Information: Debian Release: wheezy/sid APT prefers unstable APT policy: (500, 'unstable'), (500, 'testing'), (1, 'experimental') Architecture: amd64 (x86_64) Kernel: Linux 3.6.0-rc4lisa+ (SMP w/4 CPU cores) Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8) Shell: /bin/sh linked to /bin/dash
diff -u icinga-1.7.1/debian/changelog icinga-1.7.1/debian/changelog --- icinga-1.7.1/debian/changelog +++ icinga-1.7.1/debian/changelog @@ -1,3 +1,11 @@ +icinga (1.7.1-3) unstable; urgency=low + + * Fix generation of duplicated events. + Patch cherrypicked from 1.7.2 + (Closes: #686036) + + -- Alexander Wirt <formo...@debian.org> Sun, 09 Sep 2012 14:50:53 +0200 + icinga (1.7.1-2) unstable; urgency=low [ Alexander Wirt ] diff -u icinga-1.7.1/debian/patches/00list icinga-1.7.1/debian/patches/00list --- icinga-1.7.1/debian/patches/00list +++ icinga-1.7.1/debian/patches/00list @@ -6,0 +7 @@ +99_fix_doubled_events.dpatch only in patch2: unchanged: --- icinga-1.7.1.orig/debian/patches/99_fix_doubled_events.dpatch +++ icinga-1.7.1/debian/patches/99_fix_doubled_events.dpatch @@ -0,0 +1,203 @@ +#! /bin/sh /usr/share/dpatch/dpatch-run +## 99_fix_doubled_events.dpatch by Andreas Ericsson +## +## All lines beginning with `## DP:' are a description of the patch. +## DP: fix duplicated events on check scheduling logic for new events + +@DPATCH@ +diff -urNad '--exclude=CVS' '--exclude=.svn' '--exclude=.git' '--exclude=.arch' '--exclude=.hg' '--exclude=_darcs' '--exclude=.bzr' icinga-1.7.1~/THANKS icinga-1.7.1/THANKS +--- icinga-1.7.1~/THANKS 2012-06-15 19:26:09.000000000 +0200 ++++ icinga-1.7.1/THANKS 2012-09-09 13:37:09.065317730 +0200 +@@ -343,3 +343,4 @@ + * Michal Zimen + * Dennis van Zuijlekom + * Pawel Zuzelski ++* Imri Zvik +diff -urNad '--exclude=CVS' '--exclude=.svn' '--exclude=.git' '--exclude=.arch' '--exclude=.hg' '--exclude=_darcs' '--exclude=.bzr' icinga-1.7.1~/base/checks.c icinga-1.7.1/base/checks.c +--- icinga-1.7.1~/base/checks.c 2012-06-15 19:26:09.000000000 +0200 ++++ icinga-1.7.1/base/checks.c 2012-09-09 13:37:09.065317730 +0200 +@@ -1881,15 +1881,6 @@ + return; + } + +- /* allocate memory for a new event item */ +- new_event = (timed_event *)malloc(sizeof(timed_event)); +- if (new_event == NULL) { +- +- logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Could not reschedule check of service '%s' on host '%s'!\n", svc->description, svc->host_name); +- +- return; +- } +- + /* default is to use the new event */ + use_original_event = FALSE; + +@@ -1903,7 +1894,7 @@ + */ + if (temp_event != NULL) { + +- log_debug_info(DEBUGL_CHECKS, 2, "Found another service check event for this service @ %s", ctime(&temp_event->run_time)); ++ log_debug_info(DEBUGL_CHECKS, 2, "Found another service check event for service '%s' on host '%s' @ %s", svc->description, svc->host_name, ctime(&temp_event->run_time)); + + /* use the originally scheduled check unless we decide otherwise */ + use_original_event = TRUE; +@@ -1938,32 +1929,39 @@ + log_debug_info(DEBUGL_CHECKS, 2, "New service check event occurs after the existing event, so we'll ignore it.\n"); + } + } ++ } + +- /* the originally queued event won the battle, so keep it */ +- if (use_original_event == TRUE) { +- my_free(new_event); ++ ++ /* ++ * we can't use the original event, ++ * so schedule a new event ++ */ ++ if (use_original_event == FALSE) { ++ ++ log_debug_info(DEBUGL_CHECKS, 2, "Scheduling new service check event for '%s' on host '%s' @ %s", svc->description, svc->host_name, ctime(&check_time)); ++ ++ /* allocate memory for a new event item */ ++ new_event = (timed_event *)malloc(sizeof(timed_event)); ++ ++ if (new_event == NULL) { ++ logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Could not reschedule check of service '%s' on host '%s'!\n", svc->description, svc->host_name); ++ return; + } + +- /* else we're using the new event, so remove the old one */ +- else { ++ /* make sure we kill off the old event */ ++ if (temp_event) { ++ log_debug_info(DEBUGL_CHECKS, 2, "Removing service check event for service '%s' on host '%s' @ %s", svc->description, svc->host_name, ctime(&temp_event->run_time)); + remove_event(temp_event, &event_list_low, &event_list_low_tail); +- /* save new event for later */ +- svc->next_check_event = new_event; + my_free(temp_event); + } +- } + +- /* save check options for retention purposes */ +- svc->check_options = options; +- +- /* schedule a new event */ +- if (use_original_event == FALSE) { +- +- log_debug_info(DEBUGL_CHECKS, 2, "Scheduling new service check event.\n"); +- +- /* set the next service check time */ ++ /* set the next service check event and time */ ++ svc->next_check_event = new_event; + svc->next_check = check_time; + ++ /* save check options for retention purposes */ ++ svc->check_options = options; ++ + /* place the new event in the event queue */ + new_event->event_type = EVENT_SERVICE_CHECK; + new_event->event_data = (void *)svc; +@@ -2360,14 +2358,6 @@ + return; + } + +- /* allocate memory for a new event item */ +- if ((new_event = (timed_event *)malloc(sizeof(timed_event))) == NULL) { +- +- logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Could not reschedule check of host '%s'!\n", hst->name); +- +- return; +- } +- + /* default is to use the new event */ + use_original_event = FALSE; + +@@ -2381,7 +2371,7 @@ + */ + if (temp_event != NULL) { + +- log_debug_info(DEBUGL_CHECKS, 2, "Found another host check event for this host @ %s", ctime(&temp_event->run_time)); ++ log_debug_info(DEBUGL_CHECKS, 2, "Found another host check event for host '%s' @ %s", hst->name, ctime(&temp_event->run_time)); + + /* use the originally scheduled check unless we decide otherwise */ + use_original_event = TRUE; +@@ -2416,32 +2406,35 @@ + log_debug_info(DEBUGL_CHECKS, 2, "New host check event occurs after the existing event, so we'll ignore it.\n"); + } + } ++ } + +- /* the originally queued event won the battle, so keep it */ +- if (use_original_event == TRUE) { +- my_free(new_event); ++ /* ++ * we can't use the original event, ++ * so schedule a new event ++ */ ++ if (use_original_event == FALSE) { ++ ++ log_debug_info(DEBUGL_CHECKS, 2, "Scheduling new host check event for '%s' @ %s", hst->name, ctime(&check_time)); ++ ++ /* allocate memory for a new event item */ ++ if((new_event = (timed_event *)malloc(sizeof(timed_event))) == NULL) { ++ logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Could not reschedule check of host '%s'!\n", hst->name); ++ return; + } + +- /* else use the new event, so remove the old */ +- else { ++ if (temp_event) { ++ log_debug_info(DEBUGL_CHECKS, 2, "Removing host check event for host '%s' @ %s", hst->name, ctime(&temp_event->run_time)); + remove_event(temp_event, &event_list_low, &event_list_low_tail); +- /* save new event for later */ +- hst->next_check_event = new_event; + my_free(temp_event); + } +- } +- +- /* save check options for retention purposes */ +- hst->check_options = options; +- +- /* use the new event */ +- if (use_original_event == FALSE) { +- +- log_debug_info(DEBUGL_CHECKS, 2, "Scheduling new host check event.\n"); + +- /* set the next host check time */ ++ /* set the next host check event and time */ ++ hst->next_check_event = new_event; + hst->next_check = check_time; + ++ /* save check options for retention purposes */ ++ hst->check_options = options; ++ + /* place the new event in the event queue */ + new_event->event_type = EVENT_HOST_CHECK; + new_event->event_data = (void *)hst; +diff -urNad '--exclude=CVS' '--exclude=.svn' '--exclude=.git' '--exclude=.arch' '--exclude=.hg' '--exclude=_darcs' '--exclude=.bzr' icinga-1.7.1~/base/events.c icinga-1.7.1/base/events.c +--- icinga-1.7.1~/base/events.c 2012-06-15 19:26:09.000000000 +0200 ++++ icinga-1.7.1/base/events.c 2012-09-09 13:37:09.069317730 +0200 +@@ -929,6 +929,22 @@ + new_event->event_interval = event_interval; + new_event->timing_func = timing_func; + new_event->compensate_for_time_change = compensate_for_time_change; ++ /* ++ * we need to keep the reverse link from the (service|host *)event_data->next_check_event ++ * to the new_event in order to stay sane on schedule_host|service_check() checks ++ * later on, already having a new event assigned to host/service, not rescheduling a new event. ++ * see #2993 for deeper analysis ++ */ ++ if (event_type == EVENT_SERVICE_CHECK) { ++ service *temp_service = (service *)event_data; ++ temp_service->next_check_event = new_event; ++ log_debug_info(DEBUGL_CHECKS, 2, "Service '%s' on Host '%s' next_check_event populated\n", temp_service->description, temp_service->host_name); ++ } ++ if (event_type == EVENT_HOST_CHECK) { ++ host *temp_host = (host *)event_data; ++ temp_host->next_check_event = new_event; ++ log_debug_info(DEBUGL_CHECKS, 2, "Host '%s' next_check_event populated\n", temp_host->name); ++ } + } else + return ERROR; +
>From 264cdc31c82f00f207deec483b9d6df42fdd312c Mon Sep 17 00:00:00 2001 From: Michael Friedrich <michael.friedr...@univie.ac.at> Date: Fri, 6 Jul 2012 13:21:41 +0200 Subject: [PATCH] core: fix duplicated events on check scheduling logic for new events #2676 #2993 * core: fix duplicated events on check scheduling logic for new events (Andreas Ericsson) #2676 #2993 previously, the logic on scheduling a new event was changed using the new_event attribute. the decision for actually scheduling a new event now happens generalized after having decided to actually do so. furthermore next_check_event is correctly assigned to that new event for the host|service check (which is a bug in previous versions, causing duplicate events under different circumstances). refs #2676 refs #2993 Conflicts: Changelog core: avoid duplicate events when scheduling forced host|service check (Imri Zvik) #2993 previously, we had introduced a hash-like implementation of host|service->next_check_event directly pointing to the next scheduled event. this algorithm is being used within schedule_host|service_check, determing wether to use the already assigned event, or scheduling a new event. Since we did not populate the event_data (host or service object) when adding a new event to the scheduler, this became insame, always rescheduling a new event, but not discarding the old one. This has been partly fixed in #2676 with refactoring that detection and saving the next_check_event accordingly. But on already scheduled events which were forced (overridden), another bug was unveiled. Now that we add the reverse pointer from the host|service event_data back to the newly created event when forcing a check, we will make sure that those events are checked correctly, and executed/discarded in the first place, and not always creating a new event, seperated from the rest. basically, using the previous implementation (with and without the fix from #2676) we've created an event bomb under various circumstances, especially when future events would have been overridden (forced checks). as events usually result in checks, which can result into perfdata, this could possibly the root cause for #2924 as well, as other users reported on the portal as well. http://www.monitoring-portal.org/wbb/index.php?page=Thread&threadID=26352 With the kind patch provided by Imri Zvik, this now works like expected. Adapted the debug output a bit myself, so with debug_level=272 it will be easy to see what happens on events scheduling - and not the non-telling mess before. kudos to Imri Zvik for the patch. refs #2993 refs #2676 refs #2182 refs #2924 Conflicts: Changelog --- THANKS | 1 + base/checks.c | 99 +++++++++++++++++++++++++++------------------------------ base/events.c | 16 ++++++++++ 3 files changed, 63 insertions(+), 53 deletions(-) diff --git a/THANKS b/THANKS index d11030f..6a8a243 100644 --- a/THANKS +++ b/THANKS @@ -343,3 +343,4 @@ in various ways. If we missed your name, let us know. * Michal Zimen * Dennis van Zuijlekom * Pawel Zuzelski +* Imri Zvik diff --git a/base/checks.c b/base/checks.c index eedecb7..8644633 100644 --- a/base/checks.c +++ b/base/checks.c @@ -1881,15 +1881,6 @@ void schedule_service_check(service *svc, time_t check_time, int options) { return; } - /* allocate memory for a new event item */ - new_event = (timed_event *)malloc(sizeof(timed_event)); - if (new_event == NULL) { - - logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Could not reschedule check of service '%s' on host '%s'!\n", svc->description, svc->host_name); - - return; - } - /* default is to use the new event */ use_original_event = FALSE; @@ -1903,7 +1894,7 @@ void schedule_service_check(service *svc, time_t check_time, int options) { */ if (temp_event != NULL) { - log_debug_info(DEBUGL_CHECKS, 2, "Found another service check event for this service @ %s", ctime(&temp_event->run_time)); + log_debug_info(DEBUGL_CHECKS, 2, "Found another service check event for service '%s' on host '%s' @ %s", svc->description, svc->host_name, ctime(&temp_event->run_time)); /* use the originally scheduled check unless we decide otherwise */ use_original_event = TRUE; @@ -1938,32 +1929,39 @@ void schedule_service_check(service *svc, time_t check_time, int options) { log_debug_info(DEBUGL_CHECKS, 2, "New service check event occurs after the existing event, so we'll ignore it.\n"); } } + } + - /* the originally queued event won the battle, so keep it */ - if (use_original_event == TRUE) { - my_free(new_event); + /* + * we can't use the original event, + * so schedule a new event + */ + if (use_original_event == FALSE) { + + log_debug_info(DEBUGL_CHECKS, 2, "Scheduling new service check event for '%s' on host '%s' @ %s", svc->description, svc->host_name, ctime(&check_time)); + + /* allocate memory for a new event item */ + new_event = (timed_event *)malloc(sizeof(timed_event)); + + if (new_event == NULL) { + logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Could not reschedule check of service '%s' on host '%s'!\n", svc->description, svc->host_name); + return; } - /* else we're using the new event, so remove the old one */ - else { + /* make sure we kill off the old event */ + if (temp_event) { + log_debug_info(DEBUGL_CHECKS, 2, "Removing service check event for service '%s' on host '%s' @ %s", svc->description, svc->host_name, ctime(&temp_event->run_time)); remove_event(temp_event, &event_list_low, &event_list_low_tail); - /* save new event for later */ - svc->next_check_event = new_event; my_free(temp_event); } - } - - /* save check options for retention purposes */ - svc->check_options = options; - - /* schedule a new event */ - if (use_original_event == FALSE) { - - log_debug_info(DEBUGL_CHECKS, 2, "Scheduling new service check event.\n"); - /* set the next service check time */ + /* set the next service check event and time */ + svc->next_check_event = new_event; svc->next_check = check_time; + /* save check options for retention purposes */ + svc->check_options = options; + /* place the new event in the event queue */ new_event->event_type = EVENT_SERVICE_CHECK; new_event->event_data = (void *)svc; @@ -2360,14 +2358,6 @@ void schedule_host_check(host *hst, time_t check_time, int options) { return; } - /* allocate memory for a new event item */ - if ((new_event = (timed_event *)malloc(sizeof(timed_event))) == NULL) { - - logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Could not reschedule check of host '%s'!\n", hst->name); - - return; - } - /* default is to use the new event */ use_original_event = FALSE; @@ -2381,7 +2371,7 @@ void schedule_host_check(host *hst, time_t check_time, int options) { */ if (temp_event != NULL) { - log_debug_info(DEBUGL_CHECKS, 2, "Found another host check event for this host @ %s", ctime(&temp_event->run_time)); + log_debug_info(DEBUGL_CHECKS, 2, "Found another host check event for host '%s' @ %s", hst->name, ctime(&temp_event->run_time)); /* use the originally scheduled check unless we decide otherwise */ use_original_event = TRUE; @@ -2416,32 +2406,35 @@ void schedule_host_check(host *hst, time_t check_time, int options) { log_debug_info(DEBUGL_CHECKS, 2, "New host check event occurs after the existing event, so we'll ignore it.\n"); } } + } - /* the originally queued event won the battle, so keep it */ - if (use_original_event == TRUE) { - my_free(new_event); + /* + * we can't use the original event, + * so schedule a new event + */ + if (use_original_event == FALSE) { + + log_debug_info(DEBUGL_CHECKS, 2, "Scheduling new host check event for '%s' @ %s", hst->name, ctime(&check_time)); + + /* allocate memory for a new event item */ + if((new_event = (timed_event *)malloc(sizeof(timed_event))) == NULL) { + logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Could not reschedule check of host '%s'!\n", hst->name); + return; } - /* else use the new event, so remove the old */ - else { + if (temp_event) { + log_debug_info(DEBUGL_CHECKS, 2, "Removing host check event for host '%s' @ %s", hst->name, ctime(&temp_event->run_time)); remove_event(temp_event, &event_list_low, &event_list_low_tail); - /* save new event for later */ - hst->next_check_event = new_event; my_free(temp_event); } - } - - /* save check options for retention purposes */ - hst->check_options = options; - /* use the new event */ - if (use_original_event == FALSE) { - - log_debug_info(DEBUGL_CHECKS, 2, "Scheduling new host check event.\n"); - - /* set the next host check time */ + /* set the next host check event and time */ + hst->next_check_event = new_event; hst->next_check = check_time; + /* save check options for retention purposes */ + hst->check_options = options; + /* place the new event in the event queue */ new_event->event_type = EVENT_HOST_CHECK; new_event->event_data = (void *)hst; diff --git a/base/events.c b/base/events.c index ce8666d..0cde885 100644 --- a/base/events.c +++ b/base/events.c @@ -929,6 +929,22 @@ int schedule_new_event(int event_type, int high_priority, time_t run_time, int r new_event->event_interval = event_interval; new_event->timing_func = timing_func; new_event->compensate_for_time_change = compensate_for_time_change; + /* + * we need to keep the reverse link from the (service|host *)event_data->next_check_event + * to the new_event in order to stay sane on schedule_host|service_check() checks + * later on, already having a new event assigned to host/service, not rescheduling a new event. + * see #2993 for deeper analysis + */ + if (event_type == EVENT_SERVICE_CHECK) { + service *temp_service = (service *)event_data; + temp_service->next_check_event = new_event; + log_debug_info(DEBUGL_CHECKS, 2, "Service '%s' on Host '%s' next_check_event populated\n", temp_service->description, temp_service->host_name); + } + if (event_type == EVENT_HOST_CHECK) { + host *temp_host = (host *)event_data; + temp_host->next_check_event = new_event; + log_debug_info(DEBUGL_CHECKS, 2, "Host '%s' next_check_event populated\n", temp_host->name); + } } else return ERROR; -- 1.7.10