--- Begin Message ---
Package: keepalived
Version: 1:1.1.20-1
Severity: important
Tags: patch
The option nb_get_retry is ignored when a timeout occurs. Only when a check
fails due to digest or status code, it is retried.
The attached patch rewrites the method epilog() to uniform the handling of all
check outcomes. A check is now retried independently of the kind of failure.
However, if a real server is already down, failed checks are not retried.
-- System Information:
Debian Release: 6.0.1
APT prefers stable
APT policy: (500, 'stable')
Architecture: amd64 (x86_64)
Kernel: Linux 2.6.32-5-xen-amd64 (SMP w/2 CPU cores)
Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
diff -ur keepalived-1.1.20/keepalived/check/check_http.c keepalived-1.1.20.new/keepalived/check/check_http.c
--- keepalived-1.1.20/keepalived/check/check_http.c 2010-05-06 17:48:29.000000000 +0200
+++ keepalived-1.1.20.new/keepalived/check/check_http.c 2011-05-12 09:56:33.861014216 +0200
@@ -280,66 +280,82 @@
* Simple epilog functions. Handling event timeout.
* Finish the checker with memory managment or url rety check.
*
- * c == 0 => reset to 0 retry_it counter
- * t == 0 => reset to 0 url_it counter
- * method == 1 => register a new checker thread
- * method == 2 => register a retry on url checker thread
+ * success == 0 => current check failed
+ * smtp_msg => message used for mail notification
+ *
+ * returns value of success (i.e. 0 if check failed)
*/
int
-epilog(thread * thread_obj, int method, int t, int c)
+epilog(thread * thread_obj, int success, char *smtp_msg)
{
checker *checker_obj = THREAD_ARG(thread_obj);
http_get_checker *http_get_check = CHECKER_ARG(checker_obj);
http_arg *http_arg_obj = HTTP_ARG(http_get_check);
REQ *req = HTTP_REQ(http_arg_obj);
- uint16_t addr_port = get_service_port(checker_obj);
long delay = 0;
- if (method) {
- http_arg_obj->url_it += t ? t : -http_arg_obj->url_it;
- http_arg_obj->retry_it += c ? c : -http_arg_obj->retry_it;
- }
+ /* check succeeded */
+ if (success) {
+ /* reset retry counter */
+ http_arg_obj->retry_it = 0;
- /*
- * The get retry implementation mean that we retry performing
- * a GET on the same url until the remote web server return
- * html buffer. This is sometime needed with some applications
- * servers.
- */
- if (http_arg_obj->retry_it > http_get_check->nb_get_retry-1) {
- if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
- log_message(LOG_INFO, "Check on service [%s:%d] failed after %d retry."
- , inet_ntop2(CHECKER_RIP(checker_obj))
- , ntohs(addr_port), http_arg_obj->retry_it);
- smtp_alert(checker_obj->rs, NULL, NULL,
- "DOWN",
- "=> CHECK failed on service"
- " : MD5 digest mismatch <=");
- update_svr_checker_state(DOWN, checker_obj->id
- , checker_obj->vs
- , checker_obj->rs);
- }
+ /* all checks succeeded */
+ if (!fetch_next_url(http_get_check)) {
- /* Reset it counters */
- http_arg_obj->url_it = 0;
- http_arg_obj->retry_it = 0;
- }
+ /* real server is currently down */
+ if (!svr_checker_up(checker_obj->id, checker_obj->rs)) {
+ /* send mail and add server to pool */
+ smtp_alert(checker_obj->rs, NULL, NULL, "UP",
+ smtp_msg);
+ update_svr_checker_state(UP, checker_obj->id
+ , checker_obj->vs
+ , checker_obj->rs);
+ }
- /* register next timer thread */
- switch (method) {
- case 1:
- if (req)
+ /* schedule next round of checks */
delay = checker_obj->vs->delay_loop;
- else
- delay =
- http_get_check->delay_before_retry;
- break;
- case 2:
- if (http_arg_obj->url_it == 0 && http_arg_obj->retry_it == 0)
+ http_arg_obj->url_it = 0;
+ /* there could be urls left to check */
+ } else {
+ /* check next url immediately (delay = 0) */
+ http_arg_obj->url_it += 1;
+ }
+
+ /* check failed */
+ } else {
+
+ /* real server is currently down */
+ if (!svr_checker_up(checker_obj->id, checker_obj->rs)) {
+ /* don't retry */
+ http_arg_obj->retry_it = http_get_check->nb_get_retry;
+ } else {
+ /* increment retry counter */
+ http_arg_obj->retry_it += 1;
+ }
+
+ /* no retries left */
+ if (http_arg_obj->retry_it > http_get_check->nb_get_retry-1) {
+
+ /* real server is currently up */
+ if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
+ /* send mail and remove server from pool */
+ smtp_alert(checker_obj->rs, NULL, NULL, "DOWN",
+ smtp_msg);
+ update_svr_checker_state(DOWN, checker_obj->id
+ , checker_obj->vs
+ , checker_obj->rs);
+ }
+
+ /* schedule next round of checks */
delay = checker_obj->vs->delay_loop;
- else
+ http_arg_obj->retry_it = 0;
+ http_arg_obj->url_it = 0;
+ /* retry check */
+ } else {
+ /* schedule retry according to config */
delay = http_get_check->delay_before_retry;
- break;
+ }
+
}
/* If req == NULL, fd is not created */
@@ -355,9 +371,17 @@
/* Register next checker thread */
thread_add_timer(thread_obj->master, http_connect_thread, checker_obj, delay);
- return 0;
+ return success;
}
+/*
+ * Wrapper for epilog() to handle timeouts,
+ * logs info message and calls epilog()
+ * with success = 0
+ *
+ * smtp_msg => message used for mail notification
+ * debug_msg => message for logging
+ */
int
timeout_epilog(thread * thread_obj, char *smtp_msg, char *debug_msg)
{
@@ -369,16 +393,7 @@
, inet_ntop2(CHECKER_RIP(checker_obj))
, ntohs(addr_port));
- /* check if server is currently alive */
- if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
- smtp_alert(checker_obj->rs, NULL, NULL,
- "DOWN", smtp_msg);
- update_svr_checker_state(DOWN, checker_obj->id
- , checker_obj->vs
- , checker_obj->rs);
- }
-
- return epilog(thread_obj, 1, 0, 0);
+ return epilog(thread_obj, 0, smtp_msg);
}
/* return the url pointer of the current url iterator */
@@ -413,34 +428,15 @@
/* Next check the HTTP status code */
if (fetched_url->status_code) {
if (req->status_code != fetched_url->status_code) {
- /* check if server is currently alive */
- if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
- log_message(LOG_INFO,
- "HTTP status code error to [%s:%d] url(%s)"
- ", status_code [%d].",
- inet_ntop2(CHECKER_RIP(checker_obj)),
- ntohs(addr_port), fetched_url->path,
- req->status_code);
- smtp_alert(checker_obj->rs, NULL, NULL,
- "DOWN",
- "=> CHECK failed on service"
+ /* check failed, log message and call epilog */
+ log_message(LOG_INFO,
+ "HTTP status code error to [%s:%d] url(%s)"
+ ", status_code [%d].",
+ inet_ntop2(CHECKER_RIP(checker_obj)),
+ ntohs(addr_port), fetched_url->path,
+ req->status_code);
+ return epilog(thread_obj, 0, "=> CHECK failed on service"
" : HTTP status code mismatch <=");
- update_svr_checker_state(DOWN, checker_obj->id
- , checker_obj->vs
- , checker_obj->rs);
- } else {
- DBG("HTTP Status_code to [%s:%d] url(%d) = [%d].",
- inet_ntop2(CHECKER_RIP(checker_obj))
- , ntohs(addr_port)
- , http_arg_obj->url_it + 1
- , req->status_code);
- /*
- * We set retry iterator to max value to not retry
- * when service is already know as die.
- */
- http_arg_obj->retry_it = http_get_check->nb_get_retry;
- }
- return epilog(thread_obj, 2, 0, 1);
} else {
if (!svr_checker_up(checker_obj->id, checker_obj->rs))
log_message(LOG_INFO,
@@ -448,7 +444,7 @@
inet_ntop2(CHECKER_RIP(checker_obj))
, ntohs(addr_port)
, http_arg_obj->url_it + 1);
- return epilog(thread_obj, 1, 1, 0) + 1;
+ return epilog(thread_obj, 1, "=> CHECK succeed on service <=");
}
}
@@ -462,35 +458,16 @@
r = strcmp(fetched_url->digest, digest_tmp);
if (r) {
- /* check if server is currently alive */
- if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
- log_message(LOG_INFO,
- "MD5 digest error to [%s:%d] url[%s]"
- ", MD5SUM [%s].",
- inet_ntop2(CHECKER_RIP(checker_obj)),
- ntohs(addr_port), fetched_url->path,
- digest_tmp);
- smtp_alert(checker_obj->rs, NULL, NULL,
- "DOWN",
- "=> CHECK failed on service"
- " : HTTP MD5SUM mismatch <=");
- update_svr_checker_state(DOWN, checker_obj->id
- , checker_obj->vs
- , checker_obj->rs);
- } else {
- DBG("MD5SUM to [%s:%d] url(%d) = [%s].",
- inet_ntop2(CHECKER_RIP(checker_obj))
- , ntohs(addr_port)
- , http_arg_obj->url_it + 1
- , digest_tmp);
- /*
- * We set retry iterator to max value to not retry
- * when service is already know as die.
- */
- http_arg_obj->retry_it = http_get_check->nb_get_retry;
- }
+ /* check failed, log message and call epilog */
+ log_message(LOG_INFO,
+ "MD5 digest error to [%s:%d] url[%s]"
+ ", MD5SUM [%s].",
+ inet_ntop2(CHECKER_RIP(checker_obj)),
+ ntohs(addr_port), fetched_url->path,
+ digest_tmp);
FREE(digest_tmp);
- return epilog(thread_obj, 2, 0, 1);
+ return epilog(thread_obj, 0, "=> CHECK failed on service"
+ " : HTTP MD5SUM mismatch <=");
} else {
if (!svr_checker_up(checker_obj->id, checker_obj->rs))
log_message(LOG_INFO, "MD5 digest success to [%s:%d] url(%d).",
@@ -498,11 +475,11 @@
, ntohs(addr_port)
, http_arg_obj->url_it + 1);
FREE(digest_tmp);
- return epilog(thread_obj, 1, 1, 0) + 1;
+ return epilog(thread_obj, 1, "=> CHECK succeed on service <=");
}
}
- return epilog(thread_obj, 1, 0, 0) + 1;
+ return epilog(thread_obj, 1, "=> CHECK succeed on service <=");
}
/* Handle response stream performing MD5 updates */
@@ -577,21 +554,13 @@
MD5_Final(digest, &req->context);
if (r == -1) {
- /* We have encourred a real read error */
- if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
- log_message(LOG_INFO, "Read error with server [%s:%d]: %s",
- inet_ntop2(CHECKER_RIP(checker_obj))
- , ntohs(addr_port)
- , strerror(errno));
- smtp_alert(checker_obj->rs, NULL, NULL,
- "DOWN",
- "=> HTTP CHECK failed on service"
+ /* check failed, log message and call epilog */
+ log_message(LOG_INFO, "Read error with server [%s:%d]: %s",
+ inet_ntop2(CHECKER_RIP(checker_obj))
+ , ntohs(addr_port)
+ , strerror(errno));
+ return epilog(thread_obj, 0, "=> HTTP CHECK failed on service"
" : cannot receive data <=");
- update_svr_checker_state(DOWN, checker_obj->id
- , checker_obj->vs
- , checker_obj->rs);
- }
- return epilog(thread_obj, 1, 0, 0);
}
/* Handle response stream */
@@ -700,21 +669,12 @@
FREE(str_request);
if (!ret) {
+ /* check failed, log message and call epilog */
log_message(LOG_INFO, "Cannot send get request to [%s:%d].",
inet_ntop2(CHECKER_RIP(checker_obj))
, ntohs(addr_port));
-
- /* check if server is currently alive */
- if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
- smtp_alert(checker_obj->rs, NULL, NULL,
- "DOWN",
- "=> CHECK failed on service"
+ return epilog(thread_obj, 0, "=> CHECK failed on service"
" : cannot send data <=");
- update_svr_checker_state(DOWN, checker_obj->id
- , checker_obj->vs
- , checker_obj->rs);
- }
- return epilog(thread_obj, 1, 0, 0);
}
/* Register read timeouted thread */
@@ -744,20 +704,12 @@
, addr_port, http_check_thread);
switch (status) {
case connect_error:
- /* check if server is currently alive */
- if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
- log_message(LOG_INFO, "Error connecting server [%s:%d].",
- inet_ntop2(CHECKER_RIP(checker_obj))
- , ntohs(addr_port));
- smtp_alert(checker_obj->rs, NULL, NULL,
- "DOWN",
- "=> CHECK failed on service"
+ /* check failed, log message and call epilog */
+ log_message(LOG_INFO, "Error connecting server [%s:%d].",
+ inet_ntop2(CHECKER_RIP(checker_obj))
+ , ntohs(addr_port));
+ return epilog(thread_obj, 0, "=> CHECK failed on service"
" : connection error <=");
- update_svr_checker_state(DOWN, checker_obj->id
- , checker_obj->vs
- , checker_obj->rs);
- }
- return epilog(thread_obj, 1, 0, 0);
break;
case connect_timeout:
@@ -829,24 +781,20 @@
ssl_printerr(SSL_get_error
(req->ssl, ret));
#endif
- if ((http_get_check->proto == PROTO_SSL) &&
- (svr_checker_up(checker_obj->id, checker_obj->rs))) {
+ if (http_get_check->proto == PROTO_SSL) {
+ /* check failed, log message and call epilog */
log_message(LOG_INFO, "SSL handshake/communication error"
" connecting to server"
" (openssl errno: %d) [%s:%d]."
, SSL_get_error (http_arg_obj->req->ssl, ret)
, inet_ntop2(CHECKER_RIP(checker_obj))
, ntohs(addr_port));
- smtp_alert(checker_obj->rs, NULL, NULL,
- "DOWN",
- "=> CHECK failed on service"
+ return epilog(thread_obj, 0, "=> CHECK failed on service"
" : SSL connection error <=");
- update_svr_checker_state(DOWN, checker_obj->id
- , checker_obj->vs
- , checker_obj->rs);
}
- return epilog(thread_obj, 1, 0, 0);
+ return epilog(thread_obj, 0, "=> CHECK failed on service"
+ " : connection error <=");
}
}
break;
@@ -888,14 +836,9 @@
log_message(LOG_INFO, "Remote Web server [%s:%d] succeed on service.",
inet_ntop2(CHECKER_RIP(checker_obj))
, ntohs(addr_port));
- smtp_alert(checker_obj->rs, NULL, NULL, "UP",
- "=> CHECK succeed on service <=");
- update_svr_checker_state(UP, checker_obj->id
- , checker_obj->vs
- , checker_obj->rs);
}
http_arg_obj->req = NULL;
- return epilog(thread_obj, 1, 0, 0) + 1;
+ return epilog(thread_obj, 1, "=> CHECK succeed on service <=");
}
/* Create the socket */
diff -ur keepalived-1.1.20/keepalived/check/check_ssl.c keepalived-1.1.20.new/keepalived/check/check_ssl.c
--- keepalived-1.1.20/keepalived/check/check_ssl.c 2010-05-06 17:48:58.000000000 +0200
+++ keepalived-1.1.20.new/keepalived/check/check_ssl.c 2011-05-12 09:56:31.557014358 +0200
@@ -244,6 +244,7 @@
http_get_checker *http_get_check = CHECKER_ARG(checker_obj);
http_arg *http_arg_obj = HTTP_ARG(http_get_check);
REQ *req = HTTP_REQ(http_arg_obj);
+ uint16_t addr_port = get_service_port(checker_obj);
unsigned char digest[16];
int r = 0;
int val;
@@ -290,17 +291,13 @@
SSL_ERROR_ZERO_RETURN) ? SSL_shutdown(req->ssl) : 0;
if (r && !req->extracted) {
- /* check if server is currently alive */
- if (svr_checker_up(checker_obj->id, checker_obj->rs)) {
- smtp_alert(checker_obj->rs, NULL, NULL,
- "DOWN",
- "=> SSL CHECK failed on service"
+ /* check failed, log message and call epilog */
+ log_message(LOG_INFO, "Read error with server [%s:%d]: %s",
+ inet_ntop2(CHECKER_RIP(checker_obj))
+ , ntohs(addr_port)
+ , strerror(errno));
+ return epilog(thread_obj, 0, "=> SSL CHECK failed on service"
" : cannot receive data <=\n\n");
- update_svr_checker_state(DOWN, checker_obj->id
- , checker_obj->vs
- , checker_obj->rs);
- }
- return epilog(thread_obj, 1, 0, 0);
}
/* Handle response stream */
diff -ur keepalived-1.1.20/keepalived/include/check_http.h keepalived-1.1.20.new/keepalived/include/check_http.h
--- keepalived-1.1.20/keepalived/include/check_http.h 2010-05-06 17:50:29.000000000 +0200
+++ keepalived-1.1.20.new/keepalived/include/check_http.h 2011-05-09 11:37:23.265015403 +0200
@@ -89,7 +89,7 @@
/* Define prototypes */
extern void install_http_check_keyword(void);
-extern int epilog(thread * thread_obj, int metod, int t, int c);
+extern int epilog(thread * thread_obj, int success, char *smtp_msg);
extern int timeout_epilog(thread * thread_obj, char *smtp_msg, char *debug_msg);
extern url *fetch_next_url(http_get_checker * http_get_check);
extern int http_process_response(REQ * req, int r);
--- End Message ---