Author: rjung
Date: Sun Aug 27 13:29:08 2006
New Revision: 437455

URL: http://svn.apache.org/viewvc?rev=437455&view=rev
Log:
Reorg lb service method:
- return from method at only one point, so that passing back results gets easier
- retry handling was at least strange, should be clearer now
- abort once get_most_suitable_worker only returns NULL

Modified: tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h Modified: tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c URL: http://svn.apache.org/viewvc/tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c?rev=437455&r1=437454&r2=437455&view=diff ============================================================================== --- tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c (original) +++ tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c Sun Aug 27 13:29:08 2006 @@ -598,9 +598,9 @@ jk_logger_t *l, int *is_error) { lb_endpoint_t *p; - int attempt = 0; + int attempt = 1; int num_of_workers; - worker_record_t *prec = NULL; + int rc = -1; char *sessionid = NULL; JK_TRACE_ENTER(l); @@ -635,26 +635,49 @@ "service sticky_session=%d id='%s'", p->worker->s->sticky_session, sessionid ? sessionid : "empty"); - while (num_of_workers) { + while (attempt <= num_of_workers && rc == -1) { worker_record_t *rec = get_most_suitable_worker(p->worker, sessionid, s, l); - int rc; /* Do not reuse previous worker, because * that worker already failed. 
*/ - if (rec && rec != prec) { + if (rec) { + int r; int is_service_error = JK_HTTP_OK; - int service_stat = JK_FALSE; jk_endpoint_t *end = NULL; - + int retry = 0; + int retry_wait = JK_LB_MIN_RETRY_WAIT; s->jvm_route = rec->r; - rc = rec->w->get_endpoint(rec->w, &end, l); if (JK_IS_DEBUG_LEVEL(l)) jk_log(l, JK_LOG_DEBUG, "service worker=%s jvm_route=%s", rec->s->name, s->jvm_route); - if (rc && end) { + while ((!(r=rec->w->get_endpoint(rec->w, &end, l)) || !end) && (retry < p->worker->s->retries)) { + retry++; + retry_wait *=2; + if (retry_wait > JK_LB_MAX_RETRY_WAIT) + retry_wait = JK_LB_MAX_RETRY_WAIT; + if (JK_IS_DEBUG_LEVEL(l)) + jk_log(l, JK_LOG_DEBUG, + "could not get free endpoint for worker" + " (retry %d, sleeping for %d ms)", + retry, retry_wait); + jk_sleep(retry_wait); + } + if (!r || !end) { + /* If we can not get the endpoint + * mark the worker as busy rather then + * as in error if the retry number is + * greater then the number of retries. + */ + rec->s->is_busy = JK_TRUE; + jk_log(l, JK_LOG_INFO, + "could not get free endpoint for worker %s (%d retries)", + rec->s->name, retry); + } + else { + int service_stat = -1; size_t rd = 0; size_t wr = 0; /* Reset endpoint read and write sizes for @@ -732,116 +755,92 @@ rec->s->error_time = 0; if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC) jk_shm_unlock(); - JK_TRACE_EXIT(l); - return JK_TRUE; - } - if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC) - jk_shm_unlock(); - } - else { - /* If we can not get the endpoint - * mark the worker as busy rather then - * as in error if the attempt number is - * greater then the number of retries. - */ - attempt++; - if (attempt > p->worker->s->retries) { - rec->s->is_busy = JK_TRUE; - num_of_workers = 0; + rc = JK_TRUE; } - jk_log(l, JK_LOG_INFO, - "could not get free endpoint for worker %s (attempt %d)", - rec->s->name, attempt); - /* In case of attempt > num of workers sleep for 100 ms - * on each consecutive attempt. 
- */ - if (attempt > (int)p->worker->num_of_workers) - jk_sleep(JK_SLEEP_DEF); - continue; - } - if (service_stat == JK_FALSE) { - /* - * Service failed !!! - * - * Time for fault tolerance (if possible)... - */ - - rec->s->errors++; - rec->s->in_error_state = JK_TRUE; - rec->s->in_recovering = JK_FALSE; - rec->s->error_time = time(NULL); + else if (service_stat == JK_FALSE) { + /* + * Service failed !!! + * + * Time for fault tolerance (if possible)... + */ - if (is_service_error != JK_HTTP_SERVER_BUSY) { + rec->s->errors++; + rec->s->in_error_state = JK_TRUE; + rec->s->in_recovering = JK_FALSE; + rec->s->error_time = time(NULL); + if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC) + jk_shm_unlock(); + + if (is_service_error != JK_HTTP_SERVER_BUSY) { + /* + * Error is not recoverable - break with an error. + */ + jk_log(l, JK_LOG_ERROR, + "unrecoverable error %d, request failed." + " Tomcat failed in the middle of request," + " we can't recover to another instance.", + is_service_error); + *is_error = is_service_error; + rc = JK_FALSE; + } + else + jk_log(l, JK_LOG_INFO, + "service failed, worker %s is in error state", + rec->s->name); + } + else if (service_stat == JK_CLIENT_ERROR) { /* - * Error is not recoverable - break with an error. + * Client error !!! + * Since this is bad request do not fail over. */ - jk_log(l, JK_LOG_ERROR, - "unrecoverable error %d, request failed." - " Tomcat failed in the middle of request," - " we can't recover to another instance.", - is_service_error); + rec->s->errors++; + rec->s->in_error_state = JK_FALSE; + rec->s->in_recovering = JK_FALSE; + rec->s->error_time = 0; + if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC) + jk_shm_unlock(); + jk_log(l, JK_LOG_INFO, + "unrecoverable error %d, request failed." 
+ " Client failed in the middle of request," + " we can't recover to another instance.", + is_service_error); *is_error = is_service_error; - JK_TRACE_EXIT(l); - return JK_FALSE; + rc = JK_CLIENT_ERROR; + } + else { + if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC) + jk_shm_unlock(); } - jk_log(l, JK_LOG_INFO, - "service failed, worker %s is in error state", - rec->s->name); } - else if (service_stat == JK_CLIENT_ERROR) { + if ( rc == -1 ) { /* - * Client error !!! - * Since this is bad request do not fail over. - */ - rec->s->errors++; - rec->s->in_error_state = JK_FALSE; - rec->s->in_recovering = JK_FALSE; - rec->s->error_time = 0; - *is_error = is_service_error; - - jk_log(l, JK_LOG_INFO, - "unrecoverable error %d, request failed." - " Client failed in the middle of request," - " we can't recover to another instance.", - is_service_error); - JK_TRACE_EXIT(l); - return JK_CLIENT_ERROR; - } - else { - /* If we can not get the endpoint from the worker - * that does not mean that the worker is in error - * state. It means that the worker is busy. - * We will try another worker. - * To prevent infinite loop decrement worker count; + * Error is recoverable by submitting the request to + * another worker... Lets try to do that. */ + if (JK_IS_DEBUG_LEVEL(l)) + jk_log(l, JK_LOG_DEBUG, + "recoverable error... will try to recover on other worker"); } - /* - * Error is recoverable by submitting the request to - * another worker... Lets try to do that. - */ - if (JK_IS_DEBUG_LEVEL(l)) - jk_log(l, JK_LOG_DEBUG, - "recoverable error... will try to recover on other host"); } -#if 0 else { /* NULL record, no more workers left ... 
*/ jk_log(l, JK_LOG_ERROR, "All tomcat instances failed, no more workers left"); - JK_TRACE_EXIT(l); *is_error = JK_HTTP_SERVER_BUSY; - return JK_FALSE; + rc = JK_FALSE; } -#endif - --num_of_workers; - prec = rec; + attempt++; } - jk_log(l, JK_LOG_INFO, - "All tomcat instances are busy or in error state"); - /* Set error to Timeout */ - *is_error = JK_HTTP_SERVER_BUSY; + if ( rc == -1 ) { + jk_log(l, JK_LOG_INFO, + "All tomcat instances are busy or in error state"); + /* Set error to Timeout */ + *is_error = JK_HTTP_SERVER_BUSY; + rc = JK_FALSE; + } + JK_TRACE_EXIT(l); - return JK_FALSE; + return rc; } static int JK_METHOD done(jk_endpoint_t **e, jk_logger_t *l) Modified: tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h URL: http://svn.apache.org/viewvc/tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h?rev=437455&r1=437454&r2=437455&view=diff ============================================================================== --- tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h (original) +++ tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h Sun Aug 27 13:29:08 2006 @@ -53,6 +53,10 @@ #define JK_LB_LOCK_TEXT_PESSIMISTIC ("Pessimistic") #define JK_LB_LOCK_TEXT_DEF (JK_LB_LOCK_TEXT_OPTIMISTIC) +/* Minimal time in ms to wait between get_endpoint retries for balanced workers */ +#define JK_LB_MIN_RETRY_WAIT (25) +/* Maximal time in ms to wait between get_endpoint retries for balanced workers */ +#define JK_LB_MAX_RETRY_WAIT (100) /* Time to wait before retry. */ #define WAIT_BEFORE_RECOVER (60) /* We accept doing global maintenance if we are */ --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]