Author: rjung
Date: Sun Aug 27 13:29:08 2006
New Revision: 437455

URL: http://svn.apache.org/viewvc?rev=437455&view=rev
Log:
Reorganize the lb service method:
- return from the method at a single point, so that passing back
  results becomes easier
- the retry handling was confusing at best; it should be clearer now
- abort as soon as get_most_suitable_worker returns NULL

Modified:
    tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c
    tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h

Modified: tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c
URL: 
http://svn.apache.org/viewvc/tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c?rev=437455&r1=437454&r2=437455&view=diff
==============================================================================
--- tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c (original)
+++ tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c Sun Aug 27 13:29:08 2006
@@ -598,9 +598,9 @@
                              jk_logger_t *l, int *is_error)
 {
     lb_endpoint_t *p;
-    int attempt = 0;
+    int attempt = 1;
     int num_of_workers;
-    worker_record_t *prec = NULL;
+    int rc = -1;
     char *sessionid = NULL;
 
     JK_TRACE_ENTER(l);
@@ -635,26 +635,49 @@
                "service sticky_session=%d id='%s'",
                p->worker->s->sticky_session, sessionid ? sessionid : "empty");
 
-    while (num_of_workers) {
+    while (attempt <= num_of_workers && rc == -1) {
         worker_record_t *rec =
             get_most_suitable_worker(p->worker, sessionid, s, l);
-        int rc;
         /* Do not reuse previous worker, because
          * that worker already failed.
          */
-        if (rec && rec != prec) {
+        if (rec) {
+            int r;
             int is_service_error = JK_HTTP_OK;
-            int service_stat = JK_FALSE;
             jk_endpoint_t *end = NULL;
-
+            int retry = 0;
+            int retry_wait = JK_LB_MIN_RETRY_WAIT;
             s->jvm_route = rec->r;
-            rc = rec->w->get_endpoint(rec->w, &end, l);
 
             if (JK_IS_DEBUG_LEVEL(l))
                 jk_log(l, JK_LOG_DEBUG,
                        "service worker=%s jvm_route=%s",
                        rec->s->name, s->jvm_route);
-            if (rc && end) {
+            while ((!(r=rec->w->get_endpoint(rec->w, &end, l)) || !end) && 
(retry < p->worker->s->retries)) {
+                retry++;
+                retry_wait *=2;
+                if (retry_wait > JK_LB_MAX_RETRY_WAIT)
+                    retry_wait = JK_LB_MAX_RETRY_WAIT;
+                if (JK_IS_DEBUG_LEVEL(l))
+                    jk_log(l, JK_LOG_DEBUG,
+                           "could not get free endpoint for worker"
+                           " (retry %d, sleeping for %d ms)",
+                           retry, retry_wait);
+                jk_sleep(retry_wait);
+            }
+            if (!r || !end) {
+                /* If we can not get the endpoint
+                 * mark the worker as busy rather then
+                 * as in error if the retry number is
+                 * greater then the number of retries.
+                 */
+                rec->s->is_busy = JK_TRUE;
+                jk_log(l, JK_LOG_INFO,
+                       "could not get free endpoint for worker %s (%d 
retries)",
+                       rec->s->name, retry);
+            }
+            else {
+                int service_stat = -1;
                 size_t rd = 0;
                 size_t wr = 0;
                 /* Reset endpoint read and write sizes for
@@ -732,116 +755,92 @@
                     rec->s->error_time = 0;
                     if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC)
                         jk_shm_unlock();
-                    JK_TRACE_EXIT(l);
-                    return JK_TRUE;
-                }
-                if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC)
-                    jk_shm_unlock();
-            }
-            else {
-                /* If we can not get the endpoint
-                 * mark the worker as busy rather then
-                 * as in error if the attempt number is
-                 * greater then the number of retries.
-                 */
-                attempt++;
-                if (attempt > p->worker->s->retries) {
-                    rec->s->is_busy = JK_TRUE;
-                    num_of_workers = 0;
+                    rc = JK_TRUE;
                 }
-                jk_log(l, JK_LOG_INFO,
-                       "could not get free endpoint for worker %s (attempt 
%d)",
-                       rec->s->name, attempt);
-                /* In case of attempt > num of workers sleep for 100 ms
-                 * on each consecutive attempt.
-                 */
-                if (attempt > (int)p->worker->num_of_workers)
-                    jk_sleep(JK_SLEEP_DEF);
-                continue;
-            }
-            if (service_stat == JK_FALSE) {
-                /*
-                * Service failed !!!
-                *
-                * Time for fault tolerance (if possible)...
-                */
-
-                rec->s->errors++;
-                rec->s->in_error_state = JK_TRUE;
-                rec->s->in_recovering = JK_FALSE;
-                rec->s->error_time = time(NULL);
+                else if (service_stat == JK_FALSE) {
+                    /*
+                    * Service failed !!!
+                    *
+                    * Time for fault tolerance (if possible)...
+                    */
 
-                if (is_service_error != JK_HTTP_SERVER_BUSY) {
+                    rec->s->errors++;
+                    rec->s->in_error_state = JK_TRUE;
+                    rec->s->in_recovering = JK_FALSE;
+                    rec->s->error_time = time(NULL);
+                    if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC)
+                        jk_shm_unlock();
+
+                    if (is_service_error != JK_HTTP_SERVER_BUSY) {
+                        /*
+                        * Error is not recoverable - break with an error.
+                        */
+                        jk_log(l, JK_LOG_ERROR,
+                            "unrecoverable error %d, request failed."
+                            " Tomcat failed in the middle of request,"
+                            " we can't recover to another instance.",
+                            is_service_error);
+                        *is_error = is_service_error;
+                        rc = JK_FALSE;
+                    }
+                    else
+                        jk_log(l, JK_LOG_INFO,
+                               "service failed, worker %s is in error state",
+                               rec->s->name);
+                }
+                else if (service_stat == JK_CLIENT_ERROR) {
                     /*
-                    * Error is not recoverable - break with an error.
+                    * Client error !!!
+                    * Since this is bad request do not fail over.
                     */
-                    jk_log(l, JK_LOG_ERROR,
-                        "unrecoverable error %d, request failed."
-                        " Tomcat failed in the middle of request,"
-                        " we can't recover to another instance.",
-                        is_service_error);
+                    rec->s->errors++;
+                    rec->s->in_error_state = JK_FALSE;
+                    rec->s->in_recovering = JK_FALSE;
+                    rec->s->error_time = 0;
+                    if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC)
+                        jk_shm_unlock();
+                    jk_log(l, JK_LOG_INFO,
+                           "unrecoverable error %d, request failed."
+                           " Client failed in the middle of request,"
+                           " we can't recover to another instance.",
+                           is_service_error);
                     *is_error = is_service_error;
-                    JK_TRACE_EXIT(l);
-                    return JK_FALSE;
+                    rc = JK_CLIENT_ERROR;
+                }
+                else {
+                    if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC)
+                        jk_shm_unlock();
                 }
-                jk_log(l, JK_LOG_INFO,
-                       "service failed, worker %s is in error state",
-                       rec->s->name);
             }
-            else if (service_stat == JK_CLIENT_ERROR) {
+            if ( rc == -1 ) {
                 /*
-                * Client error !!!
-                * Since this is bad request do not fail over.
-                */
-                rec->s->errors++;
-                rec->s->in_error_state = JK_FALSE;
-                rec->s->in_recovering = JK_FALSE;
-                rec->s->error_time = 0;
-                *is_error = is_service_error;
-
-                jk_log(l, JK_LOG_INFO,
-                       "unrecoverable error %d, request failed."
-                       " Client failed in the middle of request,"
-                       " we can't recover to another instance.",
-                       is_service_error);
-                JK_TRACE_EXIT(l);
-                return JK_CLIENT_ERROR;
-            }
-            else {
-                /* If we can not get the endpoint from the worker
-                 * that does not mean that the worker is in error
-                 * state. It means that the worker is busy.
-                 * We will try another worker.
-                 * To prevent infinite loop decrement worker count;
+                 * Error is recoverable by submitting the request to
+                 * another worker... Lets try to do that.
                  */
+                if (JK_IS_DEBUG_LEVEL(l))
+                    jk_log(l, JK_LOG_DEBUG,
+                           "recoverable error... will try to recover on other 
worker");
             }
-            /*
-             * Error is recoverable by submitting the request to
-             * another worker... Lets try to do that.
-             */
-            if (JK_IS_DEBUG_LEVEL(l))
-                jk_log(l, JK_LOG_DEBUG,
-                       "recoverable error... will try to recover on other 
host");
         }
-#if 0
         else {
             /* NULL record, no more workers left ... */
             jk_log(l, JK_LOG_ERROR,
                    "All tomcat instances failed, no more workers left");
-            JK_TRACE_EXIT(l);
             *is_error = JK_HTTP_SERVER_BUSY;
-            return JK_FALSE;
+            rc = JK_FALSE;
         }
-#endif
-        --num_of_workers;
-        prec = rec;
+        attempt++;
     }
-    jk_log(l, JK_LOG_INFO,
-           "All tomcat instances are busy or in error state");
-    /* Set error to Timeout */
-    *is_error = JK_HTTP_SERVER_BUSY;
+    if ( rc == -1 ) {
+        jk_log(l, JK_LOG_INFO,
+               "All tomcat instances are busy or in error state");
+        /* Set error to Timeout */
+        *is_error = JK_HTTP_SERVER_BUSY;
+        rc = JK_FALSE;
+    }
+
     JK_TRACE_EXIT(l);
-    return JK_FALSE;
+    return rc;
 }
 
 static int JK_METHOD done(jk_endpoint_t **e, jk_logger_t *l)

Modified: tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h
URL: 
http://svn.apache.org/viewvc/tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h?rev=437455&r1=437454&r2=437455&view=diff
==============================================================================
--- tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h (original)
+++ tomcat/connectors/trunk/jk/native/common/jk_lb_worker.h Sun Aug 27 13:29:08 2006
@@ -53,6 +53,10 @@
 #define JK_LB_LOCK_TEXT_PESSIMISTIC    ("Pessimistic")
 #define JK_LB_LOCK_TEXT_DEF            (JK_LB_LOCK_TEXT_OPTIMISTIC)
 
+/* Minimal time in ms to wait between get_endpoint retries for balanced 
workers */
+#define JK_LB_MIN_RETRY_WAIT  (25)
+/* Maximal time in ms to wait between get_endpoint retries for balanced 
workers */
+#define JK_LB_MAX_RETRY_WAIT  (100)
 /* Time to wait before retry. */
 #define WAIT_BEFORE_RECOVER   (60)
 /* We accept doing global maintenance if we are */



---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to