Author: rjung
Date: Fri Dec 14 21:38:45 2007
New Revision: 604393
URL: http://svn.apache.org/viewvc?rev=604393&view=rev
Log:
Refactoring jk_ajp_common.c
(and in smaller parts jk_lb_worker.c).
- we now explicitely pass back the information,
if the lb should do failover. This was not
an automatic consequence of the return codes we used.
- I hope the logic in ajp_send_request() is now
easier to understand
- the service method has been adopted, to handle
the slightly changed return codes from ajp_send_request(),
and also w.r.t. the changed interface to the lb
(recovery flag).
- improved handling of reply timeouts in lb
(max_reply_timeouts should work better than before
and reply_timeouts should now work again in
combination with failover)
Note: there is still a conceptual problem with
max_reply_timeouts
- improved handling of fail on status codes, especially
concerning the decision, if the node should go
into error state
Modified:
tomcat/connectors/trunk/jk/native/common/jk_ajp_common.c
tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c
Modified: tomcat/connectors/trunk/jk/native/common/jk_ajp_common.c
URL:
http://svn.apache.org/viewvc/tomcat/connectors/trunk/jk/native/common/jk_ajp_common.c?rev=604393&r1=604392&r2=604393&view=diff
==============================================================================
--- tomcat/connectors/trunk/jk/native/common/jk_ajp_common.c (original)
+++ tomcat/connectors/trunk/jk/native/common/jk_ajp_common.c Fri Dec 14
21:38:45 2007
@@ -755,9 +755,10 @@
JK_LEAVE_CS(&aw->cs, rc);
if (IS_VALID_SOCKET(ae->sd)) {
ret = JK_TRUE;
- jk_log(l, JK_LOG_INFO,
- "(%s) Will try pooled connection sd = %d from slot %d",
- ae->worker->name, ae->sd, i);
+ if (JK_IS_DEBUG_LEVEL(l))
+ jk_log(l, JK_LOG_DEBUG,
+ "(%s) Will try pooled connection sd = %d from slot %d",
+ ae->worker->name, ae->sd, i);
}
}
JK_TRACE_EXIT(l);
@@ -912,7 +913,7 @@
* @param ae endpoint
* @param msg message to send
* @param l logger
- * @return JK_FATAL_ERROR: message contains unknown protocol
+ * @return JK_FATAL_ERROR: endpoint contains unknown protocol
* JK_FALSE: other failure
* JK_TRUE: success
* @remark Always closes socket in case of
@@ -1230,7 +1231,7 @@
/*
* send request to Tomcat via Ajp13
* - first try to find reuseable socket
- * - if no one available, try to connect
+ * - if no such available, try to connect
* - send request, but send must be seen as asynchronous,
* since send() call will return noerror about 95% of time
* Hopefully we'll get more information on next read.
@@ -1240,23 +1241,23 @@
*
* Return values of ajp_send_request() function:
* return value op->recoverable reason
- * JK_FALSE JK_FALSE/JK_TRUE ajp_connection_tcp_send_message()
returns JK_FATAL_ERROR
+ * JK_FATAL_ERROR JK_FALSE ajp_connection_tcp_send_message()
returns JK_FATAL_ERROR
* Endpoint belongs to unknown protocol.
- * XXX: The recoverability handling of JK_FATAL_ERROR returning from
- * ajp_connection_tcp_send_message() seems to be inconsistent.
- * JK_FALSE JK_TRUE ajp_connection_tcp_send_message()
returns JK_FALSE
- * Sending data in jk_tcp_socket_sendfull() returns with error.
- * JK_FALSE JK_TRUE Could not connect to backend
- * JK_SERVER_ERROR JK_TRUE Error during sending parts of POST
body to backend
+ * JK_FATAL_ERROR JK_TRUE ajp_connection_tcp_send_message()
returns JK_FALSE
+ * Sending request or request body in jk_tcp_socket_sendfull()
returns with error.
+ * JK_FATAL_ERROR JK_TRUE Could not connect to backend
* JK_CLIENT_RD_ERROR JK_FALSE Error during reading parts of POST
body from client
- * JK_TRUE JK_TRUE All other cases
+ * JK_TRUE JK_TRUE All other cases (OK)
*/
static int ajp_send_request(jk_endpoint_t *e,
jk_ws_service_t *s,
jk_logger_t *l,
ajp_endpoint_t * ae, ajp_operation_t * op)
{
- int err = 0;
+ int err_conn = 0;
+ int err_cping = 0;
+ int err_send = 0;
+ int rc;
int postlen;
JK_TRACE_ENTER(l);
@@ -1268,16 +1269,19 @@
/*
* First try to reuse open connections...
*/
+ if (!IS_VALID_SOCKET(ae->sd))
+ ajp_next_connection(ae, l);
while (IS_VALID_SOCKET(ae->sd)) {
- int rc = 0;
- err = 0;
- if (!jk_is_socket_connected(ae->sd, l)) {
+ int err = JK_FALSE;
+ if (jk_is_socket_connected(ae->sd, l) == JK_FALSE) {
ae->last_errno = errno;
- jk_log(l, JK_LOG_DEBUG,
- "(%s) socket %d is not connected any more (errno=%d)",
+ jk_log(l, JK_LOG_INFO,
+ "(%s) failed sending request, "
+ "socket %d is not connected any more (errno=%d)",
ae->worker->name, ae->sd, ae->last_errno);
ae->sd = JK_INVALID_SOCKET;
- err = 1;
+ err = JK_TRUE;
+ err_conn++;
}
if (ae->worker->prepost_timeout > 0 && !err) {
/* handle cping/cpong if prepost_timeout is set
@@ -1286,6 +1290,10 @@
*/
if (ajp_handle_cping_cpong(ae,
ae->worker->prepost_timeout, l) == JK_FALSE) {
+ jk_log(l, JK_LOG_INFO,
+ "(%s) failed sending request, "
+ "socket %d prepost cping/cpong failure (errno=%d)",
+ ae->worker->name, ae->sd, ae->last_errno);
/* XXX: Is there any reason to try other
* connections to the node if one of them fails
* the cping/cpong heartbeat?
@@ -1293,42 +1301,40 @@
* there is a chance that all other connections would
* fail as well.
*/
- err = 2;
+ err = JK_TRUE;
+ err_cping++;
}
}
- /* If we got an error or can't send data, then try to get a pooled
- * connection and try again. If we are succesful, break out of this
- * loop. */
- if (err ||
- ((rc = ajp_connection_tcp_send_message(ae, op->request, l)) !=
JK_TRUE)) {
- if (rc != JK_FATAL_ERROR) {
- if (err == 1) {
- jk_log(l, JK_LOG_DEBUG,
- "(%s) failed sending request. "
- "Will try another pooled connection",
- ae->worker->name);
- }
- else {
- jk_log(l, JK_LOG_INFO,
- "(%s) error sending request. "
- "Will try another pooled connection",
- ae->worker->name);
- }
- ajp_next_connection(ae, l);
- }
- else {
+ /* We've got a connected socket and the optional
+ * cping/cpong worked, so let's send the request now.
+ */
+ if (err == JK_FALSE) {
+ rc = ajp_connection_tcp_send_message(ae, op->request, l);
+ /* If this worked, we can break out of the loop
+ * and proceed with the request.
+ */
+ if (rc == JK_TRUE)
+ break;
+ /* Error during sending the request.
+ */
+ err_send++;
+ if (rc == JK_FATAL_ERROR)
op->recoverable = JK_FALSE;
- jk_log(l, JK_LOG_ERROR,
- "(%s) error sending request. Unrecoverable operation",
- ae->worker->name);
- jk_shutdown_socket(ae->sd, l);
- ae->sd = JK_INVALID_SOCKET;
- JK_TRACE_EXIT(l);
- return JK_FALSE;
- }
- }
- else
+ jk_log(l, JK_LOG_INFO,
+ "(%s) failed sending request (%srecoverable), "
+ "socket %d (errno=%d)",
+ ae->worker->name,
+ op->recoverable ? "" : "un",
+ ae->sd, ae->last_errno);
+ JK_TRACE_EXIT(l);
+ return JK_FATAL_ERROR;
+ }
+ /* If we got an error or can't send data, then try to steal another
pooled
+ * connection and try again. If we are not successful, break out of
this
+ * loop and try to open a new connection after the loop.
+ */
+ if (ajp_next_connection(ae, l) == JK_FALSE)
break;
}
@@ -1336,52 +1342,49 @@
* If we failed to reuse a connection, try to reconnect.
*/
if (!IS_VALID_SOCKET(ae->sd)) {
- if (err == 1) {
- /* If err is set, the tomcat is disconnected */
- jk_log(l, JK_LOG_INFO,
- "(%s) all endpoints are disconnected", ae->worker->name);
- JK_TRACE_EXIT(l);
- return JK_FALSE;
- }
- else if (err) {
- /* If err is set, the tomcat is dead */
+ /* Could not steal any connection from an endpoint - backend is
disconnected */
+ if (err_conn + err_cping + err_send > 0)
jk_log(l, JK_LOG_INFO,
- "(%s) all endpoints are dead", ae->worker->name);
- /* TODO: What is the purpose of the following log message?
- * IMO it is very confusing and does not reflect the
- * real reason (CPING/CPONG failed) of the error.
- * Further more user might deliberately set the
- * connectionTimeout and this is normal operational
- * message in that case.
- */
- jk_log(l, JK_LOG_INFO,
- "(%s) increase the backend idle connection "
- "timeout or the connection_pool_minsize",
- ae->worker->name);
- JK_TRACE_EXIT(l);
- return JK_FALSE;
- }
+ "(%s) all endpoints are disconnected, "
+ "detected by connect check (%d), cping (%d), send (%d)",
+ ae->worker->name, err_conn, err_cping, err_send);
+ else if (JK_IS_DEBUG_LEVEL(l))
+ jk_log(l, JK_LOG_DEBUG,
+ "(%s) all endpoints are disconnected, "
+ "detected by connect check (%d), cping (%d), send (%d)",
+ ae->worker->name, err_conn, err_cping, err_send);
/* Connect to the backend.
- * This can be either uninitalized connection or a reconnect.
*/
- if (ajp_connect_to_endpoint(ae, l) == JK_TRUE) {
- /*
- * After we are connected, each error that we are going to
- * have is probably unrecoverable
- */
- if (ajp_connection_tcp_send_message(ae, op->request, l) !=
JK_TRUE) {
- jk_log(l, JK_LOG_INFO,
- "(%s) error sending request on a fresh connection
(errno=%d)",
- ae->worker->name, ae->last_errno);
- JK_TRACE_EXIT(l);
- return JK_FALSE;
- }
+ if (ajp_connect_to_endpoint(ae, l) != JK_TRUE) {
+ jk_log(l, JK_LOG_ERROR,
+ "(%s) connecting to backend failed. Tomcat is probably not
started "
+ "or is listening on the wrong port (errno=%d)",
+ ae->worker->name, ae->last_errno);
+ JK_TRACE_EXIT(l);
+ return JK_FATAL_ERROR;
}
- else {
+ /* Send the request.
+ */
+ rc = ajp_connection_tcp_send_message(ae, op->request, l);
+ /* Error during sending the request.
+ */
+ if (rc != JK_TRUE) {
+ if (rc == JK_FATAL_ERROR)
+ op->recoverable = JK_FALSE;
+ jk_log(l, JK_LOG_ERROR,
+ "(%s) failed sending request on a fresh connection
(%srecoverable), "
+ "socket %d (errno=%d)",
+ ae->worker->name, op->recoverable ? "" : "un",
+ ae->sd, ae->last_errno);
JK_TRACE_EXIT(l);
- return JK_FALSE;
+ return JK_FATAL_ERROR;
}
}
+ else if (JK_IS_DEBUG_LEVEL(l))
+ jk_log(l, JK_LOG_DEBUG,
+ "(%s) Statistics about invalid connections: "
+ "connect check (%d), cping (%d), send (%d)",
+ ae->worker->name, err_conn, err_cping, err_send);
/*
* From now on an error means that we have an internal server error
@@ -1405,11 +1408,19 @@
postlen = op->post->len;
if (postlen > AJP_HEADER_LEN) {
- if (ajp_connection_tcp_send_message(ae, op->post, l) != JK_TRUE) {
- jk_log(l, JK_LOG_ERROR, "(%s) failed resending request body (%d)",
- ae->worker->name, postlen);
+ rc = ajp_connection_tcp_send_message(ae, op->post, l);
+ /* Error during sending the request body.
+ */
+ if (rc != JK_TRUE) {
+ if (rc == JK_FATAL_ERROR)
+ op->recoverable = JK_FALSE;
+ jk_log(l, JK_LOG_ERROR,
+ "(%s) failed sending request body of size %d
(%srecoverable), "
+ "socket %d (errno=%d)",
+ ae->worker->name, postlen, op->recoverable ? "" : "un",
+ ae->sd, ae->last_errno);
JK_TRACE_EXIT(l);
- return JK_SERVER_ERROR;
+ return JK_FATAL_ERROR;
}
else {
if (JK_IS_DEBUG_LEVEL(l))
@@ -1422,12 +1433,19 @@
postlen = s->reco_buf->len;
if (postlen > AJP_HEADER_LEN) {
- if (ajp_connection_tcp_send_message(ae, s->reco_buf, l) !=
JK_TRUE) {
+ rc = ajp_connection_tcp_send_message(ae, s->reco_buf, l);
+ /* Error during sending the request body.
+ */
+ if (rc != JK_TRUE) {
+ if (rc == JK_FATAL_ERROR)
+ op->recoverable = JK_FALSE;
jk_log(l, JK_LOG_ERROR,
- "(%s) failed resending request body (lb mode) (%d)",
- ae->worker->name, postlen);
+ "(%s) failed sending request body of size %d (lb mode)
(%srecoverable), "
+ "socket %d (errno=%d)",
+ ae->worker->name, postlen, op->recoverable ? "" : "un",
+ ae->sd, ae->last_errno);
JK_TRACE_EXIT(l);
- return JK_SERVER_ERROR;
+ return JK_FATAL_ERROR;
}
}
else {
@@ -1469,16 +1487,24 @@
}
s->content_read = (jk_uint64_t)len;
- if (ajp_connection_tcp_send_message(ae, op->post, l) != JK_TRUE) {
- jk_log(l, JK_LOG_ERROR, "(%s) error sending request body",
- ae->worker->name);
+ rc = ajp_connection_tcp_send_message(ae, op->post, l);
+ /* Error during sending the request body.
+ */
+ if (rc != JK_TRUE) {
+ if (rc == JK_FATAL_ERROR)
+ op->recoverable = JK_FALSE;
+ jk_log(l, JK_LOG_ERROR,
+ "(%s) failed sending request body of size %d
(%srecoverable), "
+ "socket %d (errno=%d)",
+ ae->worker->name, len, op->recoverable ? "" : "un",
+ ae->sd, ae->last_errno);
JK_TRACE_EXIT(l);
- return JK_SERVER_ERROR;
+ return JK_FATAL_ERROR;
}
}
}
JK_TRACE_EXIT(l);
- return (JK_TRUE);
+ return JK_TRUE;
}
@@ -1657,13 +1683,6 @@
/*
* get replies from Tomcat via Ajp13/Ajp14
- * We will know only at read time if the remote host closed
- * the connection (half-closed state - FIN-WAIT2). In that case
- * we must close our side of the socket and abort emission.
- * We will need another connection to send the request
- * There is need of refactoring here since we mix
- * reply reception (tomcat -> apache) and request send (apache -> tomcat)
- * and everything using the same buffer (repmsg)
* ajp13/ajp14 is async but handling read/send this way prevent nice recovery
* In fact if tomcat link is broken during upload (browser -> apache -> tomcat)
* we'll loose data and we'll have to abort the whole request.
@@ -1679,8 +1698,8 @@
* JK_STATUS_FATAL_ERROR mostly JK_TRUE ajp_process_callback() returns
JK_STATUS_FATAL_ERROR
* Recoverable, if callback didn't return with a JK_HAS_RESPONSE
before.
* JK_HAS_RESPONSE: parts of the post buffer are consumed.
- * JK_SERVER_ERROR ? ajp_process_callback() returns
JK_AJP13_ERROR
- * JK_AJP13_ERROR: protocol error.
+ * JK_FATAL_ERROR ? ajp_process_callback() returns
JK_AJP13_ERROR
+ * JK_AJP13_ERROR: protocol error, or JK_INTERNAL_ERROR: chunk size
to large
* JK_CLIENT_RD_ERROR ? ajp_process_callback() returns
JK_CLIENT_RD_ERROR
* JK_CLIENT_RD_ERROR: could not read post from client.
* JK_CLIENT_WR_ERROR ? ajp_process_callback() returns
JK_CLIENT_WR_ERROR
@@ -1743,7 +1762,6 @@
}
if (ajp_connection_tcp_get_message(p, op->reply, l) != JK_TRUE) {
- /* we just can't recover, unset recover flag */
if (headeratclient == JK_FALSE) {
jk_log(l, JK_LOG_ERROR,
"(%s) Tomcat is down or refused connection. "
@@ -1752,10 +1770,6 @@
/*
* communication with tomcat has been interrupted BEFORE
* headers have been sent to the client.
- * DISCUSSION: As we suppose that tomcat has already started
- * to process the query we think it's unrecoverable (and we
- * should not retry or switch to another tomcat in the
- * cluster).
*/
/*
@@ -1763,7 +1777,6 @@
*/
if (p->worker->recovery_opts & RECOVER_ABORT_IF_TCGETREQUEST)
op->recoverable = JK_FALSE;
-
/*
* We revert back to recoverable, if recovery_opts allow it
for GET or HEAD
*/
@@ -1778,12 +1791,6 @@
}
}
- /*
- * we want to display the webservers error page, therefore
- * we return JK_FALSE
- */
- JK_TRACE_EXIT(l);
- return JK_FALSE;
}
else {
jk_log(l, JK_LOG_ERROR,
@@ -1797,11 +1804,6 @@
* sent, therefore the response is "complete" in a sense
* that nobody should append any data, especially no 500 error
* page of the webserver!
- *
- * BUT if you retrun JK_TRUE you have a 200 (OK) code in your
- * in your apache access.log instead of a 500 (Error).
- * Therefore return FALSE/FALSE
- * return JK_TRUE;
*/
/*
@@ -1810,23 +1812,14 @@
if (p->worker->recovery_opts & RECOVER_ABORT_IF_TCSENDHEADER)
op->recoverable = JK_FALSE;
- /*
- * We revert back to recoverable, if recovery_opts allow it
for GET or HEAD
- */
- if (op->recoverable == JK_FALSE) {
- if (p->worker->recovery_opts & RECOVER_ALWAYS_HTTP_HEAD) {
- if (!strcmp(s->method, "HEAD"))
- op->recoverable = JK_TRUE;
- }
- else if (p->worker->recovery_opts &
RECOVER_ALWAYS_HTTP_GET) {
- if (!strcmp(s->method, "GET"))
- op->recoverable = JK_TRUE;
- }
- }
-
- JK_TRACE_EXIT(l);
- return JK_FALSE;
}
+
+ /*
+ * we want to display the webservers error page, therefore
+ * we return JK_FALSE
+ */
+ JK_TRACE_EXIT(l);
+ return JK_FALSE;
}
rc = ajp_process_callback(op->reply, op->post, p, s, l);
@@ -1840,13 +1833,19 @@
headeratclient = JK_TRUE;
}
else if (JK_STATUS_ERROR == rc || JK_STATUS_FATAL_ERROR == rc) {
+ jk_log(l, JK_LOG_INFO,
+ "(%s) request failed%s, "
+ "because of response status %d, ",
+ p->worker->name,
+ rc == JK_STATUS_FATAL_ERROR ? "" : " (soft)",
+ s->http_response_status);
JK_TRACE_EXIT(l);
return rc;
}
else if (JK_AJP13_HAS_RESPONSE == rc) {
/*
* in upload-mode there is no second chance since
- * we may have allready sent part of the uploaded data
+ * we may have already sent part of the uploaded data
* to Tomcat.
* In this case if Tomcat connection is broken we must
* abort request and indicate error.
@@ -1870,7 +1869,7 @@
* failover is possible.
*/
JK_TRACE_EXIT(l);
- return JK_SERVER_ERROR;
+ return JK_FATAL_ERROR;
}
else if (JK_CLIENT_RD_ERROR == rc) {
/*
@@ -1893,7 +1892,7 @@
* Internal error, like memory allocation or invalid packet
lengths.
*/
JK_TRACE_EXIT(l);
- return JK_SERVER_ERROR;
+ return JK_FATAL_ERROR;
}
else if (JK_AJP13_NO_RESPONSE == rc) {
/*
@@ -1902,10 +1901,6 @@
continue;
}
else if (rc < 0) {
- /*
- * XXX: We should disable reuse in this case to.
- * Maybe use another return value and let service() do the disable
reuse.
- */
op->recoverable = JK_FALSE;
jk_log(l, JK_LOG_ERROR,
"(%s) Callback returns with unknown value %d",
@@ -1926,27 +1921,34 @@
* We serve here the request, using AJP13/AJP14 (e->proto)
*
* Return values of service() method for ajp13/ajp14 worker:
- * return value is_error op->recoverable reason
- * JK_FALSE JK_HTTP_SERVER_ERROR UNDEF Invalid
Parameters (null values)
- * JK_SERVER_ERROR JK_HTTP_OK UNDEF Error during
initializing empty request, response or post body objects
+ * return value is_error e->recoverable reason
+ * JK_FALSE JK_HTTP_SERVER_ERROR TRUE Invalid
Parameters (null values)
+ * JK_SERVER_ERROR JK_HTTP_SERVER_ERROR TRUE Error during
initializing empty request, response or post body objects
* JK_CLIENT_ERROR JK_HTTP_REQUEST_TOO_LARGE JK_TRUE Request doesn't
fit into buffer (error during ajp_marshal_into_msgb())
* JK_CLIENT_ERROR JK_HTTP_BAD_REQUEST JK_FALSE Error during
reading parts of POST body from client
- * JK_SERVER_ERROR JK_HTTP_SERVER_ERROR JK_FALSE If
ajp_send_request() returns JK_TRUE but !op->recoverable.
+ * JK_FATAL_ERROR JK_HTTP_SERVER_ERROR JK_FALSE If
ajp_send_request() returns JK_FATAL_ERROR and !op->recoverable.
+ * JK_FATAL_ERROR JK_HTTP_SERVER_ERROR JK_FALSE If
ajp_send_request() returns JK_TRUE but !op->recoverable.
* This should never happen.
* JK_CLIENT_ERROR JK_HTTP_BAD_REQUEST ? ajp_get_reply()
returns JK_CLIENT_RD_ERROR
* JK_CLIENT_ERROR JK_HTTP_OK ? ajp_get_reply()
returns JK_CLIENT_WR_ERROR
- * JK_SERVER_ERROR JK_HTTP_SERVER_ERROR ? ajp_get_reply()
returns JK_SERVER_ERROR
- * JK_SERVER_ERROR: protocol error or internal error
- * JK_REPLY_TIMEOUT JK_HTTP_GATEWAY_TIME_OUT JK_FALSE ajp_get_reply()
returns JK_REPLY_TIMEOUT
- * Only if !op->recoverable
- * JK_FALSE JK_HTTP_GATEWAY_TIME_OUT JK_FALSE ajp_get_reply()
returns something else
- * Only if !op->recoverable
- * JK_REPLY_TIMEOUT JK_HTTP_GATEWAY_TIME_OUT JK_TRUE ajp_get_reply()
returns JK_REPLY_TIMEOUT
+ * JK_FATAL_ERROR JK_HTTP_SERVER_ERROR JK_TRUE ajp_get_reply()
returns JK_FATAL_ERROR
+ * JK_FATAL_ERROR: protocol error or internal error
+ * JK_FATAL_ERROR JK_HTTP_SERVER_ERROR JK_FALSE ajp_get_reply()
returns JK_FATAL_ERROR
+ * JK_FATAL_ERROR: protocol error or internal error
+ * JK_STATUS_ERROR JK_HTTP_SERVER_BUSY JK_TRUE ajp_get_reply()
returns JK_STATUS_ERROR
* Only if op->recoverable and no more ajp13/ajp14 direct retries
- * JK_STATUS_ERROR JK_STATUS_ERROR JK_TRUE ajp_get_reply()
returns JK_STATUS_ERROR
+ * JK_STATUS_ERROR JK_HTTP_SERVER_BUSY JK_FALSE ajp_get_reply()
returns JK_STATUS_ERROR
+ * Only if !op->recoverable
+ * JK_STATUS_FATAL_ERROR JK_HTTP_SERVER_BUSY JK_TRUE ajp_get_reply()
returns JK_STATUS_ERROR
* Only if op->recoverable and no more ajp13/ajp14 direct retries
- * JK_FALSE JK_HTTP_SERVER_BUSY JK_TRUE ajp_get_reply()
returns JK_FALSE or JK_STATUS_FATAL_ERROR
+ * JK_STATUS_FATAL_ERROR JK_HTTP_SERVER_BUSY JK_FALSE ajp_get_reply()
returns JK_STATUS_FATAL_ERROR
+ * Only if !op->recoverable
+ * JK_REPLY_TIMEOUT JK_HTTP_GATEWAY_TIME_OUT JK_TRUE ajp_get_reply()
returns JK_REPLY_TIMEOUT
+ * ??? JK_FATAL_ERROR JK_HTTP_GATEWAY_TIME_OUT JK_FALSE
ajp_get_reply() returns something else
+ * Only if !op->recoverable
+ * ??? JK_FALSE JK_HTTP_SERVER_BUSY JK_TRUE
ajp_get_reply() returns JK_FALSE
* Only if op->recoverable and no more ajp13/ajp14 direct retries
+ * JK_TRUE JK_HTTP_OK ? OK
*/
static int JK_METHOD ajp_service(jk_endpoint_t *e,
jk_ws_service_t *s,
@@ -1957,6 +1959,9 @@
ajp_operation_t oper;
ajp_operation_t *op = &oper;
ajp_endpoint_t *p;
+ int log_error;
+ int rc = JK_UNSET;
+ char *msg;
JK_TRACE_ENTER(l);
@@ -1970,19 +1975,17 @@
p = e->endpoint_private;
- /* Set returned error to OK */
- *is_error = JK_HTTP_OK;
+ /* Set returned error to SERVER ERROR */
+ *is_error = JK_HTTP_SERVER_ERROR;
op->request = jk_b_new(&(p->pool));
if (!op->request) {
- *is_error = JK_HTTP_SERVER_ERROR;
jk_log(l, JK_LOG_ERROR,
"Failed allocating AJP message");
JK_TRACE_EXIT(l);
return JK_SERVER_ERROR;
}
if (jk_b_set_buffer_size(op->request, p->worker->max_packet_size)) {
- *is_error = JK_HTTP_SERVER_ERROR;
jk_log(l, JK_LOG_ERROR,
"Failed allocating AJP message buffer");
JK_TRACE_EXIT(l);
@@ -1992,14 +1995,12 @@
op->reply = jk_b_new(&(p->pool));
if (!op->reply) {
- *is_error = JK_HTTP_SERVER_ERROR;
jk_log(l, JK_LOG_ERROR,
"Failed allocating AJP message");
JK_TRACE_EXIT(l);
return JK_SERVER_ERROR;
}
if (jk_b_set_buffer_size(op->reply, p->worker->max_packet_size)) {
- *is_error = JK_HTTP_SERVER_ERROR;
jk_log(l, JK_LOG_ERROR,
"Failed allocating AJP message buffer");
JK_TRACE_EXIT(l);
@@ -2009,14 +2010,12 @@
op->post = jk_b_new(&(p->pool));
if (!op->post) {
- *is_error = JK_HTTP_SERVER_ERROR;
jk_log(l, JK_LOG_ERROR,
"Failed allocating AJP message");
JK_TRACE_EXIT(l);
return JK_SERVER_ERROR;
}
if (jk_b_set_buffer_size(op->post, p->worker->max_packet_size)) {
- *is_error = JK_HTTP_SERVER_ERROR;
jk_log(l, JK_LOG_ERROR,
"Failed allocating AJP message buffer");
JK_TRACE_EXIT(l);
@@ -2024,6 +2023,9 @@
}
jk_b_reset(op->post);
+ /* Set returned error to OK */
+ *is_error = JK_HTTP_OK;
+
op->recoverable = JK_TRUE;
op->uploadfd = -1; /* not yet used, later ;) */
@@ -2048,55 +2050,46 @@
jk_log(l, JK_LOG_DEBUG, "processing %s with %d retries",
p->worker->name, p->worker->worker.retries);
}
- /*
- * JK_RETRIES could be replaced by the number of workers in
- * a load-balancing configuration
- */
for (i = 0; i < p->worker->worker.retries; i++) {
/*
* We're using op->request which hold initial request
* if Tomcat is stopped or restarted, we will pass op->request
* to next valid tomcat.
*/
+ log_error = JK_TRUE;
+ rc = JK_UNSET;
+ msg = "";
err = ajp_send_request(e, s, l, p, op);
+ e->recoverable = op->recoverable;
if (err == JK_CLIENT_RD_ERROR) {
*is_error = JK_HTTP_BAD_REQUEST;
- if (p->worker->recovery_opts & RECOVER_ABORT_IF_CLIENTERROR) {
- /* Mark the endpoint for shutdown */
- p->reuse = JK_FALSE;
- }
- jk_log(l, JK_LOG_INFO,
- "(%s) sending request to tomcat failed, "
- "because of client read error "
- "without recovery in send loop attempt=%d",
- p->worker->name, i);
- JK_TRACE_EXIT(l);
- return JK_CLIENT_ERROR;
- }
- else if (err == JK_FALSE) {
- jk_log(l, JK_LOG_INFO,
- "(%s) sending request to tomcat failed, "
- "recoverable operation attempt=%d",
- p->worker->name, i);
- }
- else if (err == JK_TRUE) {
- /* XXX: this should never happen:
- * ajp_send_request() never returns JK_TRUE if
!op->recoverable.
- */
+ msg = "because of client read error";
+ rc = JK_CLIENT_ERROR;
+ log_error = JK_FALSE;
+ e->recoverable = JK_FALSE;
+
+ /* This doesn't make sense, because we already set reuse */
+ /* to JK_FALSE at the beginning of service() and only set it to
true again after */
+ /* the whole response has beend received (callback
JK_AJP13_END_RESPONSE). */
+// if (p->worker->recovery_opts & RECOVER_ABORT_IF_CLIENTERROR) {
+// /* Mark the endpoint for shutdown */
+// p->reuse = JK_FALSE;
+// }
+ }
+ else if (err == JK_FATAL_ERROR) {
+ *is_error = JK_HTTP_SERVER_BUSY;
+ msg = "because of error during request sending";
+ rc = err;
if (!op->recoverable) {
*is_error = JK_HTTP_SERVER_ERROR;
- jk_log(l, JK_LOG_ERROR,
- "(%s) sending request to tomcat failed "
- "because of an unknown reason "
- "without recovery in send loop %d",
- p->worker->name, i);
- JK_TRACE_EXIT(l);
- return JK_SERVER_ERROR;
+ msg = "because of protocol error during request sending";
}
-
+ }
+ else if (err == JK_TRUE && op->recoverable) {
/* Up to there we can recover */
err = ajp_get_reply(e, s, l, p, op);
+ e->recoverable = op->recoverable;
if (err == JK_TRUE) {
*is_error = JK_HTTP_OK;
/* Done with the request */
@@ -2106,79 +2099,61 @@
if (err == JK_CLIENT_RD_ERROR) {
*is_error = JK_HTTP_BAD_REQUEST;
- if (p->worker->recovery_opts & RECOVER_ABORT_IF_CLIENTERROR) {
- /* Mark the endpoint for shutdown */
- p->reuse = JK_FALSE;
- }
- jk_log(l, JK_LOG_INFO,
- "(%s) request failed, "
- "because of client read error "
- "without recovery in send loop attempt=%d",
- p->worker->name, i);
- JK_TRACE_EXIT(l);
- return JK_CLIENT_ERROR;
+ msg = "because of client read error";
+ rc = JK_CLIENT_ERROR;
+ log_error = JK_FALSE;
+ e->recoverable = JK_FALSE;
+ /* This doesn't make sense, because we already set reuse */
+ /* to JK_FALSE at the beginning of service() and only set it
to true again after */
+ /* the whole response has beend received (callback
JK_AJP13_END_RESPONSE). */
+// if (p->worker->recovery_opts & RECOVER_ABORT_IF_CLIENTERROR)
{
+// /* Mark the endpoint for shutdown */
+// p->reuse = JK_FALSE;
+// }
}
else if (err == JK_CLIENT_WR_ERROR) {
/* XXX: Is this correct to log this as 200? */
*is_error = JK_HTTP_OK;
- if (p->worker->recovery_opts & RECOVER_ABORT_IF_CLIENTERROR) {
- /* Mark the endpoint for shutdown */
- p->reuse = JK_FALSE;
- }
- jk_log(l, JK_LOG_INFO,
- "(%s) request failed, "
- "because of client write error "
- "without recovery in send loop attempt=%d",
- p->worker->name, i);
- JK_TRACE_EXIT(l);
- return JK_CLIENT_ERROR;
+ msg = "because of client write error";
+ rc = JK_CLIENT_ERROR;
+ log_error = JK_FALSE;
+ e->recoverable = JK_FALSE;
+ /* This doesn't make sense, because we already set reuse */
+ /* to JK_FALSE at the beginning of service() and only set it
to true again after */
+ /* the whole response has beend received (callback
JK_AJP13_END_RESPONSE). */
+// if (p->worker->recovery_opts & RECOVER_ABORT_IF_CLIENTERROR)
{
+// /* Mark the endpoint for shutdown */
+// p->reuse = JK_FALSE;
+// }
}
- else if (err == JK_SERVER_ERROR) {
+ else if (err == JK_FATAL_ERROR) {
*is_error = JK_HTTP_SERVER_ERROR;
- jk_log(l, JK_LOG_INFO,
- "(%s) request failed, "
- "because of server error "
- "without recovery in send loop attempt=%d",
- p->worker->name, i);
- JK_TRACE_EXIT(l);
- return JK_SERVER_ERROR;
+ msg = "because of server error";
+ rc = err;
+ }
+ else if (err == JK_REPLY_TIMEOUT) {
+ *is_error = JK_HTTP_GATEWAY_TIME_OUT;
+ msg = "because of reply timeout";
+ rc = err;
}
else if (err == JK_STATUS_ERROR || err == JK_STATUS_FATAL_ERROR) {
- jk_log(l, JK_LOG_INFO,
- "(%s) request failed%s, "
- "because of response status %d, "
- "recoverable operation attempt=%d",
- p->worker->name,
- err == JK_STATUS_FATAL_ERROR ? "" : " (soft)",
- s->http_response_status, i);
+ *is_error = JK_HTTP_SERVER_BUSY;
+ msg = "because of response status";
+ rc = err;
if (i >= JK_RETRIES) {
jk_sleep(JK_SLEEP_DEF);
}
}
+ /* This should only be the cases err == JK_FALSE */
else {
/* if we can't get reply, check if unrecoverable flag was set
* if is_recoverable_error is cleared, we have started
* receiving upload data and we must consider that
* operation is no more recoverable
*/
- if (!op->recoverable) {
- jk_log(l, JK_LOG_ERROR,
- "(%s) receiving reply from tomcat failed "
- "without recovery in send loop attempt=%d",
- p->worker->name, i);
- if (err == JK_REPLY_TIMEOUT) {
- *is_error = JK_HTTP_GATEWAY_TIME_OUT;
- JK_TRACE_EXIT(l);
- return JK_REPLY_TIMEOUT;
- }
- *is_error = JK_HTTP_BAD_GATEWAY;
- JK_TRACE_EXIT(l);
- return JK_FALSE;
- }
- jk_log(l, JK_LOG_INFO,
- "(%s) receiving from tomcat failed, "
- "recoverable operation attempt=%d",
- p->worker->name, i);
+ *is_error = JK_HTTP_BAD_GATEWAY;
+ msg = "";
+ rc = JK_FALSE;
/* Check for custom retries */
if (i >= JK_RETRIES) {
jk_sleep(JK_SLEEP_DEF);
@@ -2186,44 +2161,53 @@
}
}
else {
- /*
- * This should never happen.
- * XXX: What is a good return value?
- * XXX: What is a good is_error value?
- * XXX: Do we need to set recoverable and/or reuse to false?
+ /* XXX: this should never happen:
+ * ajp_send_request() never returns JK_TRUE if
!op->recoverable.
+ * and all other return values have already been handled.
*/
- op->recoverable = JK_FALSE;
+ e->recoverable = JK_FALSE;
+ *is_error = JK_HTTP_SERVER_ERROR;
+ msg = "because of an unknown reason";
+ rc = JK_FATAL_ERROR;
+ jk_log(l, JK_LOG_ERROR,
+ "(%s) unexpected condition err=%d recoverable=%d",
+ p->worker->name, err, op->recoverable);
+ }
+ if (!op->recoverable && log_error == JK_TRUE) {
jk_log(l, JK_LOG_ERROR,
- "(%s) sending request to tomcat failed with unknown value
%d",
- p->worker->name, err);
+ "(%s) sending request to tomcat failed (unrecoverable), "
+ "%s "
+ "(attempt=%d)",
+ p->worker->name, msg, i + 1);
+ }
+ else {
+ jk_log(l, JK_LOG_INFO,
+ "(%s) sending request to tomcat failed (%srecoverable), "
+ "%s "
+ "(attempt=%d)",
+ p->worker->name,
+ op->recoverable ? "" : "un",
+ msg, i + 1);
+ }
+ if (!op->recoverable) {
JK_TRACE_EXIT(l);
- return JK_FALSE;
+ return rc;
}
/* Get another connection from the pool and try again.
* Note: All sockets are probably closed already.
*/
ajp_next_connection(p, l);
}
- *is_error = JK_HTTP_SERVER_BUSY;
/* Log the error only once per failed request. */
jk_log(l, JK_LOG_ERROR,
"(%s) Connecting to tomcat failed. Tomcat is probably not started "
"or is listening on the wrong port",
p->worker->name);
- if (err == JK_STATUS_ERROR) {
- JK_TRACE_EXIT(l);
- return JK_STATUS_ERROR;
- }
-
- if (err == JK_REPLY_TIMEOUT) {
- *is_error = JK_HTTP_GATEWAY_TIME_OUT;
- JK_TRACE_EXIT(l);
- return JK_REPLY_TIMEOUT;
- }
+/* XXX: Do we need to fix rc or is_error before returning? */
JK_TRACE_EXIT(l);
- return JK_FALSE;
+ return rc;
}
/*
@@ -2523,7 +2507,7 @@
if (rc) {
int i;
- for(i = w->ep_cache_sz - 1; i >= 0; i--) {
+ for (i = w->ep_cache_sz - 1; i >= 0; i--) {
if (w->ep_cache[i] == NULL) {
w->ep_cache[i] = p;
break;
Modified: tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c
URL:
http://svn.apache.org/viewvc/tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c?rev=604393&r1=604392&r2=604393&view=diff
==============================================================================
--- tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c (original)
+++ tomcat/connectors/trunk/jk/native/common/jk_lb_worker.c Fri Dec 14 21:38:45
2007
@@ -886,6 +886,7 @@
int num_of_workers;
int first = 1;
int was_forced = 0;
+ int recoverable = JK_TRUE;
int rc = JK_UNSET;
char *sessionid = NULL;
@@ -940,9 +941,11 @@
"service sticky_session=%d id='%s'",
p->worker->sticky_session, sessionid ? sessionid : "empty");
- while (attempt <= num_of_workers && rc == JK_UNSET) {
+ while (attempt <= num_of_workers && recoverable == JK_TRUE) {
worker_record_t *rec =
get_most_suitable_worker(p->worker, sessionid, s, l);
+ rc = JK_FALSE;
+ *is_error = JK_HTTP_SERVER_BUSY;
/* Do not reuse previous worker, because
* that worker already failed.
*/
@@ -1009,6 +1012,7 @@
* this request.
*/
end->rd = end->wr = 0;
+ end->recoverable = JK_TRUE;
if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC)
jk_shm_lock();
@@ -1031,6 +1035,8 @@
service_stat = end->service(end, s, l, &is_service_error);
rd = end->rd;
wr = end->wr;
+ recoverable = end->recoverable;
+ *is_error = is_service_error;
end->done(&end, l);
if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC)
@@ -1080,6 +1086,7 @@
rec->s->state = JK_LB_STATE_OK;
rec->s->error_time = 0;
rc = JK_TRUE;
+ recoverable = JK_UNSET;
}
else if (service_stat == JK_CLIENT_ERROR) {
/*
@@ -1089,51 +1096,82 @@
rec->s->client_errors++;
rec->s->state = JK_LB_STATE_OK;
rec->s->error_time = 0;
- jk_log(l, JK_LOG_INFO,
- "unrecoverable error %d, request failed."
- " Client failed in the middle of request,"
- " we can't recover to another instance.",
- is_service_error);
- *is_error = is_service_error;
rc = JK_CLIENT_ERROR;
+ recoverable = JK_FALSE;
}
- else {
- if (is_service_error != JK_HTTP_SERVER_BUSY) {
- /*
- * Error is not recoverable - break with an error.
- */
- jk_log(l, JK_LOG_ERROR,
- "unrecoverable error %d, request failed."
- " Tomcat failed in the middle of request,"
- " we can't recover to another instance.",
- is_service_error);
- *is_error = is_service_error;
- rc = JK_FALSE;
- }
- if (service_stat == JK_REPLY_TIMEOUT) {
- rec->s->reply_timeouts++;
- }
- if (service_stat != JK_STATUS_ERROR &&
- (service_stat != JK_REPLY_TIMEOUT ||
- rec->s->reply_timeouts >
(unsigned)p->worker->s->max_reply_timeouts)) {
-
+ else if (service_stat == JK_SERVER_ERROR) {
+ /*
+ * Internal JK server error
+ * Don't mark the node as bad.
+ * Failing over to another node could help.
+ */
+ rec->s->state = JK_LB_STATE_OK;
+ rec->s->error_time = 0;
+ rc = JK_FALSE;
+ }
+ else if (service_stat == JK_STATUS_ERROR) {
+ /*
+ * Status code configured as service is down.
+ * Don't mark the node as bad.
+ * Failing over to another node could help.
+ */
+ rec->s->state = JK_LB_STATE_OK;
+ rec->s->error_time = 0;
+ rc = JK_FALSE;
+ }
+ else if (service_stat == JK_STATUS_FATAL_ERROR) {
+ /*
+ * Status code configured as service is down.
+ * Mark the node as bad.
+ * Failing over to another node could help.
+ */
+ rec->s->errors++;
+ rec->s->state = JK_LB_STATE_ERROR;
+ rec->s->error_time = time(NULL);
+ rc = JK_FALSE;
+ }
+ else if (service_stat == JK_REPLY_TIMEOUT) {
+ rec->s->reply_timeouts++;
+ if (rec->s->reply_timeouts >
(unsigned)p->worker->s->max_reply_timeouts) {
/*
- * Service failed !!!
- * Time for fault tolerance (if possible)...
+ * Service failed - to many reply timeouts
+ * Take this node out of service.
*/
-
rec->s->errors++;
rec->s->state = JK_LB_STATE_ERROR;
rec->s->error_time = time(NULL);
- jk_log(l, JK_LOG_INFO,
- "service failed, worker %s is in error state",
- rec->s->name);
}
+ else {
+ /*
+ * XXX: if we want to be able to failover
+ * to other nodes after a reply timeout,
+ * but we do not put the timeout node into error,
+ * how can we make sure, that we actually fail over
+ * to other nodes?
+ */
+ rec->s->state = JK_LB_STATE_OK;
+ rec->s->error_time = 0;
+ }
+ rc = JK_FALSE;
+ }
+ else {
+ /*
+ * Service failed !!!
+ * Time for fault tolerance (if possible)...
+ */
+ rec->s->errors++;
+ rec->s->state = JK_LB_STATE_ERROR;
+ rec->s->error_time = time(NULL);
+ rc = JK_FALSE;
}
+ if (rec->s->state == JK_LB_STATE_ERROR)
+ jk_log(l, JK_LOG_ERROR,
+ "service failed, worker %s is in error state",
+ rec->s->name);
if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC)
jk_shm_unlock();
}
- if ( rc == JK_UNSET ) {
+ if (recoverable == JK_TRUE) {
/*
* Error is recoverable by submitting the request to
* another worker... Lets try to do that.
@@ -1142,6 +1180,23 @@
jk_log(l, JK_LOG_DEBUG,
"recoverable error... will try to recover on other
worker");
}
+ else {
+ /*
+ * Error is not recoverable - break with an error.
+ */
+ if (rc == JK_CLIENT_ERROR)
+ jk_log(l, JK_LOG_INFO,
+ "unrecoverable error %d, request failed."
+ " Client failed in the middle of request,"
+ " we can't recover to another instance.",
+ *is_error);
+ else if (rc != JK_TRUE)
+ jk_log(l, JK_LOG_ERROR,
+ "unrecoverable error %d, request failed."
+ " Tomcat failed in the middle of request,"
+ " we can't recover to another instance.",
+ *is_error);
+ }
if (first == 1 && s->add_log_items) {
first = 0;
lb_add_log_items(s, lb_first_log_names, prec, l);
@@ -1163,7 +1218,6 @@
* Reset the service loop and go again
*/
prec = NULL;
- rc = JK_UNSET;
jk_log(l, JK_LOG_INFO,
"Forcing recovery once for %d workers", nf);
continue;
@@ -1187,12 +1241,10 @@
}
attempt++;
}
- if ( rc == JK_UNSET ) {
+ if ( recoverable == JK_TRUE ) {
jk_log(l, JK_LOG_INFO,
"All tomcat instances are busy or in error state");
- /* Set error to Timeout */
- *is_error = JK_HTTP_SERVER_BUSY;
- rc = JK_FALSE;
+ /* rc and http error must be set above */
}
if (prec && s->add_log_items) {
lb_add_log_items(s, lb_last_log_names, prec, l);
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]