[ https://issues.apache.org/jira/browse/DISPATCH-2046?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Alex Ward updated DISPATCH-2046: -------------------------------- Description: I am seeing a segv at line 1063 due to ctx->connector being 0 in server.c:handle(). {code:java} (ugdb-x86_64-7.11-261) p ctx $6 = (qd_connection_t *) 0x802e55890 (ugdb-x86_64-7.11-261) p *ctx $7 = {prev = 0x8023df490, next = 0x802e7a890, name = 0x0, server = 0x80220d0c0, opened = false, closed = false, closed_locally = false, enqueued = 0, timer = 0x0, pn_conn = 0x8023ec050, pn_sessions = {0x0 <repeats 15 times>}, ssl = 0x0, listener = 0x0, connector = 0x0, connector_lock = 0x8023e6600, context = 0x0, user_context = 0x0, link_context = 0x0, connection_id = 12, user_id = 0x0, free_user_id = false, policy_settings = 0x0, n_sessions = 0, n_senders = 0, n_receivers = 0, open_container = 0x0, deferred_calls = {head = 0x0, tail = 0x0, scratch = 0x0, size = 0}, deferred_call_lock = 0x8023e65c0, policy_counted = false, role = 0x801a17660 "route-container", free_link_session_list = {head = 0x0, tail = 0x0, scratch = 0x0, size = 0}, strip_annotations_in = false, strip_annotations_out = false, wake = 0x8005d6e20 <connection_wake>, rhost = '\000' <repeats 1024 times>, rhost_port = '\000' <repeats 1056 times>} {code} As the comment at the start of handle() states, there is only one event being handled at a time, but it doesn’t look like there is anything to stop someone clearing ctx->connector between lines 1055 and 1063. The python code seems to be coming in and deleting the connector while handle() is using it. [https://github.com/apache/qpid-dispatch/blob/main/src/server.c] {code:java} 989 /* Events involving a connection or listener are serialized by the proactor so 990 * only one event per connection / listener will be processed at a time. 991 */ 992 static bool handle(qd_server_t *qd_server, pn_event_t *e, pn_connection_t *pn_conn, qd_connection_t *ctx) 993 { 994 if (pn_conn && qdr_is_authentication_service_connection(pn_conn)) { 995 qdr_handle_authentication_service_connection_event(e); 996 return true; 997 } 998 999 switch (pn_event_type(e)) { … 1051 case PN_TRANSPORT_ERROR: 1052 { 1053 pn_transport_t *transport = pn_event_transport(e); 1054 pn_condition_t* condition = transport ? pn_transport_condition(transport) : NULL; 1055 if (ctx && ctx->connector) { /* Outgoing connection */ 1056 qd_increment_conn_index(ctx); 1057 const qd_server_config_t *config = &ctx->connector->config; 1058 ctx->connector->state = CXTR_STATE_FAILED; 1059 char conn_msg[300]; 1060 if (condition && pn_condition_is_set(condition)) { 1061 qd_format_string(conn_msg, 300, "[C%"PRIu64"] Connection to %s failed: %s %s", ctx->connection_id, config->host_port, 1062 pn_condition_get_name(condition), pn_condition_get_description(condition)); 1063 strcpy(ctx->connector->conn_msg, conn_msg); {code} Another thread is at line 1063 in qd_connection_manager_delete_connector so it looks like this thread just cleared ctx->connector that is being used in handle. What is meant to be preventing this happening? [https://github.com/apache/qpid-dispatch/blob/main/src/connection_manager.c] {code:java} 1055 void qd_connection_manager_delete_connector(qd_dispatch_t *qd, void *impl) 1056 { 1057 qd_connector_t *ct = (qd_connector_t*) impl; 1058 if (ct) { 1059 sys_mutex_lock(ct->lock); 1060 if (ct->ctx) { 1061 ct->ctx->connector = 0; 1062 if(ct->ctx->pn_conn) 1063 qd_connection_invoke_deferred(ct->ctx, deferred_close, ct->ctx->pn_conn); 1064 1065 } 1066 sys_mutex_unlock(ct->lock); 1067 DEQ_REMOVE(qd->connection_manager->connectors, ct); 1068 qd_connector_decref(ct); 1069 } 1070 } {code} Does anyone have any suggestions for how to prevent this? Doesn't look like I could use ct->lock in handle(). was: I am seeing a segv at line 1063 due to ctx->connector being 0 in server.c:handle(). {code:java} (ugdb-x86_64-7.11-261) p *ctx $7 = {prev = 0x8023df490, next = 0x802e7a890, name = 0x0, server = 0x80220d0c0, opened = false, closed = false, closed_locally = false, enqueued = 0, timer = 0x0, pn_conn = 0x8023ec050, pn_sessions = {0x0 <repeats 15 times>}, ssl = 0x0, listener = 0x0, connector = 0x0, connector_lock = 0x8023e6600, context = 0x0, user_context = 0x0, link_context = 0x0, connection_id = 12, user_id = 0x0, free_user_id = false, policy_settings = 0x0, n_sessions = 0, n_senders = 0, n_receivers = 0, open_container = 0x0, deferred_calls = {head = 0x0, tail = 0x0, scratch = 0x0, size = 0}, deferred_call_lock = 0x8023e65c0, policy_counted = false, role = 0x801a17660 "route-container", free_link_session_list = {head = 0x0, tail = 0x0, scratch = 0x0, size = 0}, strip_annotations_in = false, strip_annotations_out = false, wake = 0x8005d6e20 <connection_wake>, rhost = '\000' <repeats 1024 times>, rhost_port = '\000' <repeats 1056 times>} {code} {noformat} (ugdb-x86_64-7.11-261) p *ctx $7 = {prev = 0x8023df490, next = 0x802e7a890, name = 0x0, server = 0x80220d0c0, opened = false, closed = false, closed_locally = false, enqueued = 0, timer = 0x0, pn_conn = 0x8023ec050, pn_sessions = {0x0 <repeats 15 times>}, ssl = 0x0, listener = 0x0, connector = 0x0, connector_lock = 0x8023e6600, context = 0x0, user_context = 0x0, link_context = 0x0, connection_id = 12, user_id = 0x0, free_user_id = false, policy_settings = 0x0, n_sessions = 0, n_senders = 0, n_receivers = 0, open_container = 0x0, deferred_calls = {head = 0x0, tail = 0x0, scratch = 0x0, size = 0}, deferred_call_lock = 0x8023e65c0, policy_counted = false, role = 0x801a17660 "route-container", free_link_session_list = {head = 0x0, tail = 0x0, scratch = 0x0, size = 0}, strip_annotations_in = false, strip_annotations_out = false, wake = 0x8005d6e20 <connection_wake>, rhost = '\000' <repeats 1024 times>, rhost_port = '\000' <repeats 1056 times>} {noformat} As the comment at the start of handle() states, there is only one event being handled at a time, but it doesn’t look like there is anything to stop someone clearing ctx->connector between lines 1055 and 1063. The python code seems to be comin in and deleting the connector while handle is using it [https://github.com/apache/qpid-dispatch/blob/main/src/server.c] {code:java} 989 /* Events involving a connection or listener are serialized by the proactor so 990 * only one event per connection / listener will be processed at a time. 991 */ 992 static bool handle(qd_server_t *qd_server, pn_event_t *e, pn_connection_t *pn_conn, qd_connection_t *ctx) 993 { 994 if (pn_conn && qdr_is_authentication_service_connection(pn_conn)) { 995 qdr_handle_authentication_service_connection_event(e); 996 return true; 997 } 998 999 switch (pn_event_type(e)) { … 1051 case PN_TRANSPORT_ERROR: 1052 { 1053 pn_transport_t *transport = pn_event_transport(e); 1054 pn_condition_t* condition = transport ? pn_transport_condition(transport) : NULL; 1055 if (ctx && ctx->connector) { /* Outgoing connection */ 1056 qd_increment_conn_index(ctx); 1057 const qd_server_config_t *config = &ctx->connector->config; 1058 ctx->connector->state = CXTR_STATE_FAILED; 1059 char conn_msg[300]; 1060 if (condition && pn_condition_is_set(condition)) { 1061 qd_format_string(conn_msg, 300, "[C%"PRIu64"] Connection to %s failed: %s %s", ctx->connection_id, config->host_port, 1062 pn_condition_get_name(condition), pn_condition_get_description(condition)); 1063 strcpy(ctx->connector->conn_msg, conn_msg); {code} Another thread is at line 1063 in qd_connection_manager_delete_connector so it looks like this thread just cleared ctx->connector that is being used in handle. What is meant to be preventing this happening? [https://github.com/apache/qpid-dispatch/blob/main/src/connection_manager.c] {code:java} 1055 void qd_connection_manager_delete_connector(qd_dispatch_t *qd, void *impl) 1056 { 1057 qd_connector_t *ct = (qd_connector_t*) impl; 1058 if (ct) { 1059 sys_mutex_lock(ct->lock); 1060 if (ct->ctx) { 1061 ct->ctx->connector = 0; 1062 if(ct->ctx->pn_conn) 1063 qd_connection_invoke_deferred(ct->ctx, deferred_close, ct->ctx->pn_conn); 1064 1065 } 1066 sys_mutex_unlock(ct->lock); 1067 DEQ_REMOVE(qd->connection_manager->connectors, ct); 1068 qd_connector_decref(ct); 1069 } 1070 } {code} Does anyone have any suggestions for how to prevent this? Doesn't look like I could use ct->lock in handle(). > Panic in handle due to deleted connector. > ----------------------------------------- > > Key: DISPATCH-2046 > URL: https://issues.apache.org/jira/browse/DISPATCH-2046 > Project: Qpid Dispatch > Issue Type: Bug > Components: Routing Engine > Affects Versions: 1.15.0 > Reporter: Alex Ward > Priority: Major > > I am seeing a segv at line 1063 due to ctx->connector being 0 in > server.c:handle(). > {code:java} > (ugdb-x86_64-7.11-261) p ctx > $6 = (qd_connection_t *) 0x802e55890 > (ugdb-x86_64-7.11-261) p *ctx > $7 = {prev = 0x8023df490, next = 0x802e7a890, name = 0x0, server = > 0x80220d0c0, opened = false, closed = false, closed_locally = false, enqueued > = 0, timer = 0x0, pn_conn = 0x8023ec050, pn_sessions = {0x0 <repeats 15 > times>}, ssl = 0x0, listener = 0x0, connector = 0x0, connector_lock = > 0x8023e6600, context = 0x0, user_context = 0x0, link_context = 0x0, > connection_id = 12, user_id = 0x0, free_user_id = false, policy_settings = > 0x0, n_sessions = 0, n_senders = 0, n_receivers = 0, open_container = 0x0, > deferred_calls = {head = 0x0, tail = 0x0, scratch = 0x0, size = 0}, > deferred_call_lock = 0x8023e65c0, policy_counted = false, role = 0x801a17660 > "route-container", free_link_session_list = {head = 0x0, tail = 0x0, scratch > = 0x0, size = 0}, strip_annotations_in = false, strip_annotations_out = > false, wake = 0x8005d6e20 <connection_wake>, rhost = '\000' <repeats 1024 > times>, rhost_port = '\000' <repeats 1056 times>} > {code} > As the comment at the start of handle() states, there is only one event being > handled at a time, but it doesn’t look like there is anything to stop someone > clearing ctx->connector between lines 1055 and 1063. The python code seems to > be coming in and deleting the connector while handle() is using it. > [https://github.com/apache/qpid-dispatch/blob/main/src/server.c] > {code:java} > 989 /* Events involving a connection or listener are serialized by the > proactor so > 990 * only one event per connection / listener will be processed at a time. > > 991 */ > 992 static bool handle(qd_server_t *qd_server, pn_event_t *e, > pn_connection_t *pn_conn, qd_connection_t *ctx) > 993 { > > 994 if (pn_conn && qdr_is_authentication_service_connection(pn_conn)) { > > 995 qdr_handle_authentication_service_connection_event(e); > > 996 return true; > > 997 } > > 998 > > 999 switch (pn_event_type(e)) { > … > 1051 case PN_TRANSPORT_ERROR: > > 1052 { > > 1053 pn_transport_t *transport = pn_event_transport(e); > > 1054 pn_condition_t* condition = transport ? > pn_transport_condition(transport) : NULL; > 1055 if (ctx && ctx->connector) { /* Outgoing connection */ > > 1056 qd_increment_conn_index(ctx); > > 1057 const qd_server_config_t *config = > &ctx->connector->config; > 1058 ctx->connector->state = CXTR_STATE_FAILED; > > 1059 char conn_msg[300]; > > 1060 if (condition && pn_condition_is_set(condition)) { > > 1061 qd_format_string(conn_msg, 300, "[C%"PRIu64"] > Connection to %s failed: %s %s", ctx->connection_id, config->host_port, > > 1062 pn_condition_get_name(condition), > pn_condition_get_description(condition)); > 1063 strcpy(ctx->connector->conn_msg, conn_msg); > {code} > Another thread is at line 1063 in qd_connection_manager_delete_connector so > it looks like this thread just cleared ctx->connector that is being used in > handle. What is meant to be preventing this happening? > [https://github.com/apache/qpid-dispatch/blob/main/src/connection_manager.c] > {code:java} > 1055 void qd_connection_manager_delete_connector(qd_dispatch_t *qd, void > *impl) > 1056 { > > 1057 qd_connector_t *ct = (qd_connector_t*) impl; > > 1058 if (ct) { > > 1059 sys_mutex_lock(ct->lock); > > 1060 if (ct->ctx) { > > 1061 ct->ctx->connector = 0; > > 1062 if(ct->ctx->pn_conn) > > 1063 qd_connection_invoke_deferred(ct->ctx, deferred_close, > ct->ctx->pn_conn); > 1064 > > 1065 } > > 1066 sys_mutex_unlock(ct->lock); > 1067 DEQ_REMOVE(qd->connection_manager->connectors, ct); > > 1068 qd_connector_decref(ct); > > 1069 } > > 1070 } > {code} > Does anyone have any suggestions for how to prevent this? Doesn't look like > I could use ct->lock in handle(). -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@qpid.apache.org For additional commands, e-mail: dev-h...@qpid.apache.org