URL: https://github.com/SSSD/sssd/pull/752
Author: pbrezina
 Title: #752: sbus: terminated active ongoing request when reconnecting
Action: opened

PR body:
"""
When the connection to the remote D-Bus server is lost, any outgoing
requests keep waiting for a fairly long timeout. During this timeout
we kept chaining even new requests that came after a successful reconnection,
and these requests had to wait for the timeout to occur as well because
they were chained to a request that started before the reconnection.

Now we terminate all active outgoing requests that have a key associated,
so we can immediately start sending new requests.

Resolves:
https://pagure.io/SSSD/sssd/issue/3907

This is a trivial solution for now. It would also be possible to resend
existing requests after reconnection so their consumers may get a correct
answer. This, however, requires some additional logic to detect requests
that are actually causing the crash, to avoid an endless crash loop of sssd.
I'm thinking of: retry once, and if sssd crashes again, kill the request.

Should I create a ticket for this?
"""

To pull the PR as Git branch:
git remote add ghsssd https://github.com/SSSD/sssd
git fetch ghsssd pull/752/head:pr752
git checkout pr752
From 318b60d6320f013373d1a159fd91973261ad0502 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pavel=20B=C5=99ezina?= <pbrez...@redhat.com>
Date: Thu, 14 Feb 2019 13:27:06 +0100
Subject: [PATCH] sbus: terminated active ongoing request when reconnecting

When the connection to the remote D-Bus server is lost, any outgoing
requests keep waiting for a fairly long timeout. During this timeout
we kept chaining even new requests that came after a successful reconnection,
and these requests had to wait for the timeout to occur as well because
they were chained to a request that started before the reconnection.

Now we terminate all active outgoing requests that have a key associated,
so we can immediately start sending new requests.

Resolves:
https://pagure.io/SSSD/sssd/issue/3907
---
 src/sbus/connection/sbus_dispatcher.c |  4 ++++
 src/sbus/request/sbus_request.c       |  5 -----
 src/sbus/request/sbus_request_hash.c  | 31 +++++++++++++++++++++++++++
 src/sbus/sbus_private.h               | 10 +++++++++
 4 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/src/sbus/connection/sbus_dispatcher.c b/src/sbus/connection/sbus_dispatcher.c
index 2d3c73e38e..3631c17547 100644
--- a/src/sbus/connection/sbus_dispatcher.c
+++ b/src/sbus/connection/sbus_dispatcher.c
@@ -35,6 +35,10 @@ sbus_dispatch_schedule(struct sbus_connection *conn, uint32_t usecs);
 static void
 sbus_dispatch_reconnect(struct sbus_connection *conn)
 {
+    /* Terminate all outgoing requests associated with this connection. */
+    DEBUG(SSSDBG_TRACE_FUNC, "Connection lost. Terminating active requests.\n");
+    sbus_requests_terminate_all(conn->requests->outgoing, ERR_TERMINATED);
+
     switch (conn->type) {
     case SBUS_CONNECTION_CLIENT:
         /* Remote client closed the connection. We can't reestablish
diff --git a/src/sbus/request/sbus_request.c b/src/sbus/request/sbus_request.c
index 1ccd01e7d5..99e21509f2 100644
--- a/src/sbus/request/sbus_request.c
+++ b/src/sbus/request/sbus_request.c
@@ -34,11 +34,6 @@ typedef errno_t
                             DBusMessage **_client_message,
                             DBusMessage ***_reply);
 
-struct sbus_active_requests {
-    hash_table_t *incoming;
-    hash_table_t *outgoing;
-};
-
 struct sbus_active_requests *
 sbus_active_requests_init(TALLOC_CTX *mem_ctx)
 {
diff --git a/src/sbus/request/sbus_request_hash.c b/src/sbus/request/sbus_request_hash.c
index 441fce2c60..0ddad03a87 100644
--- a/src/sbus/request/sbus_request_hash.c
+++ b/src/sbus/request/sbus_request_hash.c
@@ -292,3 +292,34 @@ sbus_requests_finish(struct sbus_request_list *item,
 
     item->req = NULL;
 }
+
+void
+sbus_requests_terminate_all(hash_table_t *table, /* table whose values are request lists */
+                            errno_t error)       /* error each request is finished with */
+{   /* Finish every request found in the table with the given error, then delete its list. */
+    struct sbus_request_list *list;
+    struct sbus_request_list *item;
+    hash_value_t *values;
+    unsigned long int num;
+    unsigned long int i;
+    int hret;
+
+    hret = hash_values(table, &num, &values); /* collect all values stored in the table */
+    if (hret != HASH_SUCCESS) {
+        DEBUG(SSSDBG_CRIT_FAILURE, "Unable to get list of active requests "
+              "[%d]: %s\n", hret, hash_error_string(hret));
+        return; /* best effort: on failure no request is terminated */
+    }
+
+    for (i = 0; i < num; i++) {
+        list = sss_ptr_get_value(&values[i], struct sbus_request_list); /* each value is read as a request list head */
+
+        DLIST_FOR_EACH(item, list) {
+            sbus_requests_finish(item, error); /* finish each request in this list with the given error */
+        }
+
+        sbus_requests_delete(list); /* NOTE(review): presumably also removes the list from the table -- confirm */
+    }
+
+    talloc_free(values); /* release the array obtained from hash_values() */
+}
diff --git a/src/sbus/sbus_private.h b/src/sbus/sbus_private.h
index 5a86efc166..2c25d572e0 100644
--- a/src/sbus/sbus_private.h
+++ b/src/sbus/sbus_private.h
@@ -448,6 +448,11 @@ struct sbus_request_list {
     struct sbus_request_list *next;
 };
 
+struct sbus_active_requests { /* hash tables of active requests */
+    hash_table_t *incoming; /* NOTE(review): presumably active incoming requests -- confirm */
+    hash_table_t *outgoing; /* outgoing requests; these are terminated when the connection is lost */
+};
+
 /* Initialize active requests structure. */
 struct sbus_active_requests *
 sbus_active_requests_init(TALLOC_CTX *mem_ctx);
@@ -479,6 +484,11 @@ void
 sbus_requests_finish(struct sbus_request_list *item,
                      errno_t error);
 
+/* Terminate all requests stored in the table, finishing each with the given error. */
+void
+sbus_requests_terminate_all(hash_table_t *table,
+                            errno_t error);
+
 /* Create new sbus request. */
 struct sbus_request *
 sbus_request_create(TALLOC_CTX *mem_ctx,
_______________________________________________
sssd-devel mailing list -- sssd-devel@lists.fedorahosted.org
To unsubscribe send an email to sssd-devel-le...@lists.fedorahosted.org
Fedora Code of Conduct: https://getfedora.org/code-of-conduct.html
List Guidelines: https://fedoraproject.org/wiki/Mailing_list_guidelines
List Archives: 
https://lists.fedorahosted.org/archives/list/sssd-devel@lists.fedorahosted.org

Reply via email to