URL: https://github.com/SSSD/sssd/pull/5552
Author: sumit-bose
 Title: #5552: files: split update into batches
Action: synchronized

To pull the PR as Git branch:
git remote add ghsssd https://github.com/SSSD/sssd
git fetch ghsssd pull/5552/head:pr5552
git checkout pr5552
From f238da3cea6f78c131ae763236530226ffe0e954 Mon Sep 17 00:00:00 2001
From: Sumit Bose <sb...@redhat.com>
Date: Thu, 25 Mar 2021 12:08:34 +0100
Subject: [PATCH] files: split update into batches

If the files managed by the files provider contain many users or groups
processing them might take a considerable amount of time. To keep the
backend responsive this patch splits the update into multiple steps
running one after the other but returning to the main loop in between.

This avoids issues during startup because the watchdog timer state is
reset properly. Additionally SBUS messages are process and as a result
the domain can be marked inconsistent in the frontends properly.

Resolves: https://github.com/SSSD/sssd/issues/5557

:fixes: Update large files in the files provider in batches to avoid
  timeouts
---
 src/providers/files/files_ops.c | 397 +++++++++++++++++++++++++-------
 1 file changed, 317 insertions(+), 80 deletions(-)

diff --git a/src/providers/files/files_ops.c b/src/providers/files/files_ops.c
index 54d2b41646..a96dfc521b 100644
--- a/src/providers/files/files_ops.c
+++ b/src/providers/files/files_ops.c
@@ -449,26 +449,15 @@ static errno_t refresh_override_attrs(struct files_id_ctx *id_ctx,
 }
 
 static errno_t sf_enum_groups(struct files_id_ctx *id_ctx,
-                              const char *group_file);
+                              struct group **groups, size_t start, size_t size);
 
-errno_t sf_enum_users(struct files_id_ctx *id_ctx,
-                      const char *passwd_file)
+static errno_t sf_enum_users(struct files_id_ctx *id_ctx, struct passwd **users,
+                             size_t start, size_t size)
 {
     errno_t ret;
-    TALLOC_CTX *tmp_ctx = NULL;
-    struct passwd **users = NULL;
+    size_t i;
 
-    tmp_ctx = talloc_new(NULL);
-    if (tmp_ctx == NULL) {
-        return ENOMEM;
-    }
-
-    ret = enum_files_users(tmp_ctx, passwd_file, &users);
-    if (ret != EOK) {
-        goto done;
-    }
-
-    for (size_t i = 0; users[i]; i++) {
+    for (i = start; users[i] != NULL && i < (start + size); i++) {
         ret = save_file_user(id_ctx, users[i]);
         if (ret != EOK) {
             DEBUG(SSSDBG_MINOR_FAILURE,
@@ -478,16 +467,19 @@ errno_t sf_enum_users(struct files_id_ctx *id_ctx,
         }
     }
 
-    ret = refresh_override_attrs(id_ctx, SYSDB_MEMBER_USER);
-    if (ret != EOK) {
-        DEBUG(SSSDBG_MINOR_FAILURE,
-              "Failed to refresh override attributes, "
-              "override values might not be available.\n");
+    if (users[i] == NULL) {
+        ret = refresh_override_attrs(id_ctx, SYSDB_MEMBER_USER);
+        if (ret != EOK) {
+            DEBUG(SSSDBG_MINOR_FAILURE,
+                  "Failed to refresh override attributes, "
+                  "override values might not be available.\n");
+        }
+
+        ret = EOK;
+    } else {
+        ret = EAGAIN;
     }
 
-    ret = EOK;
-done:
-    talloc_free(tmp_ctx);
     return ret;
 }
 
@@ -665,29 +657,25 @@ static errno_t save_file_group(struct files_id_ctx *id_ctx,
 }
 
 static errno_t sf_enum_groups(struct files_id_ctx *id_ctx,
-                              const char *group_file)
+                              struct group **groups, size_t start, size_t size)
 {
     errno_t ret;
     TALLOC_CTX *tmp_ctx = NULL;
-    struct group **groups = NULL;
     const char **cached_users = NULL;
+    size_t i;
 
     tmp_ctx = talloc_new(NULL);
     if (tmp_ctx == NULL) {
         return ENOMEM;
     }
 
-    ret = enum_files_groups(tmp_ctx, group_file, &groups);
-    if (ret != EOK) {
-        goto done;
-    }
-
     cached_users = get_cached_user_names(tmp_ctx, id_ctx->domain);
     if (cached_users == NULL) {
+        ret = ENOMEM;
         goto done;
     }
 
-    for (size_t i = 0; groups[i]; i++) {
+    for (i = start; groups[i] != NULL && i < (start + size); i++) {
         ret = save_file_group(id_ctx, groups[i], cached_users);
         if (ret != EOK) {
             DEBUG(SSSDBG_MINOR_FAILURE,
@@ -696,93 +684,266 @@ static errno_t sf_enum_groups(struct files_id_ctx *id_ctx,
         }
     }
 
-    ret = refresh_override_attrs(id_ctx, SYSDB_MEMBER_GROUP);
-    if (ret != EOK) {
-        DEBUG(SSSDBG_MINOR_FAILURE,
-              "Failed to refresh override attributes, "
-              "override values might not be available.\n");
+    if (groups[i] == NULL) {
+        ret = refresh_override_attrs(id_ctx, SYSDB_MEMBER_GROUP);
+        if (ret != EOK) {
+            DEBUG(SSSDBG_MINOR_FAILURE,
+                  "Failed to refresh override attributes, "
+                  "override values might not be available.\n");
+        }
+
+        ret = EOK;
+    } else {
+        ret = EAGAIN;
     }
 
-    ret = EOK;
 done:
     talloc_free(tmp_ctx);
     return ret;
 }
 
-static errno_t sf_enum_files(struct files_id_ctx *id_ctx,
-                             uint8_t flags)
+enum update_steps {
+    DELETE_USERS,
+    READ_USERS,
+    SAVE_USERS,
+    DELETE_GROUPS,
+    READ_GROUPS,
+    SAVE_GROUPS,
+    UPDATE_FINISH,
+    UPDATE_DONE,
+};
+
+struct sf_enum_files_state {
+    struct files_id_ctx *id_ctx;
+    uint8_t flags;
+    struct tevent_timer *te;
+    enum update_steps current_step;
+    size_t step;
+    bool in_transaction;
+    size_t batch_size;
+    size_t obj_idx;
+    size_t file_idx;
+    struct passwd **users;
+    struct group **groups;
+    uint32_t delay;
+    uint32_t initial_delay;
+};
+
+static void sf_enum_files_first_step(struct tevent_context *ev,
+                                     struct tevent_timer *te,
+                                     struct timeval tv,
+                                     void *data);
+static struct tevent_req *sf_enum_files_send(struct files_id_ctx *id_ctx,
+                                             uint8_t flags)
 {
+    struct tevent_req *req;
+    struct sf_enum_files_state *state;
+    struct timeval tv;
     errno_t ret;
-    errno_t tret;
-    bool in_transaction = false;
+
+    req = tevent_req_create(id_ctx, &state, struct sf_enum_files_state);
+    if (req == NULL) {
+        DEBUG(SSSDBG_CRIT_FAILURE, "Unable to create tevent request!\n");
+        return NULL;
+    }
+
+    state->id_ctx = id_ctx;
+    state->flags = flags;
+    state->step = 0;
+    state->batch_size = 10;
+    state->obj_idx = 0;
+    state->file_idx = 0;
+    state->initial_delay = 100;
+    state->delay = 100;
+
+    if (state->flags & SF_UPDATE_PASSWD) {
+        state->current_step = DELETE_USERS;
+    } else if (state->flags & SF_UPDATE_GROUP) {
+        state->current_step = DELETE_GROUPS;
+    }
+
+    tv = tevent_timeval_current_ofs(0, state->initial_delay);
+    state->te = tevent_add_timer(id_ctx->be->ev, state, tv,
+                                 sf_enum_files_first_step, req);
+    if (state->te == NULL) {
+        DEBUG(SSSDBG_OP_FAILURE, "Unable to schedule files update.\n");
+        ret = EFAULT;
+        goto done;
+    }
 
     ret = sysdb_transaction_start(id_ctx->domain->sysdb);
     if (ret != EOK) {
         goto done;
     }
-    in_transaction = true;
+    state->in_transaction = true;
+
+    return req;
+
+done:
+    tevent_req_error(req, ret);
+    tevent_req_post(req, id_ctx->be->ev);
+    return req;
+}
 
-    if (flags & SF_UPDATE_PASSWD) {
+static void sf_enum_files_first_step(struct tevent_context *ev,
+                                     struct tevent_timer *te,
+                                     struct timeval tv,
+                                     void *data)
+{
+    errno_t ret;
+    errno_t tret;
+    struct sf_enum_files_state *state;
+    struct tevent_req *req;
+    struct files_id_ctx *id_ctx;
+    const char *filename = NULL;
+
+    req = talloc_get_type(data, struct tevent_req);
+    state = tevent_req_data(req, struct sf_enum_files_state);
+
+    state->te = NULL;
+    id_ctx = state->id_ctx;
+
+    switch (state->current_step) {
+    case DELETE_USERS:
+        DEBUG(SSSDBG_TRACE_ALL, "Step DELETE_USERS.\n");
         ret = delete_all_users(id_ctx->domain);
         if (ret != EOK) {
             goto done;
         }
-
+        state->file_idx = 0;
+        state->current_step = READ_USERS;
+        break;
+    case READ_USERS:
+        DEBUG(SSSDBG_TRACE_ALL, "Step READ_USERS.\n");
+        talloc_zfree(state->users);
+        state->obj_idx = 0;
         /* All users were deleted, therefore we need to enumerate each file again */
-        for (size_t i = 0; id_ctx->passwd_files[i] != NULL; i++) {
-            ret = sf_enum_users(id_ctx, id_ctx->passwd_files[i]);
-            if (ret == ENOENT) {
+        if (id_ctx->passwd_files[state->file_idx] != NULL) {
+            filename = id_ctx->passwd_files[state->file_idx++];
+            ret = enum_files_users(state, filename, &state->users);
+            if (ret == EOK) {
+                state->current_step = SAVE_USERS;
+            } else if (ret == ENOENT) {
                 DEBUG(SSSDBG_MINOR_FAILURE,
                       "The file %s does not exist (yet), skipping\n",
-                      id_ctx->passwd_files[i]);
-                continue;
+                      filename);
             } else if (ret != EOK) {
                 DEBUG(SSSDBG_OP_FAILURE,
-                      "Cannot enumerate users from %s, aborting\n",
-                      id_ctx->passwd_files[i]);
+                      "Cannot enumerate groups from %s, aborting\n",
+                      filename);
                 goto done;
             }
+        } else {
+            if (state->flags & SF_UPDATE_GROUP) {
+                state->current_step = DELETE_GROUPS;
+            } else {
+                state->current_step = UPDATE_FINISH;
+            }
         }
-    }
-
-    if (flags & SF_UPDATE_GROUP) {
+        break;
+    case SAVE_USERS:
+        DEBUG(SSSDBG_TRACE_ALL, "Step SAVE_USERS.\n");
+        if (state->users != NULL) {
+            ret = sf_enum_users(id_ctx, state->users,
+                                state->obj_idx, state->batch_size);
+            if (ret == EOK) {
+                state->current_step = READ_USERS;
+            } else if (ret == EAGAIN) {
+                state->obj_idx += state->batch_size;
+            } else {
+                DEBUG(SSSDBG_OP_FAILURE, "Saving users failed.\n");
+                goto done;
+            }
+        }
+        break;
+    case DELETE_GROUPS:
+        DEBUG(SSSDBG_TRACE_ALL, "Step DELETE_GROUPS.\n");
         ret = delete_all_groups(id_ctx->domain);
         if (ret != EOK) {
             goto done;
         }
-
+        state->file_idx = 0;
+        state->current_step = READ_GROUPS;
+        break;
+    case READ_GROUPS:
+        DEBUG(SSSDBG_TRACE_ALL, "Step READ_GROUPS.\n");
+        talloc_zfree(state->groups);
+        state->obj_idx = 0;
         /* All groups were deleted, therefore we need to enumerate each file again */
-        for (size_t i = 0; id_ctx->group_files[i] != NULL; i++) {
-            ret = sf_enum_groups(id_ctx, id_ctx->group_files[i]);
-            if (ret == ENOENT) {
+        if (id_ctx->group_files[state->file_idx] != NULL) {
+            filename = id_ctx->group_files[state->file_idx++];
+            ret = enum_files_groups(state, filename, &state->groups);
+            if (ret == EOK) {
+                state->current_step = SAVE_GROUPS;
+            } else if (ret == ENOENT) {
                 DEBUG(SSSDBG_MINOR_FAILURE,
                       "The file %s does not exist (yet), skipping\n",
-                      id_ctx->group_files[i]);
-                continue;
+                      filename);
             } else if (ret != EOK) {
                 DEBUG(SSSDBG_OP_FAILURE,
                       "Cannot enumerate groups from %s, aborting\n",
-                      id_ctx->group_files[i]);
+                      filename);
                 goto done;
             }
+        } else {
+            state->current_step = UPDATE_FINISH;
+        }
+        break;
+    case SAVE_GROUPS:
+        DEBUG(SSSDBG_TRACE_ALL, "Step SAVE_GROUPS.\n");
+        if (state->groups != NULL) {
+            ret = sf_enum_groups(id_ctx, state->groups,
+                                 state->obj_idx, state->batch_size);
+            if (ret == EOK) {
+                state->current_step = READ_GROUPS;
+            } else if (ret == EAGAIN) {
+                state->obj_idx += state->batch_size;
+            } else {
+                DEBUG(SSSDBG_OP_FAILURE, "Saving groups failed.\n");
+                goto done;
+            }
+        }
+        break;
+    case UPDATE_FINISH:
+        DEBUG(SSSDBG_TRACE_ALL, "Step UPDATE_FINISH.\n");
+        ret = dp_add_sr_attribute(id_ctx->be);
+        if (ret != EOK) {
+            DEBUG(SSSDBG_CRIT_FAILURE,
+                  "Failed to add session recording attribute, ignored.\n");
         }
-    }
 
-    ret = dp_add_sr_attribute(id_ctx->be);
-    if (ret != EOK) {
-        DEBUG(SSSDBG_CRIT_FAILURE,
-              "Failed to add session recording attribute, ignored.\n");
-    }
+        ret = sysdb_transaction_commit(id_ctx->domain->sysdb);
+        if (ret != EOK) {
+            goto done;
+        }
+        state->in_transaction = false;
 
-    ret = sysdb_transaction_commit(id_ctx->domain->sysdb);
-    if (ret != EOK) {
+        state->current_step = UPDATE_DONE;
+
+        ret = EOK;
+        break;
+    default:
+        DEBUG(SSSDBG_CRIT_FAILURE, "Undefined update step [%zu].\n",
+                                   state->current_step);
+        ret = EINVAL;
         goto done;
     }
-    in_transaction = false;
 
-    ret = EOK;
+    if (state->current_step != UPDATE_DONE) {
+        tv = tevent_timeval_current_ofs(0, state->delay);
+        state->te = tevent_add_timer(id_ctx->be->ev, state, tv,
+                                     sf_enum_files_first_step, req);
+        if (state->te == NULL) {
+            DEBUG(SSSDBG_OP_FAILURE, "Unable to schedule files update.\n");
+            ret = EFAULT;
+            goto done;
+        }
+
+        return;
+    }
+
 done:
-    if (in_transaction) {
+    if (state->in_transaction) {
         tret = sysdb_transaction_cancel(id_ctx->domain->sysdb);
         if (tret != EOK) {
             DEBUG(SSSDBG_CRIT_FAILURE,
@@ -790,7 +951,20 @@ static errno_t sf_enum_files(struct files_id_ctx *id_ctx,
         }
     }
 
-    return ret;
+    if (ret == EOK) {
+        tevent_req_done(req);
+    } else {
+        tevent_req_error(req, ret);
+    }
+
+    return;
+}
+
+static errno_t sf_enum_files_recv(struct tevent_req *req)
+{
+    TEVENT_REQ_RETURN_ON_ERROR(req);
+
+    return EOK;
 }
 
 static void sf_cb_done(struct files_id_ctx *id_ctx)
@@ -803,9 +977,11 @@ static void sf_cb_done(struct files_id_ctx *id_ctx)
     }
 }
 
+static void sf_passwd_cb_done(struct tevent_req *req);
 static int sf_passwd_cb(const char *filename, uint32_t flags, void *pvt)
 {
     struct files_id_ctx *id_ctx;
+    struct tevent_req *req;
     errno_t ret;
 
     id_ctx = talloc_get_type(pvt, struct files_id_ctx);
@@ -826,7 +1002,30 @@ static int sf_passwd_cb(const char *filename, uint32_t flags, void *pvt)
      * only then edits passwd and adds the user. The reverse is not needed,
      * because member/memberof links are established when groups are saved.
      */
-    ret = sf_enum_files(id_ctx, SF_UPDATE_BOTH);
+    req = sf_enum_files_send(id_ctx, SF_UPDATE_BOTH);
+    if (req == NULL) {
+        DEBUG(SSSDBG_OP_FAILURE, "Failed to start files update.\n");
+        ret = ENOMEM;
+        id_ctx->updating_passwd = false;
+        sf_cb_done(id_ctx);
+        files_account_info_finished(id_ctx, BE_REQ_USER, ret);
+        return ret;
+    }
+
+    tevent_req_set_callback(req, sf_passwd_cb_done, id_ctx);
+
+    return EOK;
+}
+
+static void sf_passwd_cb_done(struct tevent_req *req)
+{
+    struct files_id_ctx *id_ctx;
+    errno_t ret;
+
+    id_ctx = tevent_req_callback_data(req, struct files_id_ctx);
+
+    ret = sf_enum_files_recv(req);
+    talloc_zfree(req);
     if (ret != EOK) {
         DEBUG(SSSDBG_OP_FAILURE,
               "Could not update files: [%d]: %s\n",
@@ -839,13 +1038,14 @@ static int sf_passwd_cb(const char *filename, uint32_t flags, void *pvt)
     id_ctx->updating_passwd = false;
     sf_cb_done(id_ctx);
     files_account_info_finished(id_ctx, BE_REQ_USER, ret);
-    return ret;
 }
 
+static void sf_group_cb_done(struct tevent_req *req);
 static int sf_group_cb(const char *filename, uint32_t flags, void *pvt)
 {
     struct files_id_ctx *id_ctx;
     errno_t ret;
+    struct tevent_req *req;
 
     id_ctx = talloc_get_type(pvt, struct files_id_ctx);
     if (id_ctx == NULL) {
@@ -861,7 +1061,30 @@ static int sf_group_cb(const char *filename, uint32_t flags, void *pvt)
     dp_sbus_reset_groups_memcache(id_ctx->be->provider);
     dp_sbus_reset_initgr_memcache(id_ctx->be->provider);
 
-    ret = sf_enum_files(id_ctx, SF_UPDATE_GROUP);
+    req = sf_enum_files_send(id_ctx, SF_UPDATE_GROUP);
+    if (req == NULL) {
+        DEBUG(SSSDBG_OP_FAILURE, "Failed to start files update.\n");
+        ret = ENOMEM;
+        id_ctx->updating_groups = false;
+        sf_cb_done(id_ctx);
+        files_account_info_finished(id_ctx, BE_REQ_GROUP, ret);
+        return ret;
+    }
+
+    tevent_req_set_callback(req, sf_group_cb_done, id_ctx);
+
+    return EOK;
+}
+
+static void sf_group_cb_done(struct tevent_req *req)
+{
+    struct files_id_ctx *id_ctx;
+    errno_t ret;
+
+    id_ctx = tevent_req_callback_data(req, struct files_id_ctx);
+
+    ret = sf_enum_files_recv(req);
+    talloc_zfree(req);
     if (ret != EOK) {
         DEBUG(SSSDBG_OP_FAILURE,
               "Could not update files: [%d]: %s\n",
@@ -874,19 +1097,33 @@ static int sf_group_cb(const char *filename, uint32_t flags, void *pvt)
     id_ctx->updating_groups = false;
     sf_cb_done(id_ctx);
     files_account_info_finished(id_ctx, BE_REQ_GROUP, ret);
-    return ret;
 }
 
+static void startup_enum_files_done(struct tevent_req *req);
 static void startup_enum_files(struct tevent_context *ev,
                                struct tevent_immediate *imm,
                                void *pvt)
 {
     struct files_id_ctx *id_ctx = talloc_get_type(pvt, struct files_id_ctx);
-    errno_t ret;
+    struct tevent_req *req;
 
     talloc_zfree(imm);
 
-    ret = sf_enum_files(id_ctx, SF_UPDATE_BOTH);
+    req = sf_enum_files_send(id_ctx, SF_UPDATE_BOTH);
+    if (req == NULL) {
+        DEBUG(SSSDBG_OP_FAILURE, "Could not update files after startup.\n");
+        return;
+    }
+
+    tevent_req_set_callback(req, startup_enum_files_done, NULL);
+}
+
+static void startup_enum_files_done(struct tevent_req *req)
+{
+    errno_t ret;
+
+    ret = sf_enum_files_recv(req);
+    talloc_zfree(req);
     if (ret != EOK) {
         DEBUG(SSSDBG_OP_FAILURE,
               "Could not update files after startup: [%d]: %s\n",
_______________________________________________
sssd-devel mailing list -- sssd-devel@lists.fedorahosted.org
To unsubscribe send an email to sssd-devel-le...@lists.fedorahosted.org
Fedora Code of Conduct: 
https://docs.fedoraproject.org/en-US/project/code-of-conduct/
List Guidelines: https://fedoraproject.org/wiki/Mailing_list_guidelines
List Archives: 
https://lists.fedorahosted.org/archives/list/sssd-devel@lists.fedorahosted.org
Do not reply to spam on the list, report it: 
https://pagure.io/fedora-infrastructure

Reply via email to