RT kernels allow tasks to disable migration while remaining preemptible. A
task which has migration disabled cannot be moved to a different CPU when
its affinity mask is changed until it leaves the migrate disabled section.
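
For reference, a migrate disabled section on such a kernel looks roughly
like the sketch below (demo_counter and the function name are made up;
migrate_disable()/migrate_enable() and this_cpu_inc() are the real
interfaces):

#include <linux/preempt.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, demo_counter);

static void demo_migrate_disabled_section(void)
{
        /*
         * Between migrate_disable() and migrate_enable() the task may
         * be preempted, but it stays on its current CPU, so per-CPU
         * data remains stable for the whole section.
         */
        migrate_disable();
        this_cpu_inc(demo_counter);
        migrate_enable();
}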

Add a mechanism to queue the migration request in the target task and wait
for it to complete. The task handles the queued request itself when it
leaves the migrate disabled section.
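
The consumer side is not part of this patch; when the task leaves the
migrate disabled section it is expected to pick up the queued request,
apply the mask and wake the waiter. A rough sketch of that handling,
using the fields added below (the function name is illustrative and
do_set_affinity() is a placeholder for the real affinity update):

#include <linux/sched.h>
#include <linux/completion.h>

static void task_handle_pending_migration(struct task_struct *p)
{
        struct task_migrate_data *data = p->migration_ctrl.pending;

        if (!data)
                return;

        p->migration_ctrl.pending = NULL;

        /* Apply the queued mask; placeholder for the real update. */
        data->res = do_set_affinity(p, data->mask, data->check);

        /* Wake the waiter blocked in __set_cpus_allowed_ptr(). */
        complete(data->done);
}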

This ensures that __set_cpus_allowed_ptr() returns only after the new
affinity mask has taken effect.
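
From a caller's point of view the interface is unchanged, the call may
merely block longer. A minimal usage sketch (pin_task_to_cpu() is an
illustrative name; set_cpus_allowed_ptr() and cpumask_of() are the real
interfaces):

#include <linux/sched.h>
#include <linux/cpumask.h>

static int pin_task_to_cpu(struct task_struct *p, int cpu)
{
        /*
         * May block until @p leaves its migrate disabled section, but
         * on return the new mask is in effect or an error is reported.
         */
        return set_cpus_allowed_ptr(p, cpumask_of(cpu));
}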

Signed-off-by: Thomas Gleixner <t...@linutronix.de>
---
 include/linux/sched.h |   19 ++++++++++++
 kernel/sched/core.c   |   76 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -629,8 +629,16 @@ struct wake_q_node {
 };
 
 #if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
+struct task_migrate_data {
+       const cpumask_t         *mask;
+       struct completion       *done;
+       bool                    check;
+       int                     res;
+};
+
 struct task_migration_ctrl {
        struct mutex                    mutex;
+       struct task_migrate_data        *pending;
        int                             disable_cnt;
 };
 
@@ -638,8 +646,19 @@ struct task_migration_ctrl {
 {                                                                      \
        .mutex = __MUTEX_INITIALIZER(init_task.migration_ctrl.mutex),   \
 }
+
+static inline int task_self_migrate_result(struct task_migrate_data *data)
+{
+       return data->res;
+}
+
 #else /* CONFIG_PREEMPT_RT && CONFIG_SMP */
+struct task_migrate_data { };
 struct task_migration_ctrl { };
+static inline int task_self_migrate_result(struct task_migrate_data *data)
+{
+       return -ENOSYS;
+}
 #endif /* !(CONFIG_PREEMPT_RT && CONFIG_SMP) */
 
 struct task_struct {
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -442,6 +442,70 @@ static inline void hrtick_rq_init(struct
 }
 #endif /* CONFIG_SCHED_HRTICK */
 
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+static inline void task_lock_migration_ctrl(struct task_struct *p)
+{
+       mutex_lock(&p->migration_ctrl.mutex);
+}
+
+static inline void task_unlock_migration_ctrl(struct task_struct *p)
+{
+       mutex_unlock(&p->migration_ctrl.mutex);
+}
+
+/*
+ * If the affinity of a task should be set and the task is in a migrate
+ * disabled region then the operation has to wait until the task leaves the
+ * migrate disabled region and takes care of setting its affinity on its
+ * own.
+ */
+static bool task_self_migration(struct task_struct *p,
+                               const struct cpumask *new_mask, bool check,
+                               struct rq *rq, struct rq_flags *rf,
+                               struct task_migrate_data *data)
+{
+       DECLARE_COMPLETION_ONSTACK(done);
+
+       lockdep_assert_held(&p->migration_ctrl.mutex);
+       lockdep_assert_held(&rq->lock);
+       lockdep_assert_held(&p->pi_lock);
+
+       if (!p->migration_ctrl.disable_cnt)
+               return false;
+
+       BUG_ON(p == current);
+
+       /*
+        * Store a pointer to migration data in the migration control
+        * struct, which will be used by the task to set its own affinity
+        * when it leaves the migrate disabled section. The result is
+        * returned in @data::res.
+        */
+       data->mask = new_mask;
+       data->check = check;
+       data->done = &done;
+       p->migration_ctrl.pending = data;
+
+       /* Get a reference on @p, drop the locks and wait for it to complete */
+       get_task_struct(p);
+       task_rq_unlock(rq, p, rf);
+       wait_for_completion(&done);
+       put_task_struct(p);
+       return true;
+}
+
+#else /* defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) */
+static inline void task_lock_migration_ctrl(struct task_struct *p) { }
+static inline void task_unlock_migration_ctrl(struct task_struct *p) { }
+static bool task_self_migration(struct task_struct *p,
+                               const struct cpumask *new_mask, bool check,
+                               struct rq *rq, struct rq_flags *rf,
+                               struct task_migrate_data *data)
+{
+       return false;
+}
+#endif /* !(defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)) */
+
 /*
  * cmpxchg based fetch_or, macro so it works for different integer types
  */
@@ -1947,17 +2011,29 @@ static int set_cpus_allowed_ptr_locked(s
 static int __set_cpus_allowed_ptr(struct task_struct *p,
                                  const struct cpumask *new_mask, bool check)
 {
+       struct task_migrate_data sync_data;
        struct rq_flags rf;
        struct rq *rq;
        int ret = 0;
 
+       /*
+        * On RT kernels the affinity setting might be delayed if the task
+        * is in a migrate disabled region. The request for changing the
+        * affinity is queued in the target task which acts upon it when
+        * leaving the migrate disabled sections. This requires
+        * serialization to protect the relevant data structures.
+        */
+       task_lock_migration_ctrl(p);
        rq = task_rq_lock(p, &rf);
 
        if (cpumask_equal(&p->cpus_mask, new_mask))
                task_rq_unlock(rq, p, &rf);
+       else if (task_self_migration(p, new_mask, check, rq, &rf, &sync_data))
+               ret = task_self_migrate_result(&sync_data);
        else
                ret = set_cpus_allowed_ptr_locked(p, new_mask, check, rq, &rf);
 
+       task_unlock_migration_ctrl(p);
        return ret;
 }
 
