cec_timer_fn() is a timer callback which reads ce_arr.array[]
and updates its decay values. Elements could be added to or
removed from this global array in parallel, although the array
itself will not grow or shrink. del_lru_elem_unlocked() uses
FULL_COUNT() as a key to find the right element to remove,
and that key can be changed by the timer running in parallel.

Fix this by converting the timer to a delayed work, as suggested
by Borislav, so that the decay pass can run in process context
and take ce_mutex instead of needing a spinlock.

Fixes: 011d82611172 ("RAS: Add a Corrected Errors Collector")
Cc: Tony Luck <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Signed-off-by: Cong Wang <[email protected]>
---
 drivers/ras/cec.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c
index a4ff54e50673..5c2040a7389d 100644
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -2,6 +2,7 @@
 #include <linux/mm.h>
 #include <linux/gfp.h>
 #include <linux/kernel.h>
+#include <linux/workqueue.h>
 
 #include <asm/mce.h>
 
@@ -131,7 +132,7 @@ static unsigned int count_threshold = COUNT_MASK;
 #define CEC_TIMER_DEFAULT_INTERVAL     24 * 60 * 60    /* 24 hrs */
 #define CEC_TIMER_MIN_INTERVAL          1 * 60 * 60    /* 1h */
 #define CEC_TIMER_MAX_INTERVAL    30 * 24 * 60 * 60    /* one month */
-static struct timer_list cec_timer;
+static struct delayed_work cec_work;
 static u64 timer_interval = CEC_TIMER_DEFAULT_INTERVAL;
 
 /*
@@ -160,20 +161,21 @@ static void do_spring_cleaning(struct ce_array *ca)
 /*
  * @interval in seconds
  */
-static void cec_mod_timer(struct timer_list *t, unsigned long interval)
+static void cec_mod_work(unsigned long interval)
 {
        unsigned long iv;
 
-       iv = interval * HZ + jiffies;
-
-       mod_timer(t, round_jiffies(iv));
+       iv = interval * HZ;
+       mod_delayed_work(system_wq, &cec_work, round_jiffies(iv));
 }
 
-static void cec_timer_fn(struct timer_list *unused)
+static void cec_work_fn(struct work_struct *work)
 {
+       mutex_lock(&ce_mutex);
        do_spring_cleaning(&ce_arr);
+       mutex_unlock(&ce_mutex);
 
-       cec_mod_timer(&cec_timer, timer_interval);
+       cec_mod_work(timer_interval);
 }
 
 /*
@@ -383,7 +385,7 @@ static int decay_interval_set(void *data, u64 val)
 
        timer_interval = val;
 
-       cec_mod_timer(&cec_timer, timer_interval);
+       cec_mod_work(timer_interval);
        return 0;
 }
 DEFINE_DEBUGFS_ATTRIBUTE(decay_interval_ops, u64_get, decay_interval_set, "%lld\n");
@@ -509,8 +511,8 @@ void __init cec_init(void)
        if (create_debugfs_nodes())
                return;
 
-       timer_setup(&cec_timer, cec_timer_fn, 0);
-       cec_mod_timer(&cec_timer, CEC_TIMER_DEFAULT_INTERVAL);
+       INIT_DELAYED_WORK(&cec_work, cec_work_fn);
+       schedule_delayed_work(&cec_work, CEC_TIMER_DEFAULT_INTERVAL);
 
        pr_info("Correctable Errors collector initialized.\n");
 }
-- 
2.20.1

Reply via email to