count_threshold == 1 isn't working as expected: CEC only soft-offlines
a page the second time the same pfn is hit by a correctable error.

Signed-off-by: WANG Chao <[email protected]>
---
 drivers/ras/cec.c | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c
index 702e4c02c713..ac879c45377c 100644
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -272,7 +272,22 @@ static u64 __maybe_unused del_lru_elem(void)
        return pfn;
 }
 
+static void cec_valid_soft_offline(u64 pfn)
+{
+       if (!pfn_valid(pfn)) {
+               pr_warn("CEC: Invalid pfn: 0x%llx\n", pfn);
+       } else {
+               /* We have reached max count for this page, soft-offline it. */
+               pr_err("Soft-offlining pfn: 0x%llx\n", pfn);
+               memory_failure_queue(pfn, MF_SOFT_OFFLINE);
+               ce_arr.pfns_poisoned++;
+       }
+}
 
+/*
+ * Return a >0 value to denote that we've reached the offlining
+ * threshold.
+ */
 int cec_add_elem(u64 pfn)
 {
        struct ce_array *ca = &ce_arr;
@@ -295,6 +310,11 @@ int cec_add_elem(u64 pfn)
 
        ret = find_elem(ca, pfn, &to);
        if (ret < 0) {
+               if (count_threshold == 1) {
+                       cec_valid_soft_offline(pfn);
+                       ret = 1;
+                       goto unlock;
+               }
                /*
                 * Shift range [to-end] to make room for one more element.
                 */
@@ -320,23 +340,9 @@ int cec_add_elem(u64 pfn)
 
                ret = 0;
        } else {
-               u64 pfn = ca->array[to] >> PAGE_SHIFT;
-
-               if (!pfn_valid(pfn)) {
-                       pr_warn("CEC: Invalid pfn: 0x%llx\n", pfn);
-               } else {
-                       /* We have reached max count for this page, soft-offline it. */
-                       pr_err("Soft-offlining pfn: 0x%llx\n", pfn);
-                       memory_failure_queue(pfn, MF_SOFT_OFFLINE);
-                       ca->pfns_poisoned++;
-               }
-
+               cec_valid_soft_offline(pfn);
                del_elem(ca, to);
 
-               /*
-                * Return a >0 value to denote that we've reached the offlining
-                * threshold.
-                */
                ret = 1;
 
                goto unlock;
-- 
2.21.0

Reply via email to