AMD General

I suggest defining AMDGPU_VIRT_RAS_BAD_PAGE_TABLE_MAX_CAPACITY as an absolute
(fixed) value rather than referencing or modifying anything in
amdgpu_ras_eeprom.x. Since amdgpu_ras_eeprom.x is also used by amdgpu, the
maximum bad page capacity requirements for amdgpu and amdgpu_virt may differ.

Best Regards,
Thomas
-----Original Message-----
From: Xie, Chenglei <[email protected]>
Sent: Friday, May 22, 2026 12:35 AM
To: [email protected]
Cc: Chan, Hing Pong <[email protected]>; Luo, Zhigang <[email protected]>; 
Deucher, Alexander <[email protected]>; Xie, Chenglei 
<[email protected]>; Chai, Thomas <[email protected]>; Yi, Tony 
<[email protected]>
Subject: [PATCH v4] drm/amdgpu: grow VF RAS bad page table with bounded dynamic 
alloc

The VF RAS error handler used fixed-size bps[] / bps_bo[] arrays (512 slots). 
When the PF2VF bad-page block listed more entries than fit,
amdgpu_virt_ras_add_bps() could memcpy() past the end of those arrays.

Replace the fixed backing store with a dynamically grown table:
- Add capacity to track allocated slots separately from count.
- Start at 512 slots and realloc bps / bps_bo together when full.
- Refuse growth beyond RAS_EEPROM_PEAK_RECORD_COUNT, the largest bad
  page record count among supported RAS EEPROM layouts (v1 and v2.1).
- Return failure from amdgpu_virt_ras_add_bps() and stop processing
  the PF2VF block if allocation fails or the cap is reached.

Move the shared RAS EEPROM size and per-layout record limit macros into
amdgpu_ras_eeprom.h so the VF cap stays in sync with the EEPROM layout.

Signed-off-by: Chenglei Xie <[email protected]>
Co-authored-by: Cursor <[email protected]>
Change-Id: Ic8dd487d91436759bee0e201ce90ea1f2266a161
---
 .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c    |  17 ---
 .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h    |  28 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c      | 110 ++++++++++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h      |   2 +
 4 files changed, 120 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index c61389a079822..c7a813e5fa0a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -58,15 +58,6 @@
 #define EEPROM_I2C_MADDR_0      0x0
 #define EEPROM_I2C_MADDR_4      0x40000

-/*
- * The 2 macros below represent the actual size in bytes that
- * those entities occupy in the EEPROM memory.
- * RAS_TABLE_RECORD_SIZE is different than sizeof(eeprom_table_record) which
- * uses uint64 to store 6b fields such as retired_page.
- */
-#define RAS_TABLE_HEADER_SIZE   20
-#define RAS_TABLE_RECORD_SIZE   24
-
 /* Table hdr is 'AMDR' */
 #define RAS_TABLE_HDR_VAL       0x414d4452

@@ -87,13 +78,9 @@
  * ---------------------------------
  */

-/* Assume 2-Mbit size EEPROM and take up the whole space. */
-#define RAS_TBL_SIZE_BYTES      (256 * 1024)
 #define RAS_TABLE_START         0
 #define RAS_HDR_START           RAS_TABLE_START
 #define RAS_RECORD_START        (RAS_HDR_START + RAS_TABLE_HEADER_SIZE)
-#define RAS_MAX_RECORD_COUNT    ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \
-                                / RAS_TABLE_RECORD_SIZE)

 /*
  * EEPROM Table structrue v2.1
@@ -116,13 +103,9 @@
  */

 /* EEPROM Table V2_1 */
-#define RAS_TABLE_V2_1_INFO_SIZE       256
 #define RAS_TABLE_V2_1_INFO_START      RAS_TABLE_HEADER_SIZE
 #define RAS_RECORD_START_V2_1          (RAS_HDR_START + RAS_TABLE_HEADER_SIZE 
+ \
                                        RAS_TABLE_V2_1_INFO_SIZE)
-#define RAS_MAX_RECORD_COUNT_V2_1      ((RAS_TBL_SIZE_BYTES - 
RAS_TABLE_HEADER_SIZE - \
-                                       RAS_TABLE_V2_1_INFO_SIZE) \
-                                       / RAS_TABLE_RECORD_SIZE)

 #define RAS_SMU_MESSAGE_TIMEOUT_MS 1000 /* 1s */

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
index a62114800a92a..4ac0d67f53743 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -30,6 +30,34 @@
 #define RAS_TABLE_VER_V2_1         0x00021000
 #define RAS_TABLE_VER_V3           0x00030000

+/* Assume 2-Mbit size EEPROM and take up the whole space. */
+#define RAS_TBL_SIZE_BYTES             (256 * 1024)
+#define RAS_TABLE_HEADER_SIZE          20
+#define RAS_TABLE_RECORD_SIZE          24
+
+/* Per-layout bad page record limits (add new EEPROM versions here). */
+#define RAS_MAX_RECORD_COUNT           ((RAS_TBL_SIZE_BYTES - 
RAS_TABLE_HEADER_SIZE) \
+                                        / RAS_TABLE_RECORD_SIZE)
+#define RAS_TABLE_V2_1_INFO_SIZE       256
+#define RAS_MAX_RECORD_COUNT_V2_1      ((RAS_TBL_SIZE_BYTES - 
RAS_TABLE_HEADER_SIZE - \
+                                         RAS_TABLE_V2_1_INFO_SIZE) \
+                                        / RAS_TABLE_RECORD_SIZE)
+
+#define RAS_RECORD_COUNT_MAX(a, b)     ((a) > (b) ? (a) : (b))
+
+/*
+ * Upper bound on bad page records for any supported EEPROM layout.
+ * Extend when a new RAS_TABLE_VER_* adds a RAS_MAX_RECORD_COUNT_* macro.
+ */
+#define RAS_EEPROM_PEAK_RECORD_COUNT                           \
+       RAS_RECORD_COUNT_MAX(RAS_MAX_RECORD_COUNT,              \
+                            RAS_MAX_RECORD_COUNT_V2_1)
+
+static inline uint32_t amdgpu_ras_eeprom_peak_record_count(void)
+{
+       return RAS_EEPROM_PEAK_RECORD_COUNT;
+}
+
 struct amdgpu_device;

 enum amdgpu_ras_gpu_health_status {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index f8e58d12e39ae..fa3df31d3c476 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -283,14 +283,65 @@ unsigned int amd_sriov_msg_checksum(void *obj,
        return ret;
 }

+#define AMDGPU_VIRT_RAS_BAD_PAGE_TABLE_INIT_CAPACITY   512
+#define AMDGPU_VIRT_RAS_BAD_PAGE_TABLE_MAX_CAPACITY    
RAS_EEPROM_PEAK_RECORD_COUNT
+
+/**
+ * amdgpu_virt_ras_realloc_eh_data_space - alloc/realloc VF bad-page
+@data->bps and @data->bps_bo
+ * @adev: amdgpu device
+ * @data: VF RAS error-handler data
+ * @pages: minimum number of new slots to add beyond @data->capacity
+ *
+ * Return: 0 on success, %-ENOMEM on failure.
+ */
+static int amdgpu_virt_ras_realloc_eh_data_space(struct amdgpu_device *adev,
+               struct amdgpu_virt_ras_err_handler_data *data,
+               int pages)
+{
+       struct eeprom_table_record *new_bps;
+       struct amdgpu_bo **new_bo;
+       unsigned int old_space;
+       unsigned int new_space;
+       unsigned int align_space;
+
+       old_space = (unsigned int)data->capacity;
+       new_space = old_space + max_t(unsigned int, (unsigned int)pages,
+                                     (unsigned 
int)AMDGPU_VIRT_RAS_BAD_PAGE_TABLE_INIT_CAPACITY);
+       if (new_space < old_space || new_space > 
AMDGPU_VIRT_RAS_BAD_PAGE_TABLE_MAX_CAPACITY)
+               return -ENOMEM;
+
+       align_space = ALIGN(new_space, 
AMDGPU_VIRT_RAS_BAD_PAGE_TABLE_INIT_CAPACITY);
+       if (align_space > AMDGPU_VIRT_RAS_BAD_PAGE_TABLE_MAX_CAPACITY)
+               return -ENOMEM;
+
+       new_bps = kmalloc_array(align_space, sizeof(*data->bps), GFP_KERNEL);
+       new_bo = kcalloc(align_space, sizeof(*data->bps_bo), GFP_KERNEL);
+       if (!new_bps || !new_bo) {
+               kfree(new_bps);
+               kfree(new_bo);
+               dev_warn_ratelimited(adev->dev,
+                                    "RAS WARN: failed to grow bad page table 
to %u slots\n",
+                                    align_space);
+               return -ENOMEM;
+       }
+
+       memcpy(new_bps, data->bps, data->count * sizeof(*data->bps));
+       memcpy(new_bo, data->bps_bo, data->count * sizeof(*data->bps_bo));
+
+       kfree(data->bps);
+       kfree(data->bps_bo);
+       data->bps = new_bps;
+       data->bps_bo = new_bo;
+       data->capacity = (int)align_space;
+
+       return 0;
+}
+
 static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)  {
        struct amdgpu_virt *virt = &adev->virt;
        struct amdgpu_virt_ras_err_handler_data **data = &virt->virt_eh_data;
-       /* GPU will be marked bad on host if bp count more then 10,
-        * so alloc 512 is enough.
-        */
-       unsigned int align_space = 512;
+       unsigned int align_space =
+AMDGPU_VIRT_RAS_BAD_PAGE_TABLE_INIT_CAPACITY;
        void *bps = NULL;
        struct amdgpu_bo **bps_bo = NULL;

@@ -302,12 +353,13 @@ static int amdgpu_virt_init_ras_err_handler_data(struct 
amdgpu_device *adev)
        if (!bps)
                goto bps_failure;

-       bps_bo = kmalloc_array(align_space, sizeof(*(*data)->bps_bo), 
GFP_KERNEL);
+       bps_bo = kcalloc(align_space, sizeof(*(*data)->bps_bo), GFP_KERNEL);
        if (!bps_bo)
                goto bps_bo_failure;

        (*data)->bps = bps;
        (*data)->bps_bo = bps_bo;
+       (*data)->capacity = align_space;
        (*data)->count = 0;
        (*data)->last_reserved = 0;

@@ -361,17 +413,33 @@ void amdgpu_virt_release_ras_err_handler_data(struct 
amdgpu_device *adev)
        virt->virt_eh_data = NULL;
 }

-static void amdgpu_virt_ras_add_bps(struct amdgpu_device *adev,
-               struct eeprom_table_record *bps, int pages)
+static bool amdgpu_virt_ras_add_bps(struct amdgpu_device *adev,
+               const struct eeprom_table_record *bps, int pages)
 {
        struct amdgpu_virt *virt = &adev->virt;
        struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
+       int need;

-       if (!data)
-               return;
+       if (!data || pages <= 0)
+               return false;
+
+       if (pages > AMDGPU_VIRT_RAS_BAD_PAGE_TABLE_MAX_CAPACITY - data->count) {
+               dev_warn_ratelimited(adev->dev,
+                                    "RAS WARN: bad page table at capacity 
(count=%d pages=%d max=%u)\n",
+                                    data->count, pages,
+                                    
AMDGPU_VIRT_RAS_BAD_PAGE_TABLE_MAX_CAPACITY);
+               return false;
+       }
+
+       need = data->count + pages;
+       if (need > data->capacity &&
+           amdgpu_virt_ras_realloc_eh_data_space(adev, data, need - 
data->capacity))
+               return false;

        memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
        data->count += pages;
+
+       return true;
 }

 static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev) @@ -443,20 
+511,22 @@ static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,

        memset(&bp, 0, sizeof(bp));

-       if (bp_block_size) {
-               bp_cnt = bp_block_size / sizeof(uint64_t);
-               for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
-                       retired_page = *(uint64_t *)(vram_usage_va +
-                                       bp_block_offset + bp_idx * 
sizeof(uint64_t));
-                       bp.retired_page = retired_page;
+       if (!bp_block_size)
+               return;

-                       if (amdgpu_virt_ras_check_bad_page(adev, retired_page))
-                               continue;
+       bp_cnt = bp_block_size / sizeof(uint64_t);
+       for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
+               retired_page = *(uint64_t *)(vram_usage_va +
+                               bp_block_offset + bp_idx * sizeof(uint64_t));
+               bp.retired_page = retired_page;

-                       amdgpu_virt_ras_add_bps(adev, &bp, 1);
+               if (amdgpu_virt_ras_check_bad_page(adev, retired_page))
+                       continue;

-                       amdgpu_virt_ras_reserve_bps(adev);
-               }
+               if (!amdgpu_virt_ras_add_bps(adev, &bp, 1))
+                       break;
+
+               amdgpu_virt_ras_reserve_bps(adev);
        }
 }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index d563deec0916b..d8500c3e48a17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -265,6 +265,8 @@ struct amdgpu_virt_ras_err_handler_data {
        struct eeprom_table_record *bps;
        /* point to reserved bo array */
        struct amdgpu_bo **bps_bo;
+       /* number of slots in bps[] / bps_bo[] (always >= count) */
+       int capacity;
        /* the count of entries */
        int count;
        /* last reserved entry's index + 1 */
--
2.34.1

Reply via email to