When generating ras cper file, the original data nodes in the ras
log ring buffer may be deleted, leading to invalid pointer
access. Copy the data from the ras log ring instead of directly
referencing the pointers to avoid this issue.

Signed-off-by: YiPeng Chai <[email protected]>
---
 .../drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c | 12 +++---
 drivers/gpu/drm/amd/ras/rascore/ras_cmd.c     | 42 +++++++++++++------
 drivers/gpu/drm/amd/ras/rascore/ras_cper.c    | 20 ++++-----
 drivers/gpu/drm/amd/ras/rascore/ras_cper.h    |  2 +-
 .../gpu/drm/amd/ras/rascore/ras_log_ring.c    | 23 +++++-----
 .../gpu/drm/amd/ras/rascore/ras_log_ring.h    |  2 +-
 6 files changed, 58 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c 
b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c
index b8e9442b2ca5..537f709d8570 100644
--- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c
+++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_virt_ras_cmd.c
@@ -219,7 +219,7 @@ static bool amdgpu_virt_ras_check_batch_cached(struct 
ras_cmd_batch_trace_record
 }
 
 static int amdgpu_virt_ras_get_batch_records(struct ras_core_context 
*ras_core, uint64_t batch_id,
-                       struct ras_log_info **trace_arr, uint32_t arr_num,
+                       struct ras_log_info *trace_arr, uint32_t arr_num,
                        struct ras_cmd_batch_trace_record_rsp *rsp_cache)
 {
        struct ras_cmd_batch_trace_record_req req = {
@@ -255,7 +255,8 @@ static int amdgpu_virt_ras_get_batch_records(struct 
ras_core_context *ras_core,
        }
 
        for (i = 0; i < batch->trace_num && i < arr_num; i++)
-               trace_arr[i] = &rsp->records[batch->offset + i];
+               memcpy(&trace_arr[i],
+                       &rsp->records[batch->offset + i], sizeof(*trace_arr));
 
        return i;
 }
@@ -272,7 +273,8 @@ static int amdgpu_virt_ras_get_cper_records(struct 
ras_core_context *ras_core,
                (struct ras_cmd_cper_record_rsp *)cmd->output_buff_raw;
        struct ras_log_batch_overview *overview = 
&virt_ras->batch_mgr.batch_overview;
        struct ras_cmd_batch_trace_record_rsp *rsp_cache = 
&virt_ras->batch_mgr.batch_trace;
-       struct ras_log_info **trace;
+       struct ras_log_info *trace;
+       uint32_t trace_count = MAX_RECORD_PER_BATCH;
        uint32_t offset = 0, real_data_len = 0;
        uint64_t batch_id;
        uint8_t *out_buf;
@@ -289,7 +291,7 @@ static int amdgpu_virt_ras_get_cper_records(struct 
ras_core_context *ras_core,
            req->cper_num > RAS_CMD_MAX_CPER_FETCH_NUM)
                return RAS_CMD__ERROR_INVALID_INPUT_DATA;
 
-       trace = kcalloc(MAX_RECORD_PER_BATCH, sizeof(*trace), GFP_KERNEL);
+       trace = kcalloc(trace_count, sizeof(*trace), GFP_KERNEL);
        if (!trace)
                return RAS_CMD__ERROR_GENERIC;
 
@@ -306,7 +308,7 @@ static int amdgpu_virt_ras_get_cper_records(struct 
ras_core_context *ras_core,
                if (batch_id >= overview->last_batch_id)
                        break;
                count = amdgpu_virt_ras_get_batch_records(ras_core, batch_id,
-                                                         trace, 
MAX_RECORD_PER_BATCH,
+                                                         trace, trace_count,
                                                          rsp_cache);
                if (count > 0) {
                        ret = ras_cper_generate_cper(ras_core, trace, count,
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_cmd.c 
b/drivers/gpu/drm/amd/ras/rascore/ras_cmd.c
index 5b7a36596b02..088b9b153f7f 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_cmd.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_cmd.c
@@ -202,11 +202,12 @@ static int ras_cmd_get_cper_records(struct 
ras_core_context *ras_core,
                        (struct ras_cmd_cper_record_req *)cmd->input_buff_raw;
        struct ras_cmd_cper_record_rsp *rsp =
                        (struct ras_cmd_cper_record_rsp *)cmd->output_buff_raw;
-       struct ras_log_info *trace[MAX_RECORD_PER_BATCH] = {0};
+       struct ras_log_info *trace = NULL;
+       uint32_t trace_count = MAX_RECORD_PER_BATCH;
        struct ras_log_batch_overview overview;
        uint32_t offset = 0, real_data_len = 0;
        uint64_t batch_id;
-       uint8_t *buffer;
+       uint8_t *buffer = NULL;
        int ret = 0, i, count;
 
        if ((cmd->input_size != sizeof(struct ras_cmd_cper_record_req)) ||
@@ -224,6 +225,12 @@ static int ras_cmd_get_cper_records(struct 
ras_core_context *ras_core,
        if (!buffer)
                return RAS_CMD__ERROR_GENERIC;
 
+       trace = kcalloc(trace_count, sizeof(*trace), GFP_KERNEL);
+       if (!trace) {
+               ret = RAS_CMD__ERROR_GENERIC;
+               goto out;
+       }
+
        ras_log_ring_get_batch_overview(ras_core, &overview);
        for (i = 0; i < req->cper_num; i++) {
                batch_id = req->cper_start_id + i;
@@ -231,7 +238,7 @@ static int ras_cmd_get_cper_records(struct ras_core_context 
*ras_core,
                        break;
 
                count = ras_log_ring_get_batch_records(ras_core, batch_id, 
trace,
-                                       ARRAY_SIZE(trace));
+                                       trace_count);
                if (count > 0) {
                        ret = ras_cper_generate_cper(ras_core, trace, count,
                                        &buffer[offset], req->buf_size - 
offset, &real_data_len);
@@ -244,8 +251,8 @@ static int ras_cmd_get_cper_records(struct ras_core_context 
*ras_core,
 
        if ((ret && (ret != -ENOMEM)) ||
                copy_to_user(u64_to_user_ptr(req->buf_ptr), buffer, offset)) {
-               kfree(buffer);
-               return RAS_CMD__ERROR_GENERIC;
+               ret = RAS_CMD__ERROR_GENERIC;
+               goto out;
        }
 
        rsp->real_data_size = offset;
@@ -254,10 +261,12 @@ static int ras_cmd_get_cper_records(struct 
ras_core_context *ras_core,
        rsp->version = 0;
 
        cmd->output_size = sizeof(struct ras_cmd_cper_record_rsp);
+       ret = RAS_CMD__SUCCESS;
 
+out:
+       kfree(trace);
        kfree(buffer);
-
-       return RAS_CMD__SUCCESS;
+       return ret;
 }
 
 static int ras_cmd_get_batch_trace_snapshot(struct ras_core_context *ras_core,
@@ -291,7 +300,8 @@ static int ras_cmd_get_batch_trace_records(struct 
ras_core_context *ras_core,
        struct ras_cmd_batch_trace_record_rsp *output_data =
                        (struct ras_cmd_batch_trace_record_rsp 
*)cmd->output_buff_raw;
        struct ras_log_batch_overview overview;
-       struct ras_log_info *trace_arry[MAX_RECORD_PER_BATCH] = {0};
+       struct ras_log_info *trace_arry = NULL;
+       uint32_t trace_count = MAX_RECORD_PER_BATCH;
        struct ras_log_info *record;
        int i, j, count = 0, offset = 0;
        uint64_t id;
@@ -309,6 +319,10 @@ static int ras_cmd_get_batch_trace_records(struct 
ras_core_context *ras_core,
            (input_data->start_batch_id >= overview.last_batch_id))
                return RAS_CMD__ERROR_INVALID_INPUT_SIZE;
 
+       trace_arry = kcalloc(trace_count, sizeof(*trace_arry), GFP_KERNEL);
+       if (!trace_arry)
+               return RAS_CMD__ERROR_GENERIC;
+
        for (i = 0; i < input_data->batch_num; i++) {
                id = input_data->start_batch_id + i;
                if (id >= overview.last_batch_id) {
@@ -317,17 +331,17 @@ static int ras_cmd_get_batch_trace_records(struct 
ras_core_context *ras_core,
                }
 
                count = ras_log_ring_get_batch_records(ras_core,
-                                       id, trace_arry, ARRAY_SIZE(trace_arry));
+                                       id, trace_arry, trace_count);
                if (count > 0) {
                        if ((offset + count) > RAS_CMD_MAX_TRACE_NUM)
                                break;
                        for (j = 0; j < count; j++) {
                                record = &output_data->records[offset + j];
-                               record->seqno = trace_arry[j]->seqno;
-                               record->timestamp = trace_arry[j]->timestamp;
-                               record->event = trace_arry[j]->event;
+                               record->seqno = trace_arry[j].seqno;
+                               record->timestamp = trace_arry[j].timestamp;
+                               record->event = trace_arry[j].event;
                                memcpy(&record->aca_reg,
-                                       &trace_arry[j]->aca_reg, 
sizeof(trace_arry[j]->aca_reg));
+                                       &trace_arry[j].aca_reg, 
sizeof(trace_arry[j].aca_reg));
                        }
                } else {
                        count = 0;
@@ -346,6 +360,8 @@ static int ras_cmd_get_batch_trace_records(struct 
ras_core_context *ras_core,
 
        cmd->output_size = sizeof(struct ras_cmd_batch_trace_record_rsp);
 
+       kfree(trace_arry);
+
        return RAS_CMD__SUCCESS;
 }
 
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_cper.c 
b/drivers/gpu/drm/amd/ras/rascore/ras_cper.c
index 0fc7522b7ab6..6e93a13bbc4c 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_cper.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_cper.c
@@ -175,14 +175,14 @@ static int fill_section_runtime(struct ras_core_context 
*ras_core,
 }
 
 static int cper_generate_runtime_record(struct ras_core_context *ras_core,
-       struct cper_section_hdr *hdr, struct ras_log_info **trace_arr, uint32_t 
arr_num,
+       struct cper_section_hdr *hdr, struct ras_log_info *trace_arr, uint32_t 
arr_num,
                enum ras_cper_severity sev)
 {
        struct cper_section_descriptor *descriptor;
        struct cper_section_runtime *runtime;
        int i;
 
-       fill_section_hdr(ras_core, hdr, RAS_CPER_TYPE_RUNTIME, sev, 
trace_arr[0]);
+       fill_section_hdr(ras_core, hdr, RAS_CPER_TYPE_RUNTIME, sev, 
&trace_arr[0]);
        hdr->record_length =  RAS_HDR_LEN + ((RAS_SEC_DESC_LEN + 
RAS_NONSTD_SEC_LEN) * arr_num);
        hdr->sec_cnt = arr_num;
        for (i = 0; i < arr_num; i++) {
@@ -194,21 +194,21 @@ static int cper_generate_runtime_record(struct 
ras_core_context *ras_core,
                fill_section_descriptor(ras_core, descriptor, sev, RUNTIME,
                        RAS_NONSTD_SEC_OFFSET(hdr->sec_cnt, i),
                        sizeof(struct cper_section_runtime));
-               fill_section_runtime(ras_core, runtime, trace_arr[i], sev);
+               fill_section_runtime(ras_core, runtime, &trace_arr[i], sev);
        }
 
        return 0;
 }
 
 static int cper_generate_fatal_record(struct ras_core_context *ras_core,
-       uint8_t *buffer, struct ras_log_info **trace_arr, uint32_t arr_num)
+       uint8_t *buffer, struct ras_log_info *trace_arr, uint32_t arr_num)
 {
        struct ras_cper_fatal_record record = {0};
        int i = 0;
 
        for (i = 0; i < arr_num; i++) {
                fill_section_hdr(ras_core, &record.hdr, RAS_CPER_TYPE_FATAL,
-                                RAS_CPER_SEV_FATAL_UE, trace_arr[i]);
+                                RAS_CPER_SEV_FATAL_UE, &trace_arr[i]);
                record.hdr.record_length =  RAS_HDR_LEN + RAS_SEC_DESC_LEN + 
RAS_FATAL_SEC_LEN;
                record.hdr.sec_cnt = 1;
 
@@ -216,7 +216,7 @@ static int cper_generate_fatal_record(struct 
ras_core_context *ras_core,
                                        CRASHDUMP, offsetof(struct 
ras_cper_fatal_record, fatal),
                                        sizeof(struct cper_section_fatal));
 
-               fill_section_fatal(ras_core, &record.fatal, trace_arr[i]);
+               fill_section_fatal(ras_core, &record.fatal, &trace_arr[i]);
 
                memcpy(buffer + (i * record.hdr.record_length),
                                &record, record.hdr.record_length);
@@ -271,7 +271,7 @@ static enum ras_cper_type 
cper_ras_log_event_to_cper_type(enum ras_log_event eve
 }
 
 int ras_cper_generate_cper(struct ras_core_context *ras_core,
-               struct ras_log_info **trace_list, uint32_t count,
+               struct ras_log_info *trace_list, uint32_t count,
                uint8_t *buf, uint32_t buf_len, uint32_t *real_data_len)
 {
        uint8_t *buffer = buf;
@@ -281,14 +281,14 @@ int ras_cper_generate_cper(struct ras_core_context 
*ras_core,
 
        /* All the batch traces share the same event */
        record_size = cper_get_record_size(
-                       cper_ras_log_event_to_cper_type(trace_list[0]->event), 
count);
+                       cper_ras_log_event_to_cper_type(trace_list[0].event), 
count);
 
        if ((record_size + saved_size) > buf_size)
                return -ENOMEM;
 
        hdr = (struct cper_section_hdr *)(buffer + saved_size);
 
-       switch (trace_list[0]->event) {
+       switch (trace_list[0].event) {
        case RAS_LOG_EVENT_RMA:
                cper_generate_runtime_record(ras_core, hdr, trace_list, count, 
RAS_CPER_SEV_RMA);
                break;
@@ -304,7 +304,7 @@ int ras_cper_generate_cper(struct ras_core_context 
*ras_core,
                cper_generate_fatal_record(ras_core, buffer + saved_size, 
trace_list, count);
                break;
        default:
-               RAS_DEV_WARN(ras_core->dev, "Unprocessed trace event: %d\n", 
trace_list[0]->event);
+               RAS_DEV_WARN(ras_core->dev, "Unprocessed trace event: %d\n", 
trace_list[0].event);
                break;
        }
 
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_cper.h 
b/drivers/gpu/drm/amd/ras/rascore/ras_cper.h
index 076c1883c1ce..e4e3615ecc2e 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_cper.h
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_cper.h
@@ -299,6 +299,6 @@ struct ras_cper_fatal_record {
 struct ras_core_context;
 struct ras_log_info;
 int ras_cper_generate_cper(struct ras_core_context *ras_core,
-               struct ras_log_info **trace_list, uint32_t count,
+               struct ras_log_info *trace_list, uint32_t count,
                uint8_t *buf, uint32_t buf_len, uint32_t *real_data_len);
 #endif
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.c 
b/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.c
index 0a838fdcb2f6..c2fca1a1e780 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.c
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.c
@@ -265,8 +265,8 @@ void ras_log_ring_add_log_event(struct ras_core_context 
*ras_core,
        ras_log_ring_add_data(ras_core, log, batch_tag);
 }
 
-static struct ras_log_info *ras_log_ring_lookup_data(struct ras_core_context 
*ras_core,
-                                       uint64_t idx)
+static int ras_log_ring_lookup_data(struct ras_core_context *ras_core,
+                                       uint64_t idx, struct ras_log_info *log)
 {
        struct ras_log_ring *log_ring = &ras_core->ras_log_ring;
        unsigned long flags = 0;
@@ -274,30 +274,27 @@ static struct ras_log_info 
*ras_log_ring_lookup_data(struct ras_core_context *ra
 
        spin_lock_irqsave(&log_ring->spin_lock, flags);
        data = radix_tree_lookup(&log_ring->ras_log_root, idx);
+       if (data)
+               memcpy(log, data, sizeof(*log));
        spin_unlock_irqrestore(&log_ring->spin_lock, flags);
 
-       return (struct ras_log_info *)data;
+       return data ? 0 : -ENODATA;
 }
 
 int ras_log_ring_get_batch_records(struct ras_core_context *ras_core, uint64_t 
batch_id,
-               struct ras_log_info **log_arr, uint32_t arr_num)
+               struct ras_log_info *log_arr, uint32_t arr_num)
 {
        struct ras_log_ring *log_ring = &ras_core->ras_log_ring;
        uint32_t i, idx, count = 0;
-       void *data;
 
-       if ((batch_id >= log_ring->mono_upward_batch_id) ||
+       if (!log_arr || !arr_num || (batch_id >= 
log_ring->mono_upward_batch_id) ||
                (batch_id < log_ring->last_del_batch_id))
                return -EINVAL;
 
-       for (i = 0; i < MAX_RECORD_PER_BATCH; i++) {
+       for (i = 0; i < MAX_RECORD_PER_BATCH && i < arr_num; i++) {
                idx = BATCH_IDX_TO_TREE_IDX(batch_id, i);
-               data = ras_log_ring_lookup_data(ras_core, idx);
-               if (data) {
-                       log_arr[count++] = data;
-                       if (count >= arr_num)
-                               break;
-               }
+               if (!ras_log_ring_lookup_data(ras_core, idx, &log_arr[count]))
+                       count++;
        }
 
        return count;
diff --git a/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.h 
b/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.h
index 0ff6cc35678d..cb66beaa9f43 100644
--- a/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.h
+++ b/drivers/gpu/drm/amd/ras/rascore/ras_log_ring.h
@@ -86,7 +86,7 @@ void ras_log_ring_add_log_event(struct ras_core_context 
*ras_core,
                enum ras_log_event event, void *data, struct ras_log_batch_tag 
*tag);
 
 int ras_log_ring_get_batch_records(struct ras_core_context *ras_core, uint64_t 
batch_idx,
-               struct ras_log_info **log_arr, uint32_t arr_num);
+               struct ras_log_info *log_arr, uint32_t arr_num);
 
 int ras_log_ring_get_batch_overview(struct ras_core_context *ras_core,
                struct ras_log_batch_overview *overview);
-- 
2.43.0

Reply via email to