From: Xiaofei Tan <tanxiao...@huawei.com> This patch logs detail error info of ROCEE ECC and AXI errors for debug purpose, and remove unnecessary reset for ROCEE overflow errors.
Signed-off-by: Xiaofei Tan <tanxiao...@huawei.com> Signed-off-by: Huazhong Tan <tanhuazh...@huawei.com> --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 2 + .../net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 87 ++++++++++++++++++++-- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h | 1 + 3 files changed, 83 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 61cb10d..5e6c749 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -268,6 +268,8 @@ enum hclge_opcode_type { HCLGE_CONFIG_ROCEE_RAS_INT_EN = 0x1580, HCLGE_QUERY_CLEAR_ROCEE_RAS_INT = 0x1581, HCLGE_ROCEE_PF_RAS_INT_CMD = 0x1584, + HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD = 0x1585, + HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD = 0x1586, HCLGE_IGU_EGU_TNL_INT_EN = 0x1803, HCLGE_IGU_COMMON_INT_EN = 0x1806, HCLGE_TM_QCN_MEM_INT_CFG = 0x1A14, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 784512d..64defd3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -1388,6 +1388,66 @@ static int hclge_handle_all_ras_errors(struct hclge_dev *hdev) return ret; } +static int hclge_log_rocee_axi_error(struct hclge_dev *hdev) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc[3]; + int ret; + + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, + true); + hclge_cmd_setup_basic_desc(&desc[1], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, + true); + hclge_cmd_setup_basic_desc(&desc[2], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, + true); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + + ret = hclge_cmd_send(&hdev->hw, &desc[0], 3); + if (ret) { + dev_err(dev, "failed(%d) to query ROCEE AXI error sts\n", ret); + return ret; + } + + dev_info(dev, "AXI1: %08X %08X %08X %08X %08X %08X\n", + le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), + le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), + le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); + dev_info(dev, "AXI2: %08X %08X %08X %08X %08X %08X\n", + le32_to_cpu(desc[1].data[0]), le32_to_cpu(desc[1].data[1]), + le32_to_cpu(desc[1].data[2]), le32_to_cpu(desc[1].data[3]), + le32_to_cpu(desc[1].data[4]), le32_to_cpu(desc[1].data[5])); + dev_info(dev, "AXI3: %08X %08X %08X %08X\n", + le32_to_cpu(desc[2].data[0]), le32_to_cpu(desc[2].data[1]), + le32_to_cpu(desc[2].data[2]), le32_to_cpu(desc[2].data[3])); + + return 0; +} + +static int hclge_log_rocee_ecc_error(struct hclge_dev *hdev) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc[2]; + int ret; + + ret = hclge_cmd_query_error(hdev, &desc[0], + HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD, + HCLGE_CMD_FLAG_NEXT, 0, 0); + if (ret) { + dev_err(dev, "failed(%d) to query ROCEE ECC error sts\n", ret); + return ret; + } + + dev_info(dev, "ECC1: %08X %08X %08X %08X %08X %08X\n", + le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), + le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), + le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); + dev_info(dev, "ECC2: %08X %08X %08X\n", le32_to_cpu(desc[1].data[0]), + le32_to_cpu(desc[1].data[1]), le32_to_cpu(desc[1].data[2])); + + return 0; +} + static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev) { struct device *dev = &hdev->pdev->dev; @@ -1456,19 +1516,33 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) status = le32_to_cpu(desc[0].data[0]); - if (status & HCLGE_ROCEE_RERR_INT_MASK) { - dev_warn(dev, "ROCEE RAS AXI rresp error\n"); - reset_type = HNAE3_FUNC_RESET; - } + if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) { + if (status & HCLGE_ROCEE_RERR_INT_MASK) + dev_warn(dev, "ROCEE RAS AXI rresp error\n"); + + if (status & HCLGE_ROCEE_BERR_INT_MASK) + dev_warn(dev, "ROCEE RAS AXI bresp error\n"); - if (status & HCLGE_ROCEE_BERR_INT_MASK) { - dev_warn(dev, "ROCEE RAS AXI bresp error\n"); reset_type = HNAE3_FUNC_RESET; + + ret = hclge_log_rocee_axi_error(hdev); + if (ret) { + dev_err(dev, "failed(%d) to process axi error\n", ret); + /* reset everything for now */ + return HNAE3_GLOBAL_RESET; + } } if (status & HCLGE_ROCEE_ECC_INT_MASK) { dev_warn(dev, "ROCEE RAS 2bit ECC error\n"); reset_type = HNAE3_GLOBAL_RESET; + + ret = hclge_log_rocee_ecc_error(hdev); + if (ret) { + dev_err(dev, "failed(%d) to process ecc error\n", ret); + /* reset everything for now */ + return HNAE3_GLOBAL_RESET; + } } if (status & HCLGE_ROCEE_OVF_INT_MASK) { @@ -1478,7 +1552,6 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) /* reset everything for now */ return HNAE3_GLOBAL_RESET; } - reset_type = HNAE3_FUNC_RESET; } /* clear error status */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h index 81d115a..6684733 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h @@ -94,6 +94,7 @@ #define HCLGE_ROCEE_RAS_CE_INT_EN_MASK 0x1 #define HCLGE_ROCEE_RERR_INT_MASK BIT(0) #define HCLGE_ROCEE_BERR_INT_MASK BIT(1) +#define HCLGE_ROCEE_AXI_ERR_INT_MASK GENMASK(1, 0) #define HCLGE_ROCEE_ECC_INT_MASK BIT(2) #define HCLGE_ROCEE_OVF_INT_MASK BIT(3) #define HCLGE_ROCEE_OVF_ERR_INT_MASK 0x10000 -- 2.7.4