Add the following fields to the aer_event tracepoint so that Advisory
Non-Fatal and other errors can be better understood by external observers:

  - cor_status          (Correctable Error Status)
  - cor_mask            (Correctable Error Mask)
  - uncor_status        (Uncorrectable Error Status)
  - uncor_severity      (Uncorrectable Error Severity)
  - uncor_mask          (Uncorrectable Error Mask)
  - aer_cap_ctrl        (AER Capabilities and Control)
  - link_status         (Link Status)
  - device_status       (Device Status)
  - device_control_2    (Device Control 2)

In addition to the raw register values, the following fields are
extracted and logged for better observability:

  - "First Error Pointer" and "Completion Timeout Prefix/Header Log
    Capable" from "AER Capabilities and Control"
  - "Completion Timeout Value" and "Completion Timeout Disable"
    from "Device Control 2"

Signed-off-by: "Wang, Qingshun" <qingshun.w...@linux.intel.com>
---
 drivers/pci/pcie/aer.c        | 17 +++++++++++--
 include/ras/ras_event.h       | 48 ++++++++++++++++++++++++++++++++---
 include/uapi/linux/pci_regs.h |  1 +
 3 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index eec3406f727a..2f5639f6c40f 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -757,6 +757,7 @@ void aer_print_error(struct pci_dev *dev, struct 
aer_err_info *info)
        int layer, agent;
        int id = pci_dev_id(dev);
        const char *level;
+       struct aer_capability_regs aer_caps;
 
        if (info->severity == AER_CORRECTABLE) {
                status = info->cor_status;
@@ -793,8 +794,18 @@ void aer_print_error(struct pci_dev *dev, struct 
aer_err_info *info)
        if (info->id && info->error_dev_num > 1 && info->id == id)
                pci_err(dev, "  Error of this Agent is reported first\n");
 
+       aer_caps = (struct aer_capability_regs) {
+         .cor_status = info->cor_status,
+         .cor_mask = info->cor_mask,
+         .uncor_status = info->uncor_status,
+         .uncor_severity = info->uncor_severity,
+         .uncor_mask = info->uncor_mask,
+         .cap_control = info->aer_cap_ctrl
+       };
        trace_aer_event(dev_name(&dev->dev), (status & ~mask),
-                       info->severity, info->tlp_header_valid, &info->tlp);
+                       info->severity, info->tlp_header_valid, &info->tlp,
+                       &aer_caps, info->link_status,
+                       info->device_status, info->device_control_2);
 }
 
 static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
@@ -870,7 +881,9 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
                __print_tlp_header(dev, &aer->header_log);
 
        trace_aer_event(dev_name(&dev->dev), (status & ~mask),
-                       aer_severity, tlp_header_valid, &aer->header_log);
+                       aer_severity, tlp_header_valid, &aer->header_log,
+                       aer, info.link_status,
+                       info.device_status, info.device_control_2);
 }
 EXPORT_SYMBOL_NS_GPL(pci_print_aer, CXL);
 
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index cbd3ddd7c33d..a94997073d90 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -300,9 +300,14 @@ TRACE_EVENT(aer_event,
                 const u32 status,
                 const u8 severity,
                 const u8 tlp_header_valid,
-                struct aer_header_log_regs *tlp),
+                struct aer_header_log_regs *tlp,
+                struct aer_capability_regs *aer_caps,
+                const u16 link_status,
+                const u16 device_status,
+                const u16 device_control_2),
 
-       TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp),
+       TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp,
+               aer_caps, link_status, device_status, device_control_2),
 
        TP_STRUCT__entry(
                __string(       dev_name,       dev_name        )
@@ -310,6 +315,10 @@ TRACE_EVENT(aer_event,
                __field(        u8,             severity        )
                __field(        u8,             tlp_header_valid)
                __array(        u32,            tlp_header, 4   )
+               __field_struct(struct aer_capability_regs, aer_caps)
+               __field(        u16,            link_status     )
+               __field(        u16,            device_status   )
+               __field(        u16,            device_control_2)
        ),
 
        TP_fast_assign(
@@ -317,6 +326,10 @@ TRACE_EVENT(aer_event,
                __entry->status         = status;
                __entry->severity       = severity;
                __entry->tlp_header_valid = tlp_header_valid;
+               __entry->aer_caps       = *aer_caps;
+               __entry->link_status    = link_status;
+               __entry->device_status  = device_status;
+               __entry->device_control_2 = device_control_2;
                if (tlp_header_valid) {
                        __entry->tlp_header[0] = tlp->dw0;
                        __entry->tlp_header[1] = tlp->dw1;
@@ -325,7 +338,20 @@ TRACE_EVENT(aer_event,
                }
        ),
 
-       TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n",
+       TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s, "
+                 "Correctable Error Status=0x%08x, "
+                 "Correctable Error Mask=0x%08x, "
+                 "Uncorrectable Error Status=0x%08x, "
+                 "Uncorrectable Error Severity=0x%08x, "
+                 "Uncorrectable Error Mask=0x%08x, "
+                 "AER Capability and Control=0x%08x, "
+                 "First Error Pointer=0x%x, "
+                 "Completion Timeout Prefix/Header Log Capable=%s, "
+                 "Link Status=0x%04x, "
+                 "Device Status=0x%04x, "
+                 "Device Control 2=0x%04x, "
+                 "Completion Timeout Value=0x%x, "
+                 "Completion Timeout Disable=%s\n",
                __get_str(dev_name),
                __entry->severity == AER_CORRECTABLE ? "Corrected" :
                        __entry->severity == AER_FATAL ?
@@ -335,7 +361,21 @@ TRACE_EVENT(aer_event,
                __print_flags(__entry->status, "|", aer_uncorrectable_errors),
                __entry->tlp_header_valid ?
                        __print_array(__entry->tlp_header, 4, 4) :
-                       "Not available")
+                       "Not available",
+               __entry->aer_caps.cor_status,
+               __entry->aer_caps.cor_mask,
+               __entry->aer_caps.uncor_status,
+               __entry->aer_caps.uncor_severity,
+               __entry->aer_caps.uncor_mask,
+               __entry->aer_caps.cap_control,
+               PCI_ERR_CAP_FEP(__entry->aer_caps.cap_control),
+               __entry->aer_caps.cap_control & PCI_ERR_CAP_CTO_LOGC ? "True" : "False",
+               __entry->link_status,
+               __entry->device_status,
+               __entry->device_control_2,
+               __entry->device_control_2 & PCI_EXP_DEVCTL2_COMP_TIMEOUT,
+               __entry->device_control_2 & PCI_EXP_DEVCTL2_COMP_TMOUT_DIS ?
+                                           "True" : "False")
 );
 
 /*
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index a39193213ff2..54160ed2a8c9 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -787,6 +787,7 @@
 #define  PCI_ERR_CAP_ECRC_GENE 0x00000040      /* ECRC Generation Enable */
 #define  PCI_ERR_CAP_ECRC_CHKC 0x00000080      /* ECRC Check Capable */
 #define  PCI_ERR_CAP_ECRC_CHKE 0x00000100      /* ECRC Check Enable */
+#define  PCI_ERR_CAP_CTO_LOGC  0x00001000      /* Completion Timeout Prefix/Header Log Capable */
 #define PCI_ERR_HEADER_LOG     0x1c    /* Header Log Register (16 bytes) */
 #define PCI_ERR_ROOT_COMMAND   0x2c    /* Root Error Command */
 #define  PCI_ERR_ROOT_CMD_COR_EN       0x00000001 /* Correctable Err Reporting 
Enable */
-- 
2.42.0

Reply via email to