Add support for management firmware (MFW) crash dump collection. Signed-off-by: Rasesh Mody <rasesh.mody at qlogic.com> --- drivers/net/qede/base/ecore.h | 3 + drivers/net/qede/base/ecore_dev.c | 22 ++--- drivers/net/qede/base/ecore_dev_api.h | 29 ++++--- drivers/net/qede/base/ecore_mcp.c | 151 ++++++++++++++++++++++++++++++++++ drivers/net/qede/base/ecore_mcp.h | 45 ++++++++++ drivers/net/qede/base/ecore_mcp_api.h | 10 +++ drivers/net/qede/qede_main.c | 17 ++-- 7 files changed, 249 insertions(+), 28 deletions(-)
diff --git a/drivers/net/qede/base/ecore.h b/drivers/net/qede/base/ecore.h index 874c3a3..89e2bd0 100644 --- a/drivers/net/qede/base/ecore.h +++ b/drivers/net/qede/base/ecore.h @@ -735,6 +735,9 @@ struct ecore_dev { bool attn_clr_en; + /* Indicates whether allowing the MFW to collect a crash dump */ + bool mdump_en; + /* Indicates if the reg_fifo is checked after any register access */ bool chk_reg_fifo; diff --git a/drivers/net/qede/base/ecore_dev.c b/drivers/net/qede/base/ecore_dev.c index 319edeb..b530173 100644 --- a/drivers/net/qede/base/ecore_dev.c +++ b/drivers/net/qede/base/ecore_dev.c @@ -1619,24 +1619,20 @@ static void ecore_reset_mb_shadow(struct ecore_hwfn *p_hwfn, } enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev, - struct ecore_tunn_start_params *p_tunn, - bool b_hw_start, - enum ecore_int_mode int_mode, - bool allow_npar_tx_switch, - const u8 *bin_fw_data) + struct ecore_hw_init_params *p_params) { enum _ecore_status_t rc, mfw_rc; u32 load_code, param; int i, j; - if ((int_mode == ECORE_INT_MODE_MSI) && (p_dev->num_hwfns > 1)) { + if (p_params->int_mode == ECORE_INT_MODE_MSI && p_dev->num_hwfns > 1) { DP_NOTICE(p_dev, false, "MSI mode is not supported for CMT devices\n"); return ECORE_INVAL; } if (IS_PF(p_dev)) { - rc = ecore_init_fw_data(p_dev, bin_fw_data); + rc = ecore_init_fw_data(p_dev, p_params->bin_fw_data); if (rc != ECORE_SUCCESS) return rc; } @@ -1733,9 +1729,11 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev, /* Fall into */ case FW_MSG_CODE_DRV_LOAD_FUNCTION: rc = ecore_hw_init_pf(p_hwfn, p_hwfn->p_main_ptt, - p_tunn, p_hwfn->hw_info.hw_mode, - b_hw_start, int_mode, - allow_npar_tx_switch); + p_params->p_tunn, + p_hwfn->hw_info.hw_mode, + p_params->b_hw_start, + p_params->int_mode, + p_params->allow_npar_tx_switch); break; default: rc = ECORE_NOTIMPL; @@ -1759,6 +1757,10 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev, return mfw_rc; } + ecore_mcp_mdump_get_info(p_hwfn, p_hwfn->p_main_ptt); + 
ecore_mcp_mdump_set_values(p_hwfn, p_hwfn->p_main_ptt, + p_params->epoch); + /* send DCBX attention request command */ DP_VERBOSE(p_hwfn, ECORE_MSG_DCB, "sending phony dcbx set command to trigger DCBx attention handling\n"); diff --git a/drivers/net/qede/base/ecore_dev_api.h b/drivers/net/qede/base/ecore_dev_api.h index 1a810b5..042c0af 100644 --- a/drivers/net/qede/base/ecore_dev_api.h +++ b/drivers/net/qede/base/ecore_dev_api.h @@ -57,26 +57,31 @@ enum _ecore_status_t ecore_resc_alloc(struct ecore_dev *p_dev); */ void ecore_resc_setup(struct ecore_dev *p_dev); +struct ecore_hw_init_params { + /* tunnelling parameters */ + struct ecore_tunn_start_params *p_tunn; + bool b_hw_start; + /* interrupt mode [msix, inta, etc.] to use */ + enum ecore_int_mode int_mode; +/* npar tx switching to be used for vports configured for tx-switching */ + + bool allow_npar_tx_switch; + /* binary fw data pointer in binary fw file */ + const u8 *bin_fw_data; + /* the OS Epoch time in seconds */ + u32 epoch; +}; + /** * @brief ecore_hw_init - * * @param p_dev - * @param p_tunn - tunneling parameters - * @param b_hw_start - * @param int_mode - interrupt mode [msix, inta, etc.] to use. - * @param allow_npar_tx_switch - npar tx switching to be used - * for vports configured for tx-switching. - * @param bin_fw_data - binary fw data pointer in binary fw file. - * Pass NULL if not using binary fw file. 
+ * @param p_params * * @return enum _ecore_status_t */ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev, - struct ecore_tunn_start_params *p_tunn, - bool b_hw_start, - enum ecore_int_mode int_mode, - bool allow_npar_tx_switch, - const u8 *bin_fw_data); + struct ecore_hw_init_params *p_params); /** * @brief ecore_hw_timers_stop_all - diff --git a/drivers/net/qede/base/ecore_mcp.c b/drivers/net/qede/base/ecore_mcp.c index cf67fa1..500368e 100644 --- a/drivers/net/qede/base/ecore_mcp.c +++ b/drivers/net/qede/base/ecore_mcp.c @@ -1043,6 +1043,154 @@ static void ecore_mcp_handle_fan_failure(struct ecore_hwfn *p_hwfn, ecore_hw_err_notify(p_hwfn, ECORE_HW_ERR_FAN_FAIL); } +static enum _ecore_status_t +ecore_mcp_mdump_cmd(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt, + u32 mdump_cmd, union drv_union_data *p_data_src, + union drv_union_data *p_data_dst, u32 *p_mcp_resp) +{ + struct ecore_mcp_mb_params mb_params; + enum _ecore_status_t rc; + + OSAL_MEM_ZERO(&mb_params, sizeof(mb_params)); + mb_params.cmd = DRV_MSG_CODE_MDUMP_CMD; + mb_params.param = mdump_cmd; + mb_params.p_data_src = p_data_src; + mb_params.p_data_dst = p_data_dst; + rc = ecore_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); + if (rc != ECORE_SUCCESS) + return rc; + + *p_mcp_resp = mb_params.mcp_resp; + if (*p_mcp_resp == FW_MSG_CODE_MDUMP_INVALID_CMD) { + DP_NOTICE(p_hwfn, false, + "MFW claims that the mdump command is illegal [mdump_cmd 0x%x]\n", + mdump_cmd); + rc = ECORE_INVAL; + } + + return rc; +} + +static enum _ecore_status_t ecore_mcp_mdump_ack(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt) +{ + u32 mcp_resp; + + return ecore_mcp_mdump_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MDUMP_ACK, + OSAL_NULL, OSAL_NULL, &mcp_resp); +} + +enum _ecore_status_t ecore_mcp_mdump_set_values(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt, + u32 epoch) +{ + union drv_union_data union_data; + u32 mcp_resp; + + OSAL_MEMCPY(&union_data.raw_data, &epoch, sizeof(epoch)); + + return 
ecore_mcp_mdump_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MDUMP_SET_VALUES, + &union_data, OSAL_NULL, &mcp_resp); +} + +enum _ecore_status_t ecore_mcp_mdump_trigger(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt) +{ + u32 mcp_resp; + + return ecore_mcp_mdump_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MDUMP_TRIGGER, + OSAL_NULL, OSAL_NULL, &mcp_resp); +} + +enum _ecore_status_t ecore_mcp_mdump_clear_logs(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt) +{ + u32 mcp_resp; + + return ecore_mcp_mdump_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MDUMP_CLEAR_LOGS, + OSAL_NULL, OSAL_NULL, &mcp_resp); +} + +static enum _ecore_status_t +ecore_mcp_mdump_get_config(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt, + struct mdump_config_stc *p_mdump_config) +{ + union drv_union_data union_data; + u32 mcp_resp; + enum _ecore_status_t rc; + + rc = ecore_mcp_mdump_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MDUMP_GET_CONFIG, + OSAL_NULL, &union_data, &mcp_resp); + if (rc != ECORE_SUCCESS) + return rc; + + /* A zero response implies that the mdump command is not supported */ + if (!mcp_resp) + return ECORE_NOTIMPL; + + if (mcp_resp != FW_MSG_CODE_OK) { + DP_NOTICE(p_hwfn, false, + "Failed to get the mdump configuration and logs info [mcp_resp 0x%x]\n", + mcp_resp); + rc = ECORE_UNKNOWN_ERROR; + } + + OSAL_MEMCPY(p_mdump_config, &union_data.mdump_config, + sizeof(*p_mdump_config)); + + return rc; +} + +enum _ecore_status_t ecore_mcp_mdump_get_info(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt) +{ + struct mdump_config_stc mdump_config; + enum _ecore_status_t rc; + + rc = ecore_mcp_mdump_get_config(p_hwfn, p_ptt, &mdump_config); + if (rc != ECORE_SUCCESS) + return rc; + + DP_VERBOSE(p_hwfn, ECORE_MSG_SP, + "MFW mdump_config: version 0x%x, config 0x%x, epoch 0x%x, num_of_logs 0x%x, valid_logs 0x%x\n", + mdump_config.version, mdump_config.config, mdump_config.epoc, + mdump_config.num_of_logs, mdump_config.valid_logs); + + if (mdump_config.valid_logs > 0) { + DP_NOTICE(p_hwfn, false, + "* * * IMPORTANT - HW 
ERROR register dump captured by device * * *\n"); + } + + return rc; +} + +void ecore_mcp_mdump_enable(struct ecore_dev *p_dev, bool mdump_enable) +{ + p_dev->mdump_en = mdump_enable; +} + +static void ecore_mcp_handle_critical_error(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt) +{ + /* In CMT mode - no need for more than a single acknowledgment to the + * MFW, and no more than a single notification to the upper driver. + */ + if (p_hwfn != ECORE_LEADING_HWFN(p_hwfn->p_dev)) + return; + + DP_NOTICE(p_hwfn, false, + "Received a critical error notification from the MFW!\n"); + + if (p_hwfn->p_dev->mdump_en) { + DP_NOTICE(p_hwfn, false, + "Not acknowledging the notification to allow the MFW crash dump\n"); + return; + } + + ecore_mcp_mdump_ack(p_hwfn, p_ptt); + ecore_hw_err_notify(p_hwfn, ECORE_HW_ERR_HW_ATTN); +} + enum _ecore_status_t ecore_mcp_handle_events(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt) { @@ -1104,6 +1252,9 @@ enum _ecore_status_t ecore_mcp_handle_events(struct ecore_hwfn *p_hwfn, case MFW_DRV_MSG_FAILURE_DETECTED: ecore_mcp_handle_fan_failure(p_hwfn, p_ptt); break; + case MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED: + ecore_mcp_handle_critical_error(p_hwfn, p_ptt); + break; default: /* @DPDK */ DP_NOTICE(p_hwfn, false, diff --git a/drivers/net/qede/base/ecore_mcp.h b/drivers/net/qede/base/ecore_mcp.h index 64c639f..d3103ff 100644 --- a/drivers/net/qede/base/ecore_mcp.h +++ b/drivers/net/qede/base/ecore_mcp.h @@ -303,6 +303,51 @@ int __ecore_configure_pf_min_bandwidth(struct ecore_hwfn *p_hwfn, enum _ecore_status_t ecore_mcp_mask_parities(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt, u32 mask_parities); +/** + * @brief - Sends crash mdump related info to the MFW. + * + * @param p_hwfn + * @param p_ptt + * + * @param return ECORE_SUCCESS upon success. + */ +enum _ecore_status_t ecore_mcp_mdump_set_values(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt, + u32 epoch); + +/** + * @brief - Triggers a MFW crash dump procedure. 
+ * + * @param p_hwfn + * @param p_ptt + * + * @return ECORE_SUCCESS upon success. + */ +enum _ecore_status_t ecore_mcp_mdump_trigger(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt); + +/** + * @brief - Clears the MFW crash dump logs. + * + * @param p_hwfn + * @param p_ptt + * + * @return ECORE_SUCCESS upon success. + */ +enum _ecore_status_t ecore_mcp_mdump_clear_logs(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt); + +/** + * @brief - Gets the MFW crash dump configuration and logs info. + * + * @param p_hwfn + * @param p_ptt + * + * @return ECORE_SUCCESS upon success. + */ +enum _ecore_status_t ecore_mcp_mdump_get_info(struct ecore_hwfn *p_hwfn, + struct ecore_ptt *p_ptt); + enum _ecore_status_t ecore_mcp_get_resc_info(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt, struct resource_info *p_resc_info, diff --git a/drivers/net/qede/base/ecore_mcp_api.h b/drivers/net/qede/base/ecore_mcp_api.h index ff4f1ca..c26b494 100644 --- a/drivers/net/qede/base/ecore_mcp_api.h +++ b/drivers/net/qede/base/ecore_mcp_api.h @@ -792,4 +792,14 @@ enum _ecore_status_t ecore_mcp_mem_ecc_events(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt, u64 *num_events); +/** + * @brief Sets whether a critical error notification from the MFW is acked, or + * is it being ignored and thus allowing the MFW crash dump. 
+ * + * @param p_dev + * @param mdump_enable + * + */ +void ecore_mcp_mdump_enable(struct ecore_dev *p_dev, bool mdump_enable); + #endif diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c index e4ef4f0..60655b7 100644 --- a/drivers/net/qede/qede_main.c +++ b/drivers/net/qede/qede_main.c @@ -7,6 +7,7 @@ */ #include <limits.h> +#include <time.h> #include <rte_alarm.h> #include "qede_ethdev.h" @@ -221,6 +222,7 @@ static int qed_slowpath_start(struct ecore_dev *edev, const uint8_t *data = NULL; struct ecore_hwfn *hwfn; struct ecore_mcp_drv_version drv_version; + struct ecore_hw_init_params hw_init_params; struct qede_dev *qdev = (struct qede_dev *)edev; int rc; #ifdef QED_ENC_SUPPORTED @@ -259,7 +261,6 @@ static int qed_slowpath_start(struct ecore_dev *edev, qed_start_iov_task(edev); #endif - /* Start the slowpath */ #ifdef CONFIG_ECORE_BINARY_FW if (IS_PF(edev)) data = (const uint8_t *)edev->firmware + sizeof(u32); @@ -267,6 +268,8 @@ static int qed_slowpath_start(struct ecore_dev *edev, allow_npar_tx_switching = npar_tx_switching ? 
true : false; + /* Start the slowpath */ + memset(&hw_init_params, 0, sizeof(hw_init_params)); #ifdef QED_ENC_SUPPORTED memset(&tunn_info, 0, sizeof(tunn_info)); tunn_info.tunn_mode |= 1 << QED_MODE_VXLAN_TUNN | @@ -276,12 +279,14 @@ static int qed_slowpath_start(struct ecore_dev *edev, tunn_info.tunn_clss_vxlan = QED_TUNN_CLSS_MAC_VLAN; tunn_info.tunn_clss_l2gre = QED_TUNN_CLSS_MAC_VLAN; tunn_info.tunn_clss_ipgre = QED_TUNN_CLSS_MAC_VLAN; - rc = ecore_hw_init(edev, &tunn_info, true, ECORE_INT_MODE_MSIX, - allow_npar_tx_switching, data); -#else - rc = ecore_hw_init(edev, NULL, true, ECORE_INT_MODE_MSIX, - allow_npar_tx_switching, data); + hw_init_params.p_tunn = &tunn_info; #endif + hw_init_params.b_hw_start = true; + hw_init_params.int_mode = ECORE_INT_MODE_MSIX; + hw_init_params.allow_npar_tx_switch = allow_npar_tx_switching; + hw_init_params.bin_fw_data = data; + hw_init_params.epoch = (u32)time(NULL); + rc = ecore_hw_init(edev, &hw_init_params); if (rc) { DP_ERR(edev, "ecore_hw_init failed\n"); goto err2; -- 1.8.3.1