[PATCH 3/3] habanalabs: ignore f/w status error
In case firmware has a bug and erroneously reports a status error (e.g. device unusable) during boot, allow the user to tell the driver to continue the boot regardless of the error status. This will be done via kernel parameter which exposes a mask. The user that loads the driver can decide exactly which status error to ignore and which to take into account. The bitmask is according to defines in hl_boot_if.h Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c| 3 ++- drivers/misc/habanalabs/common/habanalabs.h | 7 +++ drivers/misc/habanalabs/common/habanalabs_drv.c | 7 +++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 377a7ca886fe..0713b2c12d54 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -400,7 +400,8 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg, err_exists = true; } - if (err_exists) + if (err_exists && ((err_val & ~CPU_BOOT_ERR0_ENABLED) & + lower_32_bits(hdev->boot_error_status_mask))) return -EIO; return 0; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 91291a8e201e..6579f8767abd 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1962,6 +1962,12 @@ struct hl_mmu_funcs { * @clock_gating_mask: is clock gating enabled. bitmask that represents the * different engines. See debugfs-driver-habanalabs for * details. + * @boot_error_status_mask: contains a mask of the device boot error status. + * Each bit represents a different error, according to + * the defines in hl_boot_if.h. If the bit is cleared, + * the error will be ignored by the driver during + * device initialization. Mainly used to debug and + * workaround firmware bugs * @in_reset: is device in reset flow. * @curr_pll_profile: current PLL profile. * @card_type: Various ASICs have several card types. This indicates the card @@ -2077,6 +2083,7 @@ struct hl_device { u64 timeout_jiffies; u64 max_power; u64 clock_gating_mask; + u64 boot_error_status_mask; atomic_tin_reset; enum hl_pll_frequency curr_pll_profile; enum cpucp_card_types card_type; diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 7135f1e03864..64d1530db985 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -30,6 +30,7 @@ static DEFINE_MUTEX(hl_devs_idr_lock); static int timeout_locked = 30; static int reset_on_lockup = 1; static int memory_scrub = 1; +static ulong boot_error_status_mask = ULONG_MAX; module_param(timeout_locked, int, 0444); MODULE_PARM_DESC(timeout_locked, @@ -43,6 +44,10 @@ module_param(memory_scrub, int, 0444); MODULE_PARM_DESC(memory_scrub, "Scrub device memory in various states (0 = no, 1 = yes, default yes)"); +module_param(boot_error_status_mask, ulong, 0444); +MODULE_PARM_DESC(boot_error_status_mask, + "Mask of the error status during device CPU boot (If bitX is cleared then error X is masked. Default all 1's)"); + #define PCI_VENDOR_ID_HABANALABS 0x1da3 #define PCI_IDS_GOYA 0x0001 @@ -319,6 +324,8 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, hdev->major = hl_major; hdev->reset_on_lockup = reset_on_lockup; hdev->memory_scrub = memory_scrub; + hdev->boot_error_status_mask = boot_error_status_mask; + hdev->pldm = 0; set_driver_behavior_per_device(hdev); -- 2.25.1
[PATCH 2/3] habanalabs: change error level of security not ready
This error indicates a problem in the security initialization inside the f/w so we need to stop the device loading because it won't be usable. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 7 ++- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index fff29f057b6d..377a7ca886fe 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -362,12 +362,9 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg, } if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) { - dev_warn(hdev->dev, + dev_err(hdev->dev, "Device boot warning - security not ready\n"); - /* This is a warning so we don't want it to disable the -* device -*/ - err_val &= ~CPU_BOOT_ERR0_SECURITY_NOT_RDY; + err_exists = true; } if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) { -- 2.25.1
[PATCH 1/3] habanalabs: skip reading f/w errors on bad status
If we read all FF from the boot status register, then something is totally wrong and there is no point of reading specific errors. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 7cf82da67dab..fff29f057b6d 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -850,8 +850,13 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, if (rc) { dev_err(hdev->dev, "Failed to read preboot version\n"); detect_cpu_boot_status(hdev, status); - fw_read_errors(hdev, boot_err0_reg, - cpu_security_boot_status_reg); + + /* If we read all FF, then something is totally wrong, no point +* of reading specific errors +*/ + if (status != -1) + fw_read_errors(hdev, boot_err0_reg, + cpu_security_boot_status_reg); return -EIO; } -- 2.25.1
Re: [PATCH 2/3] habanalabs: support legacy and new pll indexes
Thanks Nathan, I have already a pending patch that fixes this issue: https://lkml.org/lkml/2021/4/17/73 On Thu, Apr 15, 2021 at 5:17 PM Nathan Chancellor wrote: > > On Sun, Mar 21, 2021 at 10:11:29PM +0200, Oded Gabbay wrote: > > From: Ohad Sharabi > > > > In order to use minimum of hard coded values common to LKD and F/W > > a dynamic method to work with PLLs is introduced in this patch. > > Formerly asic specific PLL numbering is now common for all asics. > > To be backward compatible a bit in dev status is defined, if the bit is > > not set LKD will keep working with old PLL numbering. > > > > Signed-off-by: Ohad Sharabi > > Reviewed-by: Oded Gabbay > > Signed-off-by: Oded Gabbay > > --- > > drivers/misc/habanalabs/common/firmware_if.c | 49 ++- > > drivers/misc/habanalabs/common/habanalabs.h | 14 -- > > drivers/misc/habanalabs/common/sysfs.c| 24 ++--- > > drivers/misc/habanalabs/gaudi/gaudi.c | 33 + > > drivers/misc/habanalabs/goya/goya.c | 26 ++ > > .../misc/habanalabs/include/common/cpucp_if.h | 41 > > .../habanalabs/include/common/hl_boot_if.h| 6 +++ > > .../habanalabs/include/gaudi/gaudi_fw_if.h| 14 -- > > .../misc/habanalabs/include/goya/goya_fw_if.h | 11 - > > 9 files changed, 182 insertions(+), 36 deletions(-) > > > > diff --git a/drivers/misc/habanalabs/common/firmware_if.c > > b/drivers/misc/habanalabs/common/firmware_if.c > > index 2a58edaf984a..092691a8917d 100644 > > --- a/drivers/misc/habanalabs/common/firmware_if.c > > +++ b/drivers/misc/habanalabs/common/firmware_if.c > > @@ -539,18 +539,63 @@ int hl_fw_cpucp_total_energy_get(struct hl_device > > *hdev, u64 *total_energy) > > return rc; > > } > > > > -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, > > +int get_used_pll_index(struct hl_device *hdev, enum pll_index > > input_pll_index, > > + enum pll_index *pll_index) > > +{ > > + struct asic_fixed_properties *prop = &hdev->asic_prop; > > + u8 pll_byte, pll_bit_off; > > + bool dynamic_pll; > > + > > + if (input_pll_index >= PLL_MAX) { > > + dev_err(hdev->dev, "PLL index %d is out of range\n", > > + input_pll_index); > > + return -EINVAL; > > + } > > + > > + dynamic_pll = prop->fw_security_status_valid && > > + (prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN); > > + > > + if (!dynamic_pll) { > > + /* > > + * in case we are working with legacy FW (each asic has unique > > + * PLL numbering) extract the legacy numbering > > + */ > > + *pll_index = hdev->legacy_pll_map[input_pll_index]; > > + return 0; > > + } > > + > > + /* PLL map is a u8 array */ > > + pll_byte = prop->cpucp_info.pll_map[input_pll_index >> 3]; > > + pll_bit_off = input_pll_index & 0x7; > > + > > + if (!(pll_byte & BIT(pll_bit_off))) { > > + dev_err(hdev->dev, "PLL index %d is not supported\n", > > + input_pll_index); > > + return -EINVAL; > > + } > > + > > + *pll_index = input_pll_index; > > + > > + return 0; > > +} > > + > > +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index > > pll_index, > > u16 *pll_freq_arr) > > { > > struct cpucp_packet pkt; > > + enum pll_index used_pll_idx; > > u64 result; > > int rc; > > > > + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); > > + if (rc) > > + return rc; > > + > > memset(&pkt, 0, sizeof(pkt)); > > > > pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET << > > CPUCP_PKT_CTL_OPCODE_SHIFT); > > - pkt.pll_type = __cpu_to_le16(pll_index); > > + pkt.pll_type = __cpu_to_le16((u16)used_pll_idx); > > > > rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, > > sizeof(pkt), > > HL_CPUCP_INFO_TIMEOUT_USEC, &result); > > diff --git a/drivers/misc/habanalabs/common/habanalabs.h > > b/drivers/misc/habanalabs/common/habanalabs.h > &
[PATCH 3/7] habanalabs: update firmware files to latest
Update the firmware files to the latest from the firmware team. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/include/common/cpucp_if.h | 4 +++- drivers/misc/habanalabs/include/common/hl_boot_if.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index 27cd0ba99aa3..bf10ca8d2457 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -107,7 +107,9 @@ enum pq_init_status { PQ_INIT_STATUS_NA = 0, PQ_INIT_STATUS_READY_FOR_CP, PQ_INIT_STATUS_READY_FOR_HOST, - PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI + PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI, + PQ_INIT_STATUS_LEN_NOT_POWER_OF_TWO_ERR, + PQ_INIT_STATUS_ILLEGAL_Q_ADDR_ERR }; /* diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index e0a259e0495c..84c14688d69a 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -8,7 +8,7 @@ #ifndef HL_BOOT_IF_H #define HL_BOOT_IF_H -#define LKD_HARD_RESET_MAGIC 0xED7BD694 +#define LKD_HARD_RESET_MAGIC 0xED7BD694 /* deprecated - do not use */ #define HL_POWER9_HOST_MAGIC 0x1DA30009 #define BOOT_FIT_SRAM_OFFSET 0x20 -- 2.25.1
[PATCH 2/7] habanalabs: increase ELBI reset timeout for PLDM
From: Moti Haimovski On PLDM, in case of NIC hangs, the ELBI reset to take much longer than expected. As a result an increase in the ELBI reset timeout is required. Signed-off-by: Moti Haimovski Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/pci/pci.c b/drivers/misc/habanalabs/common/pci/pci.c index e941b7eef346..5d07ca53d9ce 100644 --- a/drivers/misc/habanalabs/common/pci/pci.c +++ b/drivers/misc/habanalabs/common/pci/pci.c @@ -10,7 +10,7 @@ #include -#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC (HL_PCI_ELBI_TIMEOUT_MSEC * 10) +#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC (HL_PCI_ELBI_TIMEOUT_MSEC * 100) #define IATU_REGION_CTRL_REGION_EN_MASKBIT(31) #define IATU_REGION_CTRL_MATCH_MODE_MASK BIT(30) -- 2.25.1
[PATCH 4/7] habanalabs: prepare preboot stage to dynamic f/w load
From: Ohad Sharabi Start the skeleton for the dynamic F/W load by marking current preboot code path as legacy. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 79 drivers/misc/habanalabs/common/habanalabs.h | 8 +- 2 files changed, 71 insertions(+), 16 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 7cf82da67dab..337c76fb5e3c 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -811,21 +811,15 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) } } -int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, - u32 cpu_security_boot_status_reg, u32 boot_err0_reg, - u32 timeout) +static int hl_fw_read_preboot_caps(struct hl_device *hdev, + u32 cpu_boot_status_reg, + u32 cpu_boot_caps_reg, + u32 boot_err0_reg, u32 timeout) { struct asic_fixed_properties *prop = &hdev->asic_prop; - u32 status, security_status; + u32 status; int rc; - /* pldm was added for cases in which we use preboot on pldm and want -* to load boot fit, but we can't wait for preboot because it runs -* very slowly -*/ - if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) || hdev->pldm) - return 0; - /* Need to check two possible scenarios: * * CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT - for newer firmwares where @@ -848,18 +842,41 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, timeout); if (rc) { - dev_err(hdev->dev, "Failed to read preboot version\n"); + dev_err(hdev->dev, "CPU boot ready status timeout\n"); detect_cpu_boot_status(hdev, status); - fw_read_errors(hdev, boot_err0_reg, - cpu_security_boot_status_reg); + fw_read_errors(hdev, boot_err0_reg, cpu_boot_status_reg); return -EIO; } + prop->fw_preboot_caps_map = RREG32(cpu_boot_caps_reg); + + /* +* For now- force dynamic_fw_load to false as LKD does not yet +* implements all necessary parts of it. +* TODO: once dynamic load is ready set to: +* prop->dynamic_fw_load = !!(prop->fw_preboot_caps_map & +*CPU_BOOT_DEV_STS0_FW_LD_COM_EN) +*/ + prop->dynamic_fw_load = 0; + + dev_dbg(hdev->dev, "Attempting %s FW load\n", + prop->dynamic_fw_load ? "dynamic" : "legacy"); + return 0; +} + +static int hl_fw_read_preboot_status_legacy(struct hl_device *hdev, + u32 cpu_boot_status_reg, u32 cpu_security_boot_status_reg, + u32 boot_err0_reg, u32 timeout) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u32 security_status; + int rc; + rc = hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT); if (rc) return rc; - security_status = RREG32(cpu_security_boot_status_reg); + security_status = prop->fw_preboot_caps_map; /* We read security status multiple times during boot: * 1. preboot - a. Check whether the security status bits are valid @@ -901,6 +918,38 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, return 0; } +int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, + u32 cpu_boot_caps_reg, u32 boot_err0_reg, + u32 timeout) +{ + int rc; + + /* pldm was added for cases in which we use preboot on pldm and want +* to load boot fit, but we can't wait for preboot because it runs +* very slowly +*/ + if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) || hdev->pldm) + return 0; + + /* +* In order to determine boot method (static VS dymanic) we need to +* read the boot caps register +*/ + rc = hl_fw_read_preboot_caps(hdev, cpu_boot_status_reg, + cpu_boot_caps_reg, boot_err0_reg, + timeout); + if (rc) + return rc; + + if (!hdev->asic_prop.dynamic_fw_load) + return hl_fw_read_preboot_status_legacy(hdev, cpu_boot_status_reg, + cpu_boot_caps_reg, boot_err0_reg, + timeout); + + dev_err(hdev->dev, "Dynamic FW load is not supported\n")
[PATCH 7/7] habanalabs: use mmu cache range invalidation
From: Alon Mizrahi Use mmu cache range invalidation instead of entire cache invalidation because it yields better performance. In GOYA and GAUDI, always use entire cache invalidation because these ASICs don't support range invalidation. Signed-off-by: Alon Mizrahi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 2 +- drivers/misc/habanalabs/common/memory.c | 8 ++-- drivers/misc/habanalabs/gaudi/gaudi.c | 49 ++-- drivers/misc/habanalabs/goya/goya.c | 51 ++--- 4 files changed, 16 insertions(+), 94 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 9ee7a583b614..07c2713ba372 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1035,7 +1035,7 @@ struct hl_asic_funcs { int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard, u32 flags); int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, - u32 asid, u64 va, u64 size); + u32 flags, u32 asid, u64 va, u64 size); int (*send_heartbeat)(struct hl_device *hdev); void (*set_clock_gating)(struct hl_device *hdev); void (*disable_clock_gating)(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 2938cbbafbbc..b92878d76f23 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -1117,7 +1117,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, goto map_err; } - rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type); + rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, false, + *vm_type, ctx->asid, ret_vaddr, phys_pg_pack->total_size); mutex_unlock(&ctx->mmu_lock); @@ -1261,8 +1262,9 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, * at the loop end rather than for each iteration */ if (!ctx_free) - rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true, - *vm_type); + rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, true, + *vm_type, ctx->asid, vaddr, + phys_pg_pack->total_size); mutex_unlock(&ctx->mmu_lock); diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 45c40549ded4..1d5930578e07 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7863,52 +7863,13 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, } static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, - bool is_hard, u32 asid, u64 va, u64 size) + bool is_hard, u32 flags, + u32 asid, u64 va, u64 size) { - struct gaudi_device *gaudi = hdev->asic_specific; - u32 status, timeout_usec; - u32 inv_data; - u32 pi; - int rc; - - if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) || - hdev->hard_reset_pending) - return 0; - - if (hdev->pldm) - timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; - else - timeout_usec = MMU_CONFIG_TIMEOUT_USEC; - - /* -* TODO: currently invalidate entire L0 & L1 as in regular hard -* invalidation. Need to apply invalidation of specific cache -* lines with mask of ASID & VA & size. -* Note that L1 with be flushed entirely in any case. + /* Treat as invalidate all because there is no range invalidation +* in Gaudi */ - - /* L0 & L1 invalidation */ - inv_data = RREG32(mmSTLB_CACHE_INV); - /* PI is 8 bit */ - pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF; - WREG32(mmSTLB_CACHE_INV, - (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi); - - rc = hl_poll_timeout( - hdev, - mmSTLB_INV_CONSUMER_INDEX, - status, - status == pi, - 1000, - timeout_usec); - - if (rc) { - dev_err_ratelimited(hdev->dev, - "MMU cache invalidation timeout\n"); - hl_device_reset(hdev, HL_RESET_HARD); - } - - return rc; + return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); } static int gaudi_mmu_update_a
[PATCH 5/7] habanalabs: request f/w in separate function
From: Ohad Sharabi This refactor is needed due to the dynamic FW load in which requesting the FW file (and getting its attributes) is not immediately followed by copying FW file content. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 65 +--- 1 file changed, 44 insertions(+), 21 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 337c76fb5e3c..0a9e3cb86552 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -12,6 +12,47 @@ #include #define FW_FILE_MAX_SIZE 0x140 /* maximum size of 20MB */ + +static int hl_request_fw(struct hl_device *hdev, + const struct firmware **firmware_p, + const char *fw_name) +{ + size_t fw_size; + int rc; + + rc = request_firmware(firmware_p, fw_name, hdev->dev); + if (rc) { + dev_err(hdev->dev, "Firmware file %s is not found! (error %d)\n", + fw_name, rc); + goto out; + } + + fw_size = (*firmware_p)->size; + if ((fw_size % 4) != 0) { + dev_err(hdev->dev, "Illegal %s firmware size %zu\n", + fw_name, fw_size); + rc = -EINVAL; + goto release_fw; + } + + dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size); + + if (fw_size > FW_FILE_MAX_SIZE) { + dev_err(hdev->dev, + "FW file size %zu exceeds maximum of %u bytes\n", + fw_size, FW_FILE_MAX_SIZE); + rc = -EINVAL; + goto release_fw; + } + + return 0; + +release_fw: + release_firmware(*firmware_p); +out: + return rc; +} + /** * hl_fw_load_fw_to_device() - Load F/W code to device's memory. * @@ -33,29 +74,11 @@ int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, size_t fw_size; int rc; - rc = request_firmware(&fw, fw_name, hdev->dev); - if (rc) { - dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name); - goto out; - } + rc = hl_request_fw(hdev, &fw, fw_name); + if (rc) + return rc; fw_size = fw->size; - if ((fw_size % 4) != 0) { - dev_err(hdev->dev, "Illegal %s firmware size %zu\n", - fw_name, fw_size); - rc = -EINVAL; - goto out; - } - - dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size); - - if (fw_size > FW_FILE_MAX_SIZE) { - dev_err(hdev->dev, - "FW file size %zu exceeds maximum of %u bytes\n", - fw_size, FW_FILE_MAX_SIZE); - rc = -EINVAL; - goto out; - } if (size - src_offset > fw_size) { dev_err(hdev->dev, -- 2.25.1
[PATCH 6/7] habanalabs: refactor init device cpu code
From: Ohad Sharabi Replace multiple arguments to init device CPU function by passing firmware loader managing structure that is initialized per ASIC with the loader parameters. In addition, the FW loader management structure is now part of the habanalabs device, this way the loader parameters will be able to be communicated across various boot stages. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 30 -- drivers/misc/habanalabs/common/habanalabs.h | 32 +--- drivers/misc/habanalabs/gaudi/gaudi.c| 24 ++- drivers/misc/habanalabs/goya/goya.c | 24 ++- 4 files changed, 82 insertions(+), 28 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 0a9e3cb86552..a97d9c264adc 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -973,18 +973,26 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, return -EINVAL; } -int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, - u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, - u32 cpu_security_boot_status_reg, u32 boot_err0_reg, - bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout) +int hl_fw_init_cpu(struct hl_device *hdev) { + u32 cpu_msg_status_reg, cpu_timeout, msg_to_cpu_reg, status; + u32 cpu_boot_status_reg, cpu_security_boot_status_reg; struct asic_fixed_properties *prop = &hdev->asic_prop; - u32 status; + struct fw_load_mgr *fw_loader; int rc; if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) return 0; + /* init loader parameters */ + hdev->asic_funcs->init_firmware_loader(hdev); + fw_loader = &hdev->fw_loader; + cpu_security_boot_status_reg = fw_loader->cpu_boot_status_reg; + cpu_msg_status_reg = fw_loader->cpu_cmd_status_to_host_reg; + cpu_boot_status_reg = fw_loader->cpu_boot_status_reg; + msg_to_cpu_reg = fw_loader->kmd_msg_to_cpu_reg; + cpu_timeout = fw_loader->cpu_timeout; + dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n", cpu_timeout / USEC_PER_SEC); @@ -995,7 +1003,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, status, status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT, 1, - boot_fit_timeout); + fw_loader->boot_fit_timeout); if (rc) { dev_dbg(hdev->dev, @@ -1018,7 +1026,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, status, status == CPU_MSG_OK, 1, - boot_fit_timeout); + fw_loader->boot_fit_timeout); if (rc) { dev_err(hdev->dev, @@ -1088,7 +1096,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, if (rc) goto out; - if (skip_bmc) { + if (fw_loader->skip_bmc) { WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC); rc = hl_poll_timeout( @@ -1135,7 +1143,8 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, goto out; } - rc = fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg); + rc = fw_read_errors(hdev, fw_loader->boot_err0_reg, + cpu_security_boot_status_reg); if (rc) return rc; @@ -1164,7 +1173,8 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, return 0; out: - fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg); + fw_read_errors(hdev, fw_loader->boot_err0_reg, + cpu_security_boot_status_reg); return rc; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 6441a270e10a..9ee7a583b614 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -818,6 +818,28 @@ enum div_select_defs { DIV_SEL_DIVIDED_PLL = 3, }; +/** + * struct fw_load_mgr - manager FW loading process + * @kmd_msg_to_cpu_reg: register address for KMD->CPU messages + * @cpu_cmd_status_to_host_reg: register address for CPU command status response + * @cpu_boot_status_reg: boot status register + * @cpu_boot_dev_status_reg: boot device status register + * @boot_err0_reg: boot error register + * @cpu_timeout: CPU response timeout in usec + * @boot_fit_timeout: Boot fit load timeout in usec + * @skip_bmc:
[PATCH 1/7] habanalabs: expose ASIC specific PLL index
From: Bharat Jauhari Currently the user cannot interpret the PLL information based on index as its exposed as an integer. This commit exposes ASIC specific PLL indexes and maps it to a generic FW compatible index. Signed-off-by: Bharat Jauhari Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 34 +++- drivers/misc/habanalabs/common/habanalabs.h | 16 +++--- drivers/misc/habanalabs/common/sysfs.c | 4 +- drivers/misc/habanalabs/gaudi/gaudi.c| 55 +++- drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c | 12 ++--- drivers/misc/habanalabs/goya/goya.c | 47 +++-- drivers/misc/habanalabs/goya/goya_hwmgr.c| 40 +++--- include/uapi/misc/habanalabs.h | 33 8 files changed, 127 insertions(+), 114 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 832dd5c5bb06..7cf82da67dab 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -661,18 +661,13 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy) return rc; } -int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, +int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index, enum pll_index *pll_index) { struct asic_fixed_properties *prop = &hdev->asic_prop; u8 pll_byte, pll_bit_off; bool dynamic_pll; - - if (input_pll_index >= PLL_MAX) { - dev_err(hdev->dev, "PLL index %d is out of range\n", - input_pll_index); - return -EINVAL; - } + int fw_pll_idx; dynamic_pll = prop->fw_security_status_valid && (prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN); @@ -680,28 +675,39 @@ int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, if (!dynamic_pll) { /* * in case we are working with legacy FW (each asic has unique -* PLL numbering) extract the legacy numbering +* PLL numbering) use the driver based index as they are +* aligned with fw legacy numbering */ - *pll_index = hdev->legacy_pll_map[input_pll_index]; + *pll_index = input_pll_index; return 0; } + /* retrieve a FW compatible PLL index based on +* ASIC specific user request +*/ + fw_pll_idx = hdev->asic_funcs->map_pll_idx_to_fw_idx(input_pll_index); + if (fw_pll_idx < 0) { + dev_err(hdev->dev, "Invalid PLL index (%u) error %d\n", + input_pll_index, fw_pll_idx); + return -EINVAL; + } + /* PLL map is a u8 array */ - pll_byte = prop->cpucp_info.pll_map[input_pll_index >> 3]; - pll_bit_off = input_pll_index & 0x7; + pll_byte = prop->cpucp_info.pll_map[fw_pll_idx >> 3]; + pll_bit_off = fw_pll_idx & 0x7; if (!(pll_byte & BIT(pll_bit_off))) { dev_err(hdev->dev, "PLL index %d is not supported\n", - input_pll_index); + fw_pll_idx); return -EINVAL; } - *pll_index = input_pll_index; + *pll_index = fw_pll_idx; return 0; } -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index, u16 *pll_freq_arr) { struct cpucp_packet pkt; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 44e89da30b4a..91291a8e201e 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -930,6 +930,9 @@ enum div_select_defs { * driver is ready to receive asynchronous events. This * function should be called during the first init and * after every hard-reset of the device + * @get_msi_info: Retrieve asic-specific MSI ID of the f/w async event + * @map_pll_idx_to_fw_idx: convert driver specific per asic PLL index to + * generic f/w compatible PLL Indexes */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -1054,6 +1057,7 @@ struct hl_asic_funcs { u32 block_id, u32 block_size); void (*enable_events_from_fw)(struct hl_device *hdev); void (*get_msi_info)(u32 *table); + int (*map_pll_idx_to_fw_idx)(u32 pll_idx); }; @@ -1950,8 +1954,6 @@ struct hl_mmu_
Re: [PATCH][next] habanalabs/gaudi: Fix uninitialized return code rc when read size is zero
On Mon, Apr 12, 2021 at 9:41 PM Arnd Bergmann wrote: > > On Mon, Apr 12, 2021 at 6:11 PM Colin King wrote: > > > > From: Colin Ian King > > > > In the case where size is zero the while loop never assigns rc and the > > return value is uninitialized. Fix this by initializing rc to zero. > > > > Addresses-Coverity: ("Uninitialized scalar variable") > > Fixes: 639781dcab82 ("habanalabs/gaudi: add debugfs to DMA from the device") > > Signed-off-by: Colin Ian King > > --- > > drivers/misc/habanalabs/gaudi/gaudi.c | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c > > b/drivers/misc/habanalabs/gaudi/gaudi.c > > index 8730b691ec61..b751652f80a8 100644 > > --- a/drivers/misc/habanalabs/gaudi/gaudi.c > > +++ b/drivers/misc/habanalabs/gaudi/gaudi.c > > @@ -6252,7 +6252,7 @@ static int gaudi_debugfs_read_dma(struct hl_device > > *hdev, u64 addr, u32 size, > > dma_addr_t dma_addr; > > void *kernel_addr; > > bool is_eng_idle; > > - int rc, dma_id; > > + int rc = 0, dma_id; > > > > kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent( > > hdev, SZ_2M, > > > In general, I don't like adding initializations during the declaration as that > tends to hide warnings for the cases where a later initialization is > missing. In this case it looks correct though. > > Acked-by: Arnd Bergmann I don't mind taking this patch for eliminating the warning but fyi, the caller function (hl_dma_size_write) checks that the size is not zero. If the size is zero, we never reach this function. Greg, do you mind applying it directly to your -next branch ? I don't have anything pending and I'm too lazy sending a pull request on a single patch ;) Reviewed-by: Oded Gabbay Thanks, Oded
[git pull] habanalabs pull request for kernel 5.13
Hi Greg, This is habanalabs pull request for the merge window of kernel 5.13. It contains changes and new features, support for new firmware. Details are in the tag. Thanks, Oded The following changes since commit b195b20b7145bcae22ad261abc52d68336f5e913: Merge tag 'extcon-next-for-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/extcon into char-misc-next (2021-04-08 08:45:30 +0200) are available in the Git repository at: https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git tags/misc-habanalabs-next-2021-04-10 for you to fetch changes up to b575a7673e3d0396992fc72fce850723d39264e3: habanalabs: print f/w boot unknown error (2021-04-09 14:10:32 +0300) This tag contains habanalabs driver changes for v5.13: - Add support to reset device after the user closes the file descriptor. Because we support a single user, we can reset the device (if needs to) after a user closes its file descriptor to make sure the device is in idle and clean state for the next user. - Add a new feature to allow the user to wait on interrupt. This is needed for future ASICs - Replace GFP_ATOMIC with GFP_KERNEL wherever possible and add code to support failure of allocating with GFP_ATOMIC. - Update code to support the latest firmware image: - More security features are done in the firmware - Remove hard-coded assumptions and replace them with values that are sent to the firmware on loading. - Print device unusable error - Reset device in case the communication between driver and firmware gets out of sync. - Support new PCI device ids for secured GAUDI. - Expose current power draw through the INFO IOCTL. - Support resetting the device upon a request from the BMC (through F/W). - Always use only a single MSI in GAUDI, due to H/W limitation. - Improve data-path code by taking out code from spinlock protection. - Allow user to specify custom timeout per Command Submission. - Some enhancements to debugfs. - Various minor changes and improvements. Alon Mizrahi (1): habanalabs: add custom timeout flag per cs Bharat Jauhari (1): habanalabs: move dram scrub to free sequence Koby Elbaz (2): habanalabs: improve utilization calculation habanalabs: support DEVICE_UNUSABLE error indication from FW Oded Gabbay (11): habanalabs: reset after device is actually released habanalabs: fail reset if device is not idle habanalabs: reset_upon_device_release is for bring-up habanalabs: print if device is used on FD close habanalabs: change default CS timeout to 30 seconds habanalabs: use correct define for 32-bit max value habanalabs/gaudi: always use single-msi mode habanalabs/gaudi: add debugfs to DMA from the device habanalabs: remove the store jobs array from CS IOCTL habanalabs: use strscpy instead of sprintf and strlcpy habanalabs: print f/w boot unknown error Ofir Bitton (13): habanalabs: add reset support when user closes FD habanalabs: enable all IRQs for user interrupt support habanalabs: wait for interrupt support habanalabs: use a single FW loading bringup flag habanalabs/gaudi: update extended async event header habanalabs: replace GFP_ATOMIC with GFP_KERNEL habanalabs: debugfs access to user mapped host addresses habanalabs/gaudi: reset device upon BMC request habanalabs/gaudi: unsecure TPC cfg status registers habanalabs/gaudi: Update async events header habanalabs: move relevant datapath work outside cs lock habanalabs/gaudi: derive security status from pci id habanalabs/gaudi: skip iATU if F/W security is enabled Ohad Sharabi (6): habanalabs: reset device in case of sync error habanalabs: skip DISABLE PCI packet to FW on heartbeat habanalabs: update hl_boot_if.h habanalabs: support legacy and new pll indexes habanalabs: send dynamic msi-x indexes to f/w habanalabs: update to latest F/W communication header Sagiv Ozeri (2): habanalabs: support HW blocks vm show habanalabs: return current power via INFO IOCTL Tomer Tayar (1): habanalabs/gaudi: clear QM errors only if not in stop_on_err mode Yang Li (1): habanalabs: Switch to using the new API kobj_to_dev() farah kassabri (3): habanalabs: set max asid to 2 habanalabs: avoid soft lockup bug upon mapping error habanalabs/gaudi: sync stream add protection to SOB reset flow .../ABI/testing/debugfs-driver-habanalabs | 70 +++- drivers/misc/habanalabs/common/command_buffer.c| 12 +- .../misc/habanalabs/common/command_submission.c| 368 + drivers/misc/habanalabs/common/context.c | 14 +- drivers/misc/habanalabs/common/debugfs.c | 224 +++-- drivers/misc/habanal
[PATCH 7/7] habanalabs: update to latest F/W communication header
From: Ohad Sharabi update files to latest version from F/W team. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../misc/habanalabs/include/common/cpucp_if.h | 3 +- .../habanalabs/include/common/hl_boot_if.h| 198 ++ 2 files changed, 200 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index 20a710f7a369..27cd0ba99aa3 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -11,6 +11,8 @@ #include #include +#include "hl_boot_if.h" + #define NUM_HBM_PSEUDO_CH 2 #define NUM_HBM_CH_PER_DEV 8 #define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_SHIFT0 @@ -564,7 +566,6 @@ struct eq_generic_event { */ #define CARD_NAME_MAX_LEN 16 -#define VERSION_MAX_LEN128 #define CPUCP_MAX_SENSORS 128 #define CPUCP_MAX_NICS 128 #define CPUCP_LANES_PER_NIC4 diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index 980b432fd76e..e0a259e0495c 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -13,6 +13,8 @@ #define BOOT_FIT_SRAM_OFFSET 0x20 +#define VERSION_MAX_LEN128 + /* * CPU error bits in BOOT_ERROR registers * @@ -251,6 +253,7 @@ enum kmd_msg { KMD_MSG_SKIP_BMC, RESERVED, KMD_MSG_RST_DEV, + KMD_MSG_LAST }; enum cpu_msg_status { @@ -259,4 +262,199 @@ enum cpu_msg_status { CPU_MSG_ERR, }; +/* communication registers mapping - consider ABI when changing */ +struct cpu_dyn_regs { + uint32_t cpu_pq_base_addr_low; + uint32_t cpu_pq_base_addr_high; + uint32_t cpu_pq_length; + uint32_t cpu_pq_init_status; + uint32_t cpu_eq_base_addr_low; + uint32_t cpu_eq_base_addr_high; + uint32_t cpu_eq_length; + uint32_t cpu_eq_ci; + uint32_t cpu_cq_base_addr_low; + uint32_t cpu_cq_base_addr_high; + uint32_t cpu_cq_length; + uint32_t cpu_pf_pq_pi; + uint32_t cpu_boot_dev_sts0; + uint32_t cpu_boot_dev_sts1; + uint32_t cpu_boot_err0; + uint32_t cpu_boot_err1; + uint32_t cpu_boot_status; + uint32_t fw_upd_sts; + uint32_t fw_upd_cmd; + uint32_t fw_upd_pending_sts; + uint32_t fuse_ver_offset; + uint32_t preboot_ver_offset; + uint32_t uboot_ver_offset; + uint32_t hw_state; + uint32_t kmd_msg_to_cpu; + uint32_t cpu_cmd_status_to_host; + uint32_t reserved1[32]; /* reserve for future use */ +}; + +/* HCDM - Habana Communications Descriptor Magic */ +#define HL_COMMS_DESC_MAGIC0x4843444D +#define HL_COMMS_DESC_VER 1 + +/* this is the comms descriptor header - meta data */ +struct comms_desc_header { + uint32_t magic; /* magic for validation */ + uint32_t crc32; /* CRC32 of the descriptor w/o header */ + uint16_t size; /* size of the descriptor w/o header */ + uint8_t version;/* descriptor version */ + uint8_t reserved[5];/* pad to 64 bit */ +}; + +/* this is the main FW descriptor - consider ABI when changing */ +struct lkd_fw_comms_desc { + struct comms_desc_header header; + struct cpu_dyn_regs cpu_dyn_regs; + char fuse_ver[VERSION_MAX_LEN]; + char cur_fw_ver[VERSION_MAX_LEN]; + /* can be used for 1 more version w/o ABI change */ + char reserved0[VERSION_MAX_LEN]; + uint64_t img_addr; /* address for next FW component load */ +}; + +/* + * LKD commands: + * + * COMMS_NOOP Used to clear the command register and no actual + * command is send. + * + * COMMS_CLR_STS Clear status command - FW should clear the + * status register. Used for synchronization + * between the commands as part of the race free + * protocol. + * + * COMMS_RST_STATE Reset the current communication state which is + * kept by FW for proper responses. + * Should be used in the beginning of the + * communication cycle to clean any leftovers from + * previous communication attempts. + * + * COMMS_PREP_DESC Prepare descriptor for setting up the + * communication and other dynamic data: + * struct lkd_fw_comms_desc. + * This command has a parameter stating the next FW + * component size, so the FW can actually
[PATCH 6/7] habanalabs: print on f/w boot unknown error
We need to print a message to the kernel log in case we encounter an unknown error in the f/w boot to help the user understand what happened. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 652571d3b8e6..4c096b6132b5 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -351,8 +351,12 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg, dev_dbg(hdev->dev, "Device security status %#x\n", security_val); - if (err_val & ~CPU_BOOT_ERR0_ENABLED) + if (err_val & ~CPU_BOOT_ERR0_ENABLED) { + dev_err(hdev->dev, + "Device boot error - unknown error 0x%08x\n", + err_val); return -EIO; + } return 0; } -- 2.25.1
[PATCH 5/7] habanalabs/gaudi: skip iATU if F/W security is enabled
From: Ofir Bitton As part of the securing GAUDI, the F/W will configure the PCI iATU regions. If the driver identifies a secured PCI ID, it will know to skip iATU configuration in a very early stage. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 3 ++ drivers/misc/habanalabs/common/pci/pci.c| 52 + drivers/misc/habanalabs/gaudi/gaudi.c | 23 + drivers/misc/habanalabs/goya/goya.c | 24 +- 4 files changed, 101 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index c1b46126c522..44e89da30b4a 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -445,6 +445,7 @@ struct hl_mmu_properties { * @dram_supports_virtual_memory: is there an MMU towards the DRAM * @hard_reset_done_by_fw: true if firmware is handling hard reset flow * @num_functional_hbms: number of functional HBMs in each DCORE. + * @iatu_done_by_fw: true if iATU configuration is being done by FW. */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -508,6 +509,7 @@ struct asic_fixed_properties { u8 dram_supports_virtual_memory; u8 hard_reset_done_by_fw; u8 num_functional_hbms; + u8 iatu_done_by_fw; }; /** @@ -2400,6 +2402,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], bool is_wc[3]); +int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data); int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data); int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region, struct hl_inbound_pci_region *pci_region); diff --git a/drivers/misc/habanalabs/common/pci/pci.c b/drivers/misc/habanalabs/common/pci/pci.c index b799f9258fb0..e941b7eef346 100644 --- a/drivers/misc/habanalabs/common/pci/pci.c +++ b/drivers/misc/habanalabs/common/pci/pci.c @@ -85,6 +85,58 @@ static void hl_pci_bars_unmap(struct hl_device *hdev) pci_release_regions(pdev); } +int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data) +{ + struct pci_dev *pdev = hdev->pdev; + ktime_t timeout; + u64 msec; + u32 val; + + if (hdev->pldm) + msec = HL_PLDM_PCI_ELBI_TIMEOUT_MSEC; + else + msec = HL_PCI_ELBI_TIMEOUT_MSEC; + + /* Clear previous status */ + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0); + + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr); + pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL, 0); + + timeout = ktime_add_ms(ktime_get(), msec); + for (;;) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val); + if (val & PCI_CONFIG_ELBI_STS_MASK) + break; + if (ktime_compare(ktime_get(), timeout) > 0) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, + &val); + break; + } + + usleep_range(300, 500); + } + + if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) { + pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data); + + return 0; + } + + if (val & PCI_CONFIG_ELBI_STS_ERR) { + dev_err(hdev->dev, "Error reading from ELBI\n"); + return -EIO; + } + + if (!(val & PCI_CONFIG_ELBI_STS_MASK)) { + dev_err(hdev->dev, "ELBI read didn't finish in time\n"); + return -EIO; + } + + dev_err(hdev->dev, "ELBI read has undefined bits in status\n"); + return -EIO; +} + /** * hl_pci_elbi_write() - Write through the ELBI interface. * @hdev: Pointer to hl_device structure. diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 841748392e49..8730b691ec61 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -629,6 +629,11 @@ static int gaudi_init_iatu(struct hl_device *hdev) struct hl_outbound_pci_region outbound_region; int rc; + if (hdev->asic_prop.iatu_done_by_fw) { + hdev->asic_funcs->set_dma_mask_from_fw(hdev); + return 0; + } + /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */ inbound_region.mode = PCI_BAR_MATCH_MODE; inbound_region.bar = SRAM_BAR_ID; @@ -673,6 +678,7 @@ static int gaudi_early_init(struct hl_device *
[PATCH 4/7] habanalabs/gaudi: derive security status from pci id
From: Ofir Bitton As F/ security indication must be available before driver approaches PCI bus, F/W security should be derived from PCI id rather than be fetched during boot handshake with F/W. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 4 drivers/misc/habanalabs/common/firmware_if.c | 6 +++--- drivers/misc/habanalabs/common/habanalabs.h | 4 +++- .../misc/habanalabs/common/habanalabs_drv.c | 21 +++ drivers/misc/habanalabs/common/mmu/mmu.c | 1 + drivers/misc/habanalabs/common/sysfs.c| 3 +++ drivers/misc/habanalabs/gaudi/gaudi.c | 2 -- drivers/misc/habanalabs/goya/goya.c | 2 -- 8 files changed, 35 insertions(+), 8 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 2ed4f2bedc08..00e92b678828 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -327,6 +327,10 @@ static int device_early_init(struct hl_device *hdev) gaudi_set_asic_funcs(hdev); strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name)); break; + case ASIC_GAUDI_SEC: + gaudi_set_asic_funcs(hdev); + strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name)); + break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", hdev->asic_type); diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 532a2fd7bfb4..652571d3b8e6 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -819,16 +819,16 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, if (security_status & CPU_BOOT_DEV_STS0_ENABLED) { prop->fw_security_status_valid = 1; + /* FW security should be derived from PCI ID, we keep this +* check for backward compatibility +*/ if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN) prop->fw_security_disabled = false; - else - prop->fw_security_disabled = true; if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) prop->hard_reset_done_by_fw = true; } else { prop->fw_security_status_valid = 0; - prop->fw_security_disabled = true; } dev_dbg(hdev->dev, "Firmware preboot security status %#x\n", diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 867986ef4588..c1b46126c522 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -766,11 +766,13 @@ struct hl_eq { * @ASIC_INVALID: Invalid ASIC type. * @ASIC_GOYA: Goya device. * @ASIC_GAUDI: Gaudi device. + * @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000). */ enum hl_asic_type { ASIC_INVALID, ASIC_GOYA, - ASIC_GAUDI + ASIC_GAUDI, + ASIC_GAUDI_SEC }; struct hl_cs_parser; diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 59896566dca1..7135f1e03864 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -47,10 +47,12 @@ MODULE_PARM_DESC(memory_scrub, #define PCI_IDS_GOYA 0x0001 #define PCI_IDS_GAUDI 0x1000 +#define PCI_IDS_GAUDI_SEC 0x1010 static const struct pci_device_id ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), }, { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), }, + { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), }, { 0, } }; MODULE_DEVICE_TABLE(pci, ids); @@ -74,6 +76,9 @@ static enum hl_asic_type get_asic_type(u16 device) case PCI_IDS_GAUDI: asic_type = ASIC_GAUDI; break; + case PCI_IDS_GAUDI_SEC: + asic_type = ASIC_GAUDI_SEC; + break; default: asic_type = ASIC_INVALID; break; @@ -82,6 +87,16 @@ static enum hl_asic_type get_asic_type(u16 device) return asic_type; } +static bool is_asic_secured(enum hl_asic_type asic_type) +{ + switch (asic_type) { + case ASIC_GAUDI_SEC: + return true; + default: + return false; + } +} + /* * hl_device_open - open function for habanalabs device * @@ -287,6 +302,12 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, hdev->asic_type =
[PATCH 3/7] habanalabs: move dram scrub to free sequence
From: Bharat Jauhari DRAM scrubbing can take time hence it adds to latency during allocation. To minimize latency during initialization, scrubbing is moved to release call. In case scrubbing fails it means the device is in a bad state, hence HARD reset is initiated. Signed-off-by: Bharat Jauhari Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/memory.c | 87 ++--- 1 file changed, 48 insertions(+), 39 deletions(-) diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 6530fddbbc21..2938cbbafbbc 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -81,16 +81,6 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, num_pgs, total_size); return -ENOMEM; } - - if (hdev->memory_scrub) { - rc = hdev->asic_funcs->scrub_device_mem(hdev, paddr, - total_size); - if (rc) { - dev_err(hdev->dev, - "Failed to scrub contiguous device memory\n"); - goto pages_pack_err; - } - } } phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); @@ -128,17 +118,6 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, goto page_err; } - if (hdev->memory_scrub) { - rc = hdev->asic_funcs->scrub_device_mem(hdev, - phys_pg_pack->pages[i], - page_size); - if (rc) { - dev_err(hdev->dev, - "Failed to scrub device memory\n"); - goto page_err; - } - } - num_curr_pgs++; } } @@ -280,37 +259,67 @@ static void dram_pg_pool_do_release(struct kref *ref) * @phys_pg_pack: physical page pack to free. * * This function does the following: - * - For DRAM memory only, iterate over the pack and free each physical block - * structure by returning it to the general pool. + * - For DRAM memory only + * - iterate over the pack, scrub and free each physical block structure by + * returning it to the general pool. + * In case of error during scrubbing, initiate hard reset. + * Once hard reset is triggered, scrubbing is bypassed while freeing the + * memory continues. * - Free the hl_vm_phys_pg_pack structure. */ -static void free_phys_pg_pack(struct hl_device *hdev, +static int free_phys_pg_pack(struct hl_device *hdev, struct hl_vm_phys_pg_pack *phys_pg_pack) { struct hl_vm *vm = &hdev->vm; u64 i; + int rc = 0; + + if (phys_pg_pack->created_from_userptr) + goto end; - if (!phys_pg_pack->created_from_userptr) { - if (phys_pg_pack->contiguous) { - gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0], + if (phys_pg_pack->contiguous) { + if (hdev->memory_scrub && !hdev->disabled) { + rc = hdev->asic_funcs->scrub_device_mem(hdev, + phys_pg_pack->pages[0], phys_pg_pack->total_size); + if (rc) + dev_err(hdev->dev, + "Failed to scrub contiguous device memory\n"); + } - for (i = 0; i < phys_pg_pack->npages ; i++) - kref_put(&vm->dram_pg_pool_refcount, - dram_pg_pool_do_release); - } else { - for (i = 0 ; i < phys_pg_pack->npages ; i++) { - gen_pool_free(vm->dram_pg_pool, + gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0], + phys_pg_pack->total_size); + + for (i = 0; i < phys_pg_pack->npages ; i++) + kref_put(&vm->dram_pg_pool_refcount, + dram_pg_pool_do_release); + } else { + for (i = 0 ; i < phys_pg_pack->npages ; i++) { + if (hdev->memory_scrub && !hdev->disabled && rc == 0) { +
[PATCH 2/7] habanalabs: send dynamic msi-x indexes to f/w
From: Ohad Sharabi In order to minimize hard coded values between F/W and the driver, we send msi-x indexes dynamically to the F/W. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 67 + drivers/misc/habanalabs/common/habanalabs.h | 4 + drivers/misc/habanalabs/gaudi/gaudi.c | 2 +- drivers/misc/habanalabs/goya/goya.c | 2 +- .../misc/habanalabs/include/common/cpucp_if.h | 75 ++- 5 files changed, 131 insertions(+), 19 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index d81a8d537373..532a2fd7bfb4 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -422,6 +422,73 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev, return rc; } +static int hl_fw_send_msi_info_msg(struct hl_device *hdev) +{ + struct cpucp_array_data_packet *pkt; + size_t total_pkt_size, data_size; + u64 result; + int rc; + + /* skip sending this info for unsupported ASICs */ + if (!hdev->asic_funcs->get_msi_info) + return 0; + + data_size = CPUCP_NUM_OF_MSI_TYPES * sizeof(u32); + total_pkt_size = sizeof(struct cpucp_array_data_packet) + data_size; + + /* data should be aligned to 8 bytes in order to CPU-CP to copy it */ + total_pkt_size = (total_pkt_size + 0x7) & ~0x7; + + /* total_pkt_size is casted to u16 later on */ + if (total_pkt_size > USHRT_MAX) { + dev_err(hdev->dev, "CPUCP array data is too big\n"); + return -EINVAL; + } + + pkt = kzalloc(total_pkt_size, GFP_KERNEL); + if (!pkt) + return -ENOMEM; + + pkt->length = cpu_to_le32(CPUCP_NUM_OF_MSI_TYPES); + + hdev->asic_funcs->get_msi_info((u32 *)&pkt->data); + + pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_MSI_INFO_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)pkt, + total_pkt_size, 0, &result); + + /* +* in case packet result is invalid it means that FW does not support +* this feature and will use default/hard coded MSI values. no reason +* to stop the boot +*/ + if (rc && result == cpucp_packet_invalid) + rc = 0; + + if (rc) + dev_err(hdev->dev, "failed to send CPUCP array data\n"); + + kfree(pkt); + + return rc; +} + +int hl_fw_cpucp_handshake(struct hl_device *hdev, + u32 cpu_security_boot_status_reg, + u32 boot_err0_reg) +{ + int rc; + + rc = hl_fw_cpucp_info_get(hdev, cpu_security_boot_status_reg, + boot_err0_reg); + if (rc) + return rc; + + return hl_fw_send_msi_info_msg(hdev); +} + int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) { struct cpucp_packet pkt = {}; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index d89ae4c3d634..867986ef4588 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1049,6 +1049,7 @@ struct hl_asic_funcs { int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma, u32 block_id, u32 block_size); void (*enable_events_from_fw)(struct hl_device *hdev); + void (*get_msi_info)(u32 *table); }; @@ -2374,6 +2375,9 @@ int hl_fw_send_heartbeat(struct hl_device *hdev); int hl_fw_cpucp_info_get(struct hl_device *hdev, u32 cpu_security_boot_status_reg, u32 boot_err0_reg); +int hl_fw_cpucp_handshake(struct hl_device *hdev, + u32 cpu_security_boot_status_reg, + u32 boot_err0_reg); int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size); int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters); diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 791434278904..62e3c63bec20 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7966,7 +7966,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev) if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) return 0; - rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0); + rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0); if (rc) return rc; diff --git a/driv
[PATCH 1/7] habanalabs/gaudi: clear QM errors only if not in stop_on_err mode
From: Tomer Tayar Clearing QM errors by the driver will prevent these H/W blocks from stopping in case they are configured to stop on errors, so perform this clearing only if this mode is not in use. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 03d3fb643e79..791434278904 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7086,7 +7086,8 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev, } /* Write 1 clear errors */ - WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); + if (!hdev->stop_on_err) + WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); } arb_err_val = RREG32(arb_err_addr); -- 2.25.1
[PATCH] habanalabs: support DEVICE_UNUSABLE error indication from FW
From: Koby Elbaz In case of multiple ECC errors, FW will set the DEVICE_UNUSABLE bit. On boot-up, the driver will therefore fail inserting the device. Signed-off-by: Koby Elbaz Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c| 3 +++ drivers/misc/habanalabs/include/common/hl_boot_if.h | 4 2 files changed, 7 insertions(+) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 092691a8917d..d81a8d537373 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -342,6 +342,9 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg, dev_err(hdev->dev, "Device boot error - eFuse failure\n"); if (err_val & CPU_BOOT_ERR0_PLL_FAIL) dev_err(hdev->dev, "Device boot error - PLL failure\n"); + if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) + dev_err(hdev->dev, + "Device boot error - device unusable failure\n"); security_val = RREG32(cpu_security_boot_status_reg); if (security_val & CPU_BOOT_DEV_STS0_ENABLED) diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index 1717874ff306..980b432fd76e 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -73,6 +73,9 @@ * CPU_BOOT_ERR0_PLL_FAIL PLL settings failed, meaning that one * of the PLLs remains in REF_CLK * + * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL Device is unusable and customer support + * should be contacted. + * * CPU_BOOT_ERR0_ENABLED Error registers enabled. * This is a main indication that the * running FW populates the error @@ -92,6 +95,7 @@ #define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL (1 << 10) #define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL (1 << 11) #define CPU_BOOT_ERR0_PLL_FAIL (1 << 12) +#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << 13) #define CPU_BOOT_ERR0_ENABLED (1 << 31) /* -- 2.25.1
[PATCH] habanalabs: use strscpy instead of sprintf and strlcpy
Prefer the use of strscpy when copying the ASIC name into a char array, to prevent accidentally exceeding the array's length. In addition, strlcpy is frowned upon so replace it. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index e64c60d48d42..2ed4f2bedc08 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -321,11 +321,11 @@ static int device_early_init(struct hl_device *hdev) switch (hdev->asic_type) { case ASIC_GOYA: goya_set_asic_funcs(hdev); - strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name)); + strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name)); break; case ASIC_GAUDI: gaudi_set_asic_funcs(hdev); - sprintf(hdev->asic_name, "GAUDI"); + strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name)); break; default: dev_err(hdev->dev, "Unrecognized ASIC type %d\n", -- 2.25.1
[PATCH] habanalabs: remove the store jobs array from CS IOCTL
The store part was never implemented in the code and never been used by the userspace applications. We currently use the related parameters to a different purpose with a defined union. However, there is no point in that and it is better to just remove the union and the store parameters. Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 35 ++ 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 90798eaac728..d3e017b5f0db 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -644,17 +644,10 @@ struct hl_cs_in { /* holds address of array of hl_cs_chunk for execution phase */ __u64 chunks_execute; - union { - /* this holds address of array of hl_cs_chunk for store phase - -* Currently not in use -*/ - __u64 chunks_store; - - /* Sequence number of a staged submission CS -* valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set -*/ - __u64 seq; - }; + /* Sequence number of a staged submission CS +* valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set +*/ + __u64 seq; /* Number of chunks in restore phase array. Maximum number is * HL_MAX_JOBS_PER_CS @@ -666,18 +659,10 @@ struct hl_cs_in { */ __u32 num_chunks_execute; - union { - /* Number of chunks in restore phase array - -* Currently not in use -*/ - __u32 num_chunks_store; - - /* timeout in seconds - valid only if HL_CS_FLAGS_CUSTOM_TIMEOUT -* is set. this parameter is ignored in case of future multiple -* users support. -*/ - __u32 timeout; - }; + /* timeout in seconds - valid only if HL_CS_FLAGS_CUSTOM_TIMEOUT +* is set +*/ + __u32 timeout; /* HL_CS_FLAGS_* */ __u32 cs_flags; @@ -1051,8 +1036,8 @@ struct hl_debug_args { * Each JOB will be enqueued on a specific queue, according to the user's input. * There can be more then one JOB per queue. * - * The CS IOCTL will receive three sets of JOBS. One set is for "restore" phase, - * a second set is for "execution" phase and a third set is for "store" phase. + * The CS IOCTL will receive two sets of JOBS. One set is for "restore" phase + * and a second set is for "execution" phase. * The JOBS on the "restore" phase are enqueued only after context-switch * (or if its the first CS for this context). The user can also order the * driver to run the "restore" phase explicitly -- 2.25.1
[PATCH 3/3] habanalabs/gaudi: add debugfs to DMA from the device
When trying to debug program, the user often needs to dump large parts of the device's DRAM, which can reach to tens of GBs. Because reading from the device's internal memory through the PCI BAR is extremely slow, the debug can take hours. Instead, we can provide the user to copy data through one of the DMA engines. This will make the operation much faster. Currently, only GAUDI is supported. In GAUDI, we need to find a PCI DMA engine that is IDLE and set the DMA as secured to be able to bypass our MMU as we currently don't map the temporary buffer to the MMU. Example bash one-line to dump entire HBM to file (~2 minutes): for (( i=0x0; i < 0x8; i+=0x800 )); do \ printf '0x%x\n' $i | sudo tee /sys/kernel/debug/habanalabs/hl0/addr ; \ echo 0x800 | sudo tee /sys/kernel/debug/habanalabs/hl0/dma_size ; \ sudo cat /sys/kernel/debug/habanalabs/hl0/data_dma >> hbm.txt ; done Signed-off-by: Oded Gabbay --- .../ABI/testing/debugfs-driver-habanalabs | 68 ++-- drivers/misc/habanalabs/common/debugfs.c | 118 - drivers/misc/habanalabs/common/habanalabs.h | 17 +- drivers/misc/habanalabs/gaudi/gaudi.c | 159 ++ drivers/misc/habanalabs/gaudi/gaudiP.h| 2 + drivers/misc/habanalabs/goya/goya.c | 8 + 6 files changed, 352 insertions(+), 20 deletions(-) diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs index f9e233cbdc37..c78fc9282876 100644 --- a/Documentation/ABI/testing/debugfs-driver-habanalabs +++ b/Documentation/ABI/testing/debugfs-driver-habanalabs @@ -82,6 +82,24 @@ Description:Allows the root user to read or write 64 bit data directly If the IOMMU is disabled, it also allows the root user to read or write from the host a device VA of a host mapped memory +What: /sys/kernel/debug/habanalabs/hl/data_dma +Date: Apr 2021 +KernelVersion: 5.13 +Contact:ogab...@kernel.org +Description:Allows the root user to read from the device's internal +memory (DRAM/SRAM) through a DMA engine. +This property is a binary blob that contains the result of the +DMA transfer. +This custom interface is needed (instead of using the generic +Linux user-space PCI mapping) because the amount of internal +memory is huge (>32GB) and reading it via the PCI bar will take +a very long time. +This interface doesn't support concurrency in the same device. +In GAUDI and GOYA, this action can cause undefined behavior +in case the it is done while the device is executing user +workloads. +Only supported on GAUDI at this stage. + What: /sys/kernel/debug/habanalabs/hl/device Date: Jan 2019 KernelVersion: 5.1 @@ -90,6 +108,24 @@ Description:Enables the root user to set the device to specific state. Valid values are "disable", "enable", "suspend", "resume". User can read this property to see the valid values +What: /sys/kernel/debug/habanalabs/hl/dma_size +Date: Apr 2021 +KernelVersion: 5.13 +Contact:ogab...@kernel.org +Description:Specify the size of the DMA transaction when using DMA to read +from the device's internal memory. The value can not be larger +than 128MB. Writing to this value initiates the DMA transfer. +When the write is finished, the user can read the "data_dma" +blob + +What: /sys/kernel/debug/habanalabs/hl/dump_security_violations +Date: Jan 2021 +KernelVersion: 5.12 +Contact:ogab...@kernel.org +Description:Dumps all security violations to dmesg. This will also ack +all security violations meanings those violations will not be +dumped next time user calls this API + What: /sys/kernel/debug/habanalabs/hl/engines Date: Jul 2019 KernelVersion: 5.3 @@ -154,6 +190,16 @@ Description:Displays the hop values and physical address for a given ASID e.g. to display info about VA 0x1000 for ASID 1 you need to do: echo "1 0x1000" > /sys/kernel/debug/habanalabs/hl0/mmu +What: /sys/kernel/debug/habanalabs/hl/mmu_error +Date: Mar 2021 +KernelVersion: 5.12 +Contact:fkassa...@habana.ai +Description:Check and display page fault or access violation mmu errors for +all MMUs specified in mmu_cap_mask. +e.g. to display error info for MMU hw cap bit 9, you need to do: +echo "0x200" > /sys/kernel/debug/habanalabs/hl0/mmu_error +
[PATCH 2/3] habanalabs/gaudi: sync stream add protection to SOB reset flow
From: farah kassabri Since we moved the SOB reset flow to workqueue and not part of the fence release flow, we might reach a scenario where new context is created while we in the middle of resetting the SOB. in such cases the reset may fail due to idle check. This will mess up the streams sync since the SOB value is invalid. so we protect this area with a mutex, to delay context creation. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 16 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index f273b792bc5d..bd711c8d3874 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -5729,18 +5729,26 @@ static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, static int gaudi_schedule_register_memset(struct hl_device *hdev, u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val) { - struct hl_ctx *ctx = hdev->compute_ctx; + struct hl_ctx *ctx; struct hl_pending_cb *pending_cb; struct packet_msg_long *pkt; u32 cb_size, ctl; struct hl_cb *cb; - int i; + int i, rc; + + mutex_lock(&hdev->fpriv_list_lock); + ctx = hdev->compute_ctx; /* If no compute context available or context is going down * memset registers directly */ - if (!ctx || kref_read(&ctx->refcount) == 0) - return gaudi_memset_registers(hdev, reg_base, num_regs, val); + if (!ctx || kref_read(&ctx->refcount) == 0) { + rc = gaudi_memset_registers(hdev, reg_base, num_regs, val); + mutex_unlock(&hdev->fpriv_list_lock); + return rc; + } + + mutex_unlock(&hdev->fpriv_list_lock); cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot) * 2; -- 2.25.1
[PATCH 1/3] habanalabs: add custom timeout flag per cs
From: Alon Mizrahi There is a need to allow to user to send command submissions with custom timeout as some CS take longer than the max timeout that is used by default. Signed-off-by: Alon Mizrahi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../habanalabs/common/command_submission.c| 35 +++ drivers/misc/habanalabs/common/habanalabs.h | 2 ++ drivers/misc/habanalabs/common/hw_queue.c | 2 +- include/uapi/misc/habanalabs.h| 15 ++-- 4 files changed, 36 insertions(+), 18 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 21a60b7c2091..ff8791a651fd 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -467,8 +467,7 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) if (next_entry_found && !next->tdr_active) { next->tdr_active = true; - schedule_delayed_work(&next->work_tdr, - hdev->timeout_jiffies); + schedule_delayed_work(&next->work_tdr, next->timeout_jiffies); } spin_unlock(&hdev->cs_mirror_lock); @@ -622,7 +621,7 @@ static void cs_timedout(struct work_struct *work) static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, enum hl_cs_type cs_type, u64 user_sequence, - struct hl_cs **cs_new) + struct hl_cs **cs_new, u32 flags, u32 timeout) { struct hl_cs_counters_atomic *cntr; struct hl_fence *other = NULL; @@ -649,6 +648,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cs->submitted = false; cs->completed = false; cs->type = cs_type; + cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP); + cs->timeout_jiffies = timeout; INIT_LIST_HEAD(&cs->job_list); INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout); kref_init(&cs->refcount); @@ -1092,7 +1093,8 @@ static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs, } static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, - u32 num_chunks, u64 *cs_seq, u32 flags) + u32 num_chunks, u64 *cs_seq, u32 flags, + u32 timeout) { bool staged_mid, int_queues_only = true; struct hl_device *hdev = hpriv->hdev; @@ -1121,11 +1123,11 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, staged_mid = false; rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, - staged_mid ? user_sequence : ULLONG_MAX, &cs); + staged_mid ? user_sequence : ULLONG_MAX, &cs, flags, + timeout); if (rc) goto free_cs_chunk_array; - cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP); *cs_seq = cs->sequence; hl_debugfs_add_cs(cs); @@ -1323,7 +1325,8 @@ static int hl_submit_pending_cb(struct hl_fpriv *hpriv) list_move_tail(&pending_cb->cb_node, &local_cb_list); spin_unlock(&ctx->pending_cb_lock); - rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs); + rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs, 0, + hdev->timeout_jiffies); if (rc) goto add_list_elements; @@ -1424,7 +1427,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, rc = 0; } else { rc = cs_ioctl_default(hpriv, chunks, num_chunks, - cs_seq, 0); + cs_seq, 0, hdev->timeout_jiffies); } mutex_unlock(&hpriv->restore_phase_mutex); @@ -1594,7 +1597,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev, static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, void __user *chunks, u32 num_chunks, - u64 *cs_seq, bool timestamp) + u64 *cs_seq, u32 flags, u32 timeout) { struct hl_cs_chunk *cs_chunk_array, *chunk; struct hw_queue_properties *hw_queue_prop; @@ -1700,7 +1703,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, } } - rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs); + rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout); if (rc) { if (cs_type == CS_TYPE_WAIT ||
[PATCH 3/3] habanalabs: improve utilization calculation
From: Koby Elbaz The new approach is based on the notion that the relative current power consumption is in relation of proportionality to device's true utilization. Utilization info ranges between [0,100]% Currently, dc_power values are hard-coded. Signed-off-by: Koby Elbaz Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../habanalabs/common/command_submission.c| 18 --- drivers/misc/habanalabs/common/device.c | 121 ++ drivers/misc/habanalabs/common/habanalabs.h | 25 +--- .../misc/habanalabs/common/habanalabs_ioctl.c | 11 +- drivers/misc/habanalabs/common/hw_queue.c | 8 -- drivers/misc/habanalabs/gaudi/gaudi.c | 20 ++- drivers/misc/habanalabs/gaudi/gaudiP.h| 3 + drivers/misc/habanalabs/goya/goya.c | 1 + drivers/misc/habanalabs/goya/goyaP.h | 2 + 9 files changed, 40 insertions(+), 169 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index ba6d3e317255..21a60b7c2091 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -505,24 +505,6 @@ static void cs_do_release(struct kref *ref) goto out; } - hdev->asic_funcs->hw_queues_lock(hdev); - - hdev->cs_active_cnt--; - if (!hdev->cs_active_cnt) { - struct hl_device_idle_busy_ts *ts; - - ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++]; - ts->busy_to_idle_ts = ktime_get(); - - if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE) - hdev->idle_busy_ts_idx = 0; - } else if (hdev->cs_active_cnt < 0) { - dev_crit(hdev->dev, "CS active cnt %d is negative\n", - hdev->cs_active_cnt); - } - - hdev->asic_funcs->hw_queues_unlock(hdev); - /* Need to update CI for all queue jobs that does not get completion */ hl_hw_queue_update_ci(cs); diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 53bc5ccb612f..49f0ceac4b81 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -383,17 +383,9 @@ static int device_early_init(struct hl_device *hdev) goto free_sob_reset_wq; } - hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE, - sizeof(struct hl_device_idle_busy_ts), - (GFP_KERNEL | __GFP_ZERO)); - if (!hdev->idle_busy_ts_arr) { - rc = -ENOMEM; - goto free_chip_info; - } - rc = hl_mmu_if_set_funcs(hdev); if (rc) - goto free_idle_busy_ts_arr; + goto free_chip_info; hl_cb_mgr_init(&hdev->kernel_cb_mgr); @@ -422,8 +414,6 @@ static int device_early_init(struct hl_device *hdev) free_cb_mgr: hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); -free_idle_busy_ts_arr: - kfree(hdev->idle_busy_ts_arr); free_chip_info: kfree(hdev->hl_chip_info); free_sob_reset_wq: @@ -461,7 +451,6 @@ static void device_early_fini(struct hl_device *hdev) hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); - kfree(hdev->idle_busy_ts_arr); kfree(hdev->hl_chip_info); destroy_workqueue(hdev->sob_reset_wq); @@ -582,100 +571,24 @@ static void device_late_fini(struct hl_device *hdev) hdev->late_init_done = false; } -uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms) +int hl_device_utilization(struct hl_device *hdev, u32 *utilization) { - struct hl_device_idle_busy_ts *ts; - ktime_t zero_ktime, curr = ktime_get(); - u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx; - s64 period_us, last_start_us, last_end_us, last_busy_time_us, - total_busy_time_us = 0, total_busy_time_ms; - - zero_ktime = ktime_set(0, 0); - period_us = period_ms * USEC_PER_MSEC; - ts = &hdev->idle_busy_ts_arr[last_index]; - - /* check case that device is currently in idle */ - if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) && - !ktime_compare(ts->idle_to_busy_ts, zero_ktime)) { - - last_index--; - /* Handle case idle_busy_ts_idx was 0 */ - if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE) - last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1; - - ts = &hdev->idle_busy_ts_arr[last_index]; - } - - while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) { - /* Check if we are in last sample case. i.e. if the sample -* begun before the sampling period
[PATCH 2/3] habanalabs: support legacy and new pll indexes
From: Ohad Sharabi In order to use minimum of hard coded values common to LKD and F/W a dynamic method to work with PLLs is introduced in this patch. Formerly asic specific PLL numbering is now common for all asics. To be backward compatible a bit in dev status is defined, if the bit is not set LKD will keep working with old PLL numbering. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 49 ++- drivers/misc/habanalabs/common/habanalabs.h | 14 -- drivers/misc/habanalabs/common/sysfs.c| 24 ++--- drivers/misc/habanalabs/gaudi/gaudi.c | 33 + drivers/misc/habanalabs/goya/goya.c | 26 ++ .../misc/habanalabs/include/common/cpucp_if.h | 41 .../habanalabs/include/common/hl_boot_if.h| 6 +++ .../habanalabs/include/gaudi/gaudi_fw_if.h| 14 -- .../misc/habanalabs/include/goya/goya_fw_if.h | 11 - 9 files changed, 182 insertions(+), 36 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 2a58edaf984a..092691a8917d 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -539,18 +539,63 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy) return rc; } -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, +int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, + enum pll_index *pll_index) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u8 pll_byte, pll_bit_off; + bool dynamic_pll; + + if (input_pll_index >= PLL_MAX) { + dev_err(hdev->dev, "PLL index %d is out of range\n", + input_pll_index); + return -EINVAL; + } + + dynamic_pll = prop->fw_security_status_valid && + (prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN); + + if (!dynamic_pll) { + /* +* in case we are working with legacy FW (each asic has unique +* PLL numbering) extract the legacy numbering +*/ + *pll_index = hdev->legacy_pll_map[input_pll_index]; + return 0; + } + + /* PLL map is a u8 array */ + pll_byte = prop->cpucp_info.pll_map[input_pll_index >> 3]; + pll_bit_off = input_pll_index & 0x7; + + if (!(pll_byte & BIT(pll_bit_off))) { + dev_err(hdev->dev, "PLL index %d is not supported\n", + input_pll_index); + return -EINVAL; + } + + *pll_index = input_pll_index; + + return 0; +} + +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, u16 *pll_freq_arr) { struct cpucp_packet pkt; + enum pll_index used_pll_idx; u64 result; int rc; + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx); + if (rc) + return rc; + memset(&pkt, 0, sizeof(pkt)); pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); - pkt.pll_type = __cpu_to_le16(pll_index); + pkt.pll_type = __cpu_to_le16((u16)used_pll_idx); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 65f34918faed..dc8126b270d1 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1946,6 +1946,8 @@ struct hl_mmu_funcs { * @aggregated_cs_counters: aggregated cs counters among all contexts * @mmu_priv: device-specific MMU data. * @mmu_func: device-related MMU functions. + * @legacy_pll_map: map holding map between dynamic (common) PLL indexes and + * static (asic specific) PLL indexes. * @dram_used_mem: current DRAM memory consumption. * @timeout_jiffies: device CS timeout value. * @max_power: the max power of the device, as configured by the sysadmin. This @@ -2070,6 +2072,8 @@ struct hl_device { struct hl_mmu_priv mmu_priv; struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS]; + enum pll_index *legacy_pll_map; + atomic64_t dram_used_mem; u64 timeout_jiffies; u64 max_power; @@ -2383,7 +2387,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
[PATCH 1/3] habanalabs: move relevant datapath work outside cs lock
From: Ofir Bitton In order to shorten the time cs lock is being held, we move any possible work outside of the cs lock. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../habanalabs/common/command_submission.c| 86 +++ drivers/misc/habanalabs/common/device.c | 13 ++- drivers/misc/habanalabs/common/habanalabs.h | 4 + 3 files changed, 68 insertions(+), 35 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 720588aed28b..ba6d3e317255 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -84,6 +84,38 @@ int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask) return 0; } +static void sob_reset_work(struct work_struct *work) +{ + struct hl_cs_compl *hl_cs_cmpl = + container_of(work, struct hl_cs_compl, sob_reset_work); + struct hl_device *hdev = hl_cs_cmpl->hdev; + + /* +* A signal CS can get completion while the corresponding wait +* for signal CS is on its way to the PQ. The wait for signal CS +* will get stuck if the signal CS incremented the SOB to its +* max value and there are no pending (submitted) waits on this +* SOB. +* We do the following to void this situation: +* 1. The wait for signal CS must get a ref for the signal CS as +*soon as possible in cs_ioctl_signal_wait() and put it +*before being submitted to the PQ but after it incremented +*the SOB refcnt in init_signal_wait_cs(). +* 2. Signal/Wait for signal CS will decrement the SOB refcnt +*here. +* These two measures guarantee that the wait for signal CS will +* reset the SOB upon completion rather than the signal CS and +* hence the above scenario is avoided. +*/ + kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset); + + if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) + hdev->asic_funcs->reset_sob_group(hdev, + hl_cs_cmpl->sob_group); + + kfree(hl_cs_cmpl); +} + static void hl_fence_release(struct kref *kref) { struct hl_fence *fence = @@ -109,28 +141,9 @@ static void hl_fence_release(struct kref *kref) hl_cs_cmpl->hw_sob->sob_id, hl_cs_cmpl->sob_val); - /* -* A signal CS can get completion while the corresponding wait -* for signal CS is on its way to the PQ. The wait for signal CS -* will get stuck if the signal CS incremented the SOB to its -* max value and there are no pending (submitted) waits on this -* SOB. -* We do the following to void this situation: -* 1. The wait for signal CS must get a ref for the signal CS as -*soon as possible in cs_ioctl_signal_wait() and put it -*before being submitted to the PQ but after it incremented -*the SOB refcnt in init_signal_wait_cs(). -* 2. Signal/Wait for signal CS will decrement the SOB refcnt -*here. -* These two measures guarantee that the wait for signal CS will -* reset the SOB upon completion rather than the signal CS and -* hence the above scenario is avoided. -*/ - kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset); + queue_work(hdev->sob_reset_wq, &hl_cs_cmpl->sob_reset_work); - if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) - hdev->asic_funcs->reset_sob_group(hdev, - hl_cs_cmpl->sob_group); + return; } free: @@ -670,9 +683,23 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, goto free_cs; } + cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, + sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); + if (!cs->jobs_in_queue_cnt) + cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, + sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL); + + if (!cs->jobs_in_queue_cnt) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&cntr->out_of_mem_drop_cnt); + rc = -ENOMEM; + goto free_cs_cmpl; + } + cs_cmpl->hdev = hdev; cs_cmpl->type = cs->type; spin_lock_init(&cs_cmpl->lock); + INIT_WORK(&cs_cmpl->sob_reset_work, sob_reset_work); cs->fence = &cs_cmpl->base_fence;
[PATCH 4/4] habanalabs: support dynamic PLL numbering
From: Ohad Sharabi As part of the effort remove hard-coded assumptions from the F/W-driver communication, introduce support for dynamic pll numbering. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 22 +- drivers/misc/habanalabs/common/habanalabs.h | 2 +- .../misc/habanalabs/include/common/cpucp_if.h | 41 +++ .../habanalabs/include/gaudi/gaudi_fw_if.h| 14 --- .../misc/habanalabs/include/goya/goya_fw_if.h | 11 - 5 files changed, 62 insertions(+), 28 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 2a58edaf984a..8843e040c660 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -539,18 +539,36 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy) return rc; } -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, u16 *pll_freq_arr) { + struct asic_fixed_properties *prop = &hdev->asic_prop; + u8 pll_byte, pll_bit_off; struct cpucp_packet pkt; u64 result; int rc; + if (pll_index >= PLL_MAX) { + dev_err(hdev->dev, "PLL index %d is out of range\n", + pll_index); + return -EINVAL; + } + + /* PLL map is a u8 array */ + pll_byte = prop->cpucp_info.pll_map[pll_index >> 3]; + pll_bit_off = pll_index & 0x7; + + if (!(pll_byte & BIT(pll_bit_off))) { + dev_err(hdev->dev, "PLL index %d is not supported\n", + pll_index); + return -EINVAL; + } + memset(&pkt, 0, sizeof(pkt)); pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); - pkt.pll_type = __cpu_to_le16(pll_index); + pkt.pll_type = __cpu_to_le16((u16)pll_index); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), HL_CPUCP_INFO_TIMEOUT_USEC, &result); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 2dcefd6485e5..5f930cb5e33d 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2379,7 +2379,7 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters); int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy); -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, u16 *pll_freq_arr); int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power); int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index 6ba480a316ce..e745c78dd8fd 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -28,6 +28,9 @@ #define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_SHIFT6 #define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK 0x07C0 +#define PLL_MAP_MAX_BITS 128 +#define PLL_MAP_LEN(PLL_MAP_MAX_BITS / 8) + /* * info of the pkt queue pointers in the first async occurrence */ @@ -473,6 +476,42 @@ enum cpucp_pll_type_attributes { cpucp_pll_pci, }; +/* + * PLL enumeration table used for all ASICs and future SW versions. + * For future ASIC-LKD compatibility, we can only add new enumerations. + * at the end of the table. + * Changing the order of entries or removing entries is not allowed. + */ +enum pll_index { + CPU_PLL = 0, + PCI_PLL = 1, + NIC_PLL = 2, + DMA_PLL = 3, + MESH_PLL = 4, + MME_PLL = 5, + TPC_PLL = 6, + IF_PLL = 7, + SRAM_PLL = 8, + NS_DCORE_PLL = 9, + MESH_DCORE_PLL = 10, + HBM_PLL = 11, + TPC_DCORE_PLL = 12, + VIDEO_DCORE_PLL = 13, + SRAM_DCORE_PLL = 14, + NIC_PHY_DCORE_PLL = 15, + MSS_DCORE_PLL = 16, + DMA_DCORE_PLL = 17, + SIF_PLL = 18, + DDR_PLL = 19, + VID_PLL = 20, + BANK_PLL = 21, + MMU_PLL = 22, + IC_PLL = 23, + MC_PLL = 24, + EMMC_PLL = 25, + PLL_MAX +}; + /* Event Queue Packets */ struct eq_generic_event { @@ -547,6 +586,7 @@ struct cpucp_security_info { * @dram_size: available DRAM size. * @card_name: card name that will be displayed in HWMON subsystem on th
[PATCH 3/4] habanalabs: avoid soft lockup bug upon mapping error
From: farah kassabri Add a little sleep between page unmappings in case mapping of large number of host pages failed, in order to avoid soft lockup bug during the rollback. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/memory.c | 20 +--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 1b69573369cf..6530fddbbc21 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -857,6 +857,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i; u32 page_size = phys_pg_pack->page_size; int rc = 0; + bool is_host_addr; for (i = 0 ; i < phys_pg_pack->npages ; i++) { paddr = phys_pg_pack->pages[i]; @@ -878,6 +879,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, return 0; err: + is_host_addr = !hl_is_dram_va(hdev, vaddr); + next_vaddr = vaddr; for (i = 0 ; i < mapped_pg_cnt ; i++) { if (hl_mmu_unmap_page(ctx, next_vaddr, page_size, @@ -888,6 +891,17 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, phys_pg_pack->pages[i], page_size); next_vaddr += page_size; + + /* +* unmapping on Palladium can be really long, so avoid a CPU +* soft lockup bug by sleeping a little between unmapping pages +* +* In addition, on host num of pages could be huge, +* because page size could be 4KB, so when unmapping host +* pages sleep every 32K pages to avoid soft lockup +*/ + if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0)) + usleep_range(50, 200); } return rc; @@ -921,9 +935,9 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, * unmapping on Palladium can be really long, so avoid a CPU * soft lockup bug by sleeping a little between unmapping pages * -* In addition, when unmapping host memory we pass through -* the Linux kernel to unpin the pages and that takes a long -* time. Therefore, sleep every 32K pages to avoid soft lockup +* In addition, on host num of pages could be huge, +* because page size could be 4KB, so when unmapping host +* pages sleep every 32K pages to avoid soft lockup */ if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0)) usleep_range(50, 200); -- 2.25.1
[PATCH 1/4] habanalabs/gaudi: unsecure TPC cfg status registers
From: Ofir Bitton Unsecure relevant registers as TPC engine need access to TPC status. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi_security.c | 8 1 file changed, 8 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi_security.c b/drivers/misc/habanalabs/gaudi/gaudi_security.c index 7085f45814ae..9a706c5980ef 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_security.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_security.c @@ -9556,7 +9556,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask = 1U << ((mmTPC0_CFG_PROT & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_VFLAGS & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_SFLAGS & 0x7F) >> 2); - mask |= 1U << ((mmTPC0_CFG_STATUS & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC0_CFG_TPC_STALL & 0x7F) >> 2); @@ -10011,7 +10010,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask = 1U << ((mmTPC1_CFG_PROT & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_VFLAGS & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_SFLAGS & 0x7F) >> 2); - mask |= 1U << ((mmTPC1_CFG_STATUS & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC1_CFG_TPC_STALL & 0x7F) >> 2); @@ -10465,7 +10463,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask = 1U << ((mmTPC2_CFG_PROT & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_VFLAGS & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_SFLAGS & 0x7F) >> 2); - mask |= 1U << ((mmTPC2_CFG_STATUS & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC2_CFG_TPC_STALL & 0x7F) >> 2); @@ -10919,7 +10916,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask = 1U << ((mmTPC3_CFG_PROT & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_VFLAGS & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_SFLAGS & 0x7F) >> 2); - mask |= 1U << ((mmTPC3_CFG_STATUS & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC3_CFG_TPC_STALL & 0x7F) >> 2); @@ -11373,7 +11369,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask = 1U << ((mmTPC4_CFG_PROT & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_VFLAGS & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_SFLAGS & 0x7F) >> 2); - mask |= 1U << ((mmTPC4_CFG_STATUS & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC4_CFG_TPC_STALL & 0x7F) >> 2); @@ -11827,7 +11822,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask = 1U << ((mmTPC5_CFG_PROT & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_VFLAGS & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_SFLAGS & 0x7F) >> 2); - mask |= 1U << ((mmTPC5_CFG_STATUS & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC5_CFG_TPC_STALL & 0x7F) >> 2); @@ -12283,7 +12277,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask = 1U << ((mmTPC6_CFG_PROT & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_VFLAGS & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_SFLAGS & 0x7F) >> 2); - mask |= 1U << ((mmTPC6_CFG_STATUS & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC6_CFG_TPC_STALL & 0x7F) >> 2); @@ -12739,7 +12732,6 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev) mask = 1U << ((mmTPC7_CFG_PROT & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_VFLAGS & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_SFLAGS & 0x7F) >> 2); - mask |= 1U << ((mmTPC7_CFG_STATUS & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2); mask |= 1U << ((mmTPC7_CFG_TPC_STALL & 0x7F) >> 2); -- 2.25.1
[PATCH 2/4] habanalabs/gaudi: Update async events header
From: Ofir Bitton Update with latest version from the Firmware team. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 2 +- .../include/gaudi/gaudi_async_events.h| 2 +- .../gaudi/gaudi_async_ids_map_extended.h | 35 --- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 0bcee675e1db..a65ae0dbdb92 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7574,7 +7574,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, event_type, cause); break; - case GAUDI_EVENT_BMC_RESET_CMD: + case GAUDI_EVENT_DEV_RESET_REQ: gaudi_print_irq_info(hdev, event_type, false); goto reset_device; diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h index 25f835ba2cd6..e8651abf84f2 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h @@ -303,7 +303,7 @@ enum gaudi_async_event_id { GAUDI_EVENT_NIC3_QP1 = 619, GAUDI_EVENT_NIC4_QP0 = 620, GAUDI_EVENT_NIC4_QP1 = 621, - GAUDI_EVENT_BMC_RESET_CMD = 646, + GAUDI_EVENT_DEV_RESET_REQ = 646, GAUDI_EVENT_PKT_QUEUE_OUT_SYNC = 647, GAUDI_EVENT_FIX_POWER_ENV_S = 658, GAUDI_EVENT_FIX_POWER_ENV_E = 659, diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h index 079be1fb8f70..3dc79c131805 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h @@ -670,18 +670,29 @@ static struct gaudi_async_events_ids_map gaudi_irq_map_table[] = { { .fc_id = 643, .cpu_id = 492, .valid = 0, .name = "" }, { .fc_id = 644, .cpu_id = 493, .valid = 0, .name = "" }, { .fc_id = 645, .cpu_id = 494, .valid = 0, .name = "" }, - { .fc_id = 646, .cpu_id = 495, .valid = 1, .name = "BMC_RST_CMD" }, - { .fc_id = 647, .cpu_id = 496, .valid = 0, .name = "" }, - { .fc_id = 648, .cpu_id = 497, .valid = 0, .name = "" }, - { .fc_id = 649, .cpu_id = 498, .valid = 0, .name = "" }, - { .fc_id = 650, .cpu_id = 499, .valid = 0, .name = "" }, - { .fc_id = 651, .cpu_id = 500, .valid = 0, .name = "" }, - { .fc_id = 652, .cpu_id = 501, .valid = 0, .name = "" }, - { .fc_id = 653, .cpu_id = 502, .valid = 0, .name = "" }, - { .fc_id = 654, .cpu_id = 503, .valid = 0, .name = "" }, - { .fc_id = 655, .cpu_id = 504, .valid = 0, .name = "" }, - { .fc_id = 656, .cpu_id = 505, .valid = 0, .name = "" }, - { .fc_id = 657, .cpu_id = 506, .valid = 1, .name = "PKT_QUEUE_ASYNC" }, + { .fc_id = 646, .cpu_id = 495, .valid = 1, .name = "DEV_RESET_REQ" }, + { .fc_id = 647, .cpu_id = 496, .valid = 1, + .name = "PKT_QUEUE_OUT_SYNC" }, + { .fc_id = 648, .cpu_id = 497, .valid = 1, + .name = "STATUS_NIC0_ENG0" }, + { .fc_id = 649, .cpu_id = 498, .valid = 1, + .name = "STATUS_NIC0_ENG1" }, + { .fc_id = 650, .cpu_id = 499, .valid = 1, + .name = "STATUS_NIC1_ENG0" }, + { .fc_id = 651, .cpu_id = 500, .valid = 1, + .name = "STATUS_NIC1_ENG1" }, + { .fc_id = 652, .cpu_id = 501, .valid = 1, + .name = "STATUS_NIC2_ENG0" }, + { .fc_id = 653, .cpu_id = 502, .valid = 1, + .name = "STATUS_NIC2_ENG1" }, + { .fc_id = 654, .cpu_id = 503, .valid = 1, + .name = "STATUS_NIC3_ENG0" }, + { .fc_id = 655, .cpu_id = 504, .valid = 1, + .name = "STATUS_NIC3_ENG1" }, + { .fc_id = 656, .cpu_id = 505, .valid = 1, + .name = "STATUS_NIC4_ENG0" }, + { .fc_id = 657, .cpu_id = 506, .valid = 1, + .name = "STATUS_NIC4_ENG1" }, { .fc_id = 658, .cpu_id = 507, .valid = 1, .name = "FIX_POWER_ENV_S" }, { .fc_id = 659, .cpu_id = 508, .valid = 1, .name = "FIX_POWER_ENV_E" }, { .fc_id = 660, .cpu_id = 509, .valid = 1, -- 2.25.1
[PATCH 1/2] habanalabs/gaudi: reset device upon BMC request
From: Ofir Bitton In case the BMC of the devices' box wants to initiate a reset of a specific device, it must go through driver. Once driver will receive the request it will initiate a hard reset flow. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 4 drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h| 1 + .../habanalabs/include/gaudi/gaudi_async_ids_map_extended.h | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 8be75f7c97f8..099c51350be6 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7575,6 +7575,10 @@ static void gaudi_handle_eqe(struct hl_device *hdev, event_type, cause); break; + case GAUDI_EVENT_BMC_RESET_CMD: + gaudi_print_irq_info(hdev, event_type, false); + goto reset_device; + case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: gaudi_print_irq_info(hdev, event_type, false); gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h index 0a0fa57024f8..25f835ba2cd6 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h @@ -303,6 +303,7 @@ enum gaudi_async_event_id { GAUDI_EVENT_NIC3_QP1 = 619, GAUDI_EVENT_NIC4_QP0 = 620, GAUDI_EVENT_NIC4_QP1 = 621, + GAUDI_EVENT_BMC_RESET_CMD = 646, GAUDI_EVENT_PKT_QUEUE_OUT_SYNC = 647, GAUDI_EVENT_FIX_POWER_ENV_S = 658, GAUDI_EVENT_FIX_POWER_ENV_E = 659, diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h index 9106395eb920..079be1fb8f70 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h @@ -670,7 +670,7 @@ static struct gaudi_async_events_ids_map gaudi_irq_map_table[] = { { .fc_id = 643, .cpu_id = 492, .valid = 0, .name = "" }, { .fc_id = 644, .cpu_id = 493, .valid = 0, .name = "" }, { .fc_id = 645, .cpu_id = 494, .valid = 0, .name = "" }, - { .fc_id = 646, .cpu_id = 495, .valid = 0, .name = "" }, + { .fc_id = 646, .cpu_id = 495, .valid = 1, .name = "BMC_RST_CMD" }, { .fc_id = 647, .cpu_id = 496, .valid = 0, .name = "" }, { .fc_id = 648, .cpu_id = 497, .valid = 0, .name = "" }, { .fc_id = 649, .cpu_id = 498, .valid = 0, .name = "" }, -- 2.25.1
[PATCH 2/2] habanalabs/gaudi: always use single-msi mode
The device can get into deadlock in case it use indirect mode for MSI interrupts (multi-msi) and have hard-reset during interrupt storm. To prevent that, always use direct mode which means single-msi mode. The F/W will prevent the host from writing to the indirect MSI registers to prevent any malicious user from causing this scenario. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 099c51350be6..0bcee675e1db 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -1766,8 +1766,7 @@ static int gaudi_enable_msi(struct hl_device *hdev) if (gaudi->hw_cap_initialized & HW_CAP_MSI) return 0; - rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES, - PCI_IRQ_MSI); + rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI); if (rc < 0) { dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc); return rc; -- 2.25.1
[PATCH] habanalabs: debugfs access to user mapped host addresses
From: Ofir Bitton In order to have a better debuggability we allow debugfs access to user mmu mapped host memory. Non-user host memory access will be rejected. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/debugfs.c| 88 - drivers/misc/habanalabs/common/habanalabs.h | 12 ++- drivers/misc/habanalabs/common/mmu/mmu.c| 2 + drivers/misc/habanalabs/gaudi/gaudi.c | 40 -- drivers/misc/habanalabs/goya/goya.c | 44 +-- 5 files changed, 145 insertions(+), 41 deletions(-) diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index fd3135c422b6..fff79b31af32 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -457,21 +457,58 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr) return false; } -static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, - u64 *phys_addr) +static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size, + u64 *phys_addr) { + struct hl_vm_phys_pg_pack *phys_pg_pack; struct hl_ctx *ctx = hdev->compute_ctx; - int rc = 0; + struct hl_vm_hash_node *hnode; + struct hl_userptr *userptr; + enum vm_type_t *vm_type; + bool valid = false; + u64 end_address; + u32 range_size; + int i, rc = 0; if (!ctx) { dev_err(hdev->dev, "no ctx available\n"); return -EINVAL; } + /* Verify address is mapped */ + mutex_lock(&ctx->mem_hash_lock); + hash_for_each(ctx->mem_hash, i, hnode, node) { + vm_type = hnode->ptr; + + if (*vm_type == VM_TYPE_USERPTR) { + userptr = hnode->ptr; + range_size = userptr->size; + } else { + phys_pg_pack = hnode->ptr; + range_size = phys_pg_pack->total_size; + } + + end_address = virt_addr + size; + if ((virt_addr >= hnode->vaddr) && + (end_address <= hnode->vaddr + range_size)) { + valid = true; + break; + } + } + mutex_unlock(&ctx->mem_hash_lock); + + if (!valid) { + dev_err(hdev->dev, + "virt addr 0x%llx is not mapped\n", + virt_addr); + return -EINVAL; + } + rc = hl_mmu_va_to_pa(ctx, virt_addr, phys_addr); if (rc) { - dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", - virt_addr); + dev_err(hdev->dev, + "virt addr 0x%llx is not mapped to phys addr\n", + virt_addr); rc = -EINVAL; } @@ -483,10 +520,11 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf, { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; + u64 va, addr = entry->addr; char tmp_buf[32]; - u64 addr = entry->addr; - u32 val; + bool user_address; ssize_t rc; + u32 val; if (atomic_read(&hdev->in_reset)) { dev_warn_ratelimited(hdev->dev, "Can't read during reset\n"); @@ -496,13 +534,15 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf, if (*ppos) return 0; - if (hl_is_device_va(hdev, addr)) { - rc = device_va_to_pa(hdev, addr, &addr); + user_address = hl_is_device_va(hdev, addr); + if (user_address) { + va = addr; + rc = device_va_to_pa(hdev, addr, sizeof(val), &addr); if (rc) return rc; } - rc = hdev->asic_funcs->debugfs_read32(hdev, addr, &val); + rc = hdev->asic_funcs->debugfs_read32(hdev, addr, user_address, &val); if (rc) { dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); return rc; @@ -519,6 +559,7 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; u64 addr = entry->addr; + bool user_address; u32 value; ssize_t rc; @@ -531,13 +572,14 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, if (rc) return rc; - if (hl_is_device_va(hdev, addr)) { - rc = device_va_to_pa(
[git pull resend] habanalabs fixes for 5.12-rc2
Hi Greg, This pull request contains some fixes of the habanalabs driver for 5.12-rc2. Nothing too scary, more details are in the tag. Thanks, Oded The following changes since commit fe07bfda2fb9cdef8a4d4008a409bb02f35f1bd8: Linux 5.12-rc1 (2021-02-28 16:05:19 -0800) are available in the Git repository at: https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git tags/misc-habanalabs-fixes-2021-03-03 for you to fetch changes up to 3612e9f5df4c0c605ab2e4b569214786cc701cb9: habanalabs: fix debugfs address translation (2021-03-03 09:56:58 +0200) This tag contains the following fixes for 5.12-rc2: - Call put_pid() when the user releases the control device. - Disable file operations after a PCI device is removed. - Fix address translation when displaying the memory mappings through our debugFS. - Remove unused dentry pointer for debugFS files. Greg Kroah-Hartman (1): drivers: habanalabs: remove unused dentry pointer for debugfs files Oded Gabbay (1): habanalabs: mark hl_eq_inc_ptr() as static Tomer Tayar (2): habanalabs: Call put_pid() when releasing control device habanalabs: Disable file operations after device is removed farah kassabri (1): habanalabs: fix debugfs address translation drivers/misc/habanalabs/common/debugfs.c | 5 +-- drivers/misc/habanalabs/common/device.c | 40 --- drivers/misc/habanalabs/common/habanalabs.h | 2 -- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 12 +++ drivers/misc/habanalabs/common/irq.c | 2 +- drivers/misc/habanalabs/common/mmu/mmu.c | 38 ++--- 6 files changed, 75 insertions(+), 24 deletions(-)
[git pull resend] habanalabs fixes for 5.12-rc2
Hi Greg, This pull request contains some fixes of the habanalabs driver for 5.12-rc2. Nothing too scary, more details are in the tag. Thanks, Oded The following changes since commit fe07bfda2fb9cdef8a4d4008a409bb02f35f1bd8: Linux 5.12-rc1 (2021-02-28 16:05:19 -0800) are available in the Git repository at: https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git tags/misc-habanalabs-fixes-2021-03-03 for you to fetch changes up to 3612e9f5df4c0c605ab2e4b569214786cc701cb9: habanalabs: fix debugfs address translation (2021-03-03 09:56:58 +0200) This tag contains the following fixes for 5.12-rc2: - Call put_pid() when the user releases the control device. - Disable file operations after a PCI device is removed. - Fix address translation when displaying the memory mappings through our debugFS. - Remove unused dentry pointer for debugFS files. Greg Kroah-Hartman (1): drivers: habanalabs: remove unused dentry pointer for debugfs files Oded Gabbay (1): habanalabs: mark hl_eq_inc_ptr() as static Tomer Tayar (2): habanalabs: Call put_pid() when releasing control device habanalabs: Disable file operations after device is removed farah kassabri (1): habanalabs: fix debugfs address translation drivers/misc/habanalabs/common/debugfs.c | 5 +-- drivers/misc/habanalabs/common/device.c | 40 --- drivers/misc/habanalabs/common/habanalabs.h | 2 -- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 12 +++ drivers/misc/habanalabs/common/irq.c | 2 +- drivers/misc/habanalabs/common/mmu/mmu.c | 38 ++--- 6 files changed, 75 insertions(+), 24 deletions(-)
[git pull] habanalabs fixes for 5.12-rc2
Hi Greg, This pull request contains some fixes of the habanalabs driver for 5.12-rc2. Nothing too scary, more details are in the tag. Thanks, Oded The following changes since commit fe07bfda2fb9cdef8a4d4008a409bb02f35f1bd8: Linux 5.12-rc1 (2021-02-28 16:05:19 -0800) are available in the Git repository at: https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git tags/misc-habanalabs-fixes-2021-03-02 for you to fetch changes up to bbcbfc85556ed9b2a15955eb6f9b5b732e6b1795: habanalabs: fix debugfs address translation (2021-03-02 15:07:09 +0200) This tag contains the following fixes for 5.12-rc2: - Call put_pid() when the user releases the control device. - Disable file operations after a PCI device is removed. - Fix address translation when displaying the memory mappings through our debugFS. - Remove unused dentry pointer for debugFS files. - Mark function as static. Greg Kroah-Hartman (1): drivers: habanalabs: remove unused dentry pointer for debugfs files Oded Gabbay (1): habanalabs: mark hl_eq_inc_ptr() as static Tomer Tayar (2): habanalabs: Call put_pid() when releasing control device habanalabs: Disable file operations after device is removed farah kassabri (1): habanalabs: fix debugfs address translation drivers/misc/habanalabs/common/debugfs.c | 5 +-- drivers/misc/habanalabs/common/device.c | 40 --- drivers/misc/habanalabs/common/habanalabs.h | 2 -- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 12 +++ drivers/misc/habanalabs/common/irq.c | 2 +- drivers/misc/habanalabs/common/mmu/mmu.c | 38 ++--- 6 files changed, 75 insertions(+), 24 deletions(-)
[PATCH] habanalabs: fix debugfs address translation
From: farah kassabri when user uses virtual addresses to access dram through debugfs, driver translate this address to physical and use it for the access through the pcie bar. in case dram page size is different than the dmmu page size, we need to have special treatment for adding the page offset to the actual address, which is to use the dram page size mask to fetch the page offset from the virtual address, instead of the dmmu last hop shift. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/mmu/mmu.c | 38 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c index 71703a32350f..5251b336215c 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu.c +++ b/drivers/misc/habanalabs/common/mmu/mmu.c @@ -499,18 +499,32 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr, else /* HL_VA_RANGE_TYPE_DRAM */ p = &prop->dmmu; - /* -* find the correct hop shift field in hl_mmu_properties structure -* in order to determine the right maks for the page offset. -*/ - hop0_shift_off = offsetof(struct hl_mmu_properties, hop0_shift); - p = (char *)p + hop0_shift_off; - p = (char *)p + ((hops->used_hops - 1) * sizeof(u64)); - hop_shift = *(u64 *)p; - offset_mask = (1ull << hop_shift) - 1; - addr_mask = ~(offset_mask); - *phys_addr = (tmp_phys_addr & addr_mask) | - (virt_addr & offset_mask); + if ((hops->range_type == HL_VA_RANGE_TYPE_DRAM) && + !is_power_of_2(prop->dram_page_size)) { + u32 bit; + u64 page_offset_mask; + u64 phys_addr_mask; + + bit = __ffs64((u64)prop->dram_page_size); + page_offset_mask = ((1 << bit) - 1); + phys_addr_mask = ~page_offset_mask; + *phys_addr = (tmp_phys_addr & phys_addr_mask) | + (virt_addr & page_offset_mask); + } else { + /* +* find the correct hop shift field in hl_mmu_properties +* structure in order to determine the right masks +* for the page offset. +*/ + hop0_shift_off = offsetof(struct hl_mmu_properties, hop0_shift); + p = (char *)p + hop0_shift_off; + p = (char *)p + ((hops->used_hops - 1) * sizeof(u64)); + hop_shift = *(u64 *)p; + offset_mask = (1ull << hop_shift) - 1; + addr_mask = ~(offset_mask); + *phys_addr = (tmp_phys_addr & addr_mask) | + (virt_addr & offset_mask); + } } int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr) -- 2.25.1
Re: [PATCH] banalabs: Switch to using the new API kobj_to_dev()
On Tue, Feb 23, 2021 at 11:12 AM Yang Li wrote: > > fixed the following coccicheck: > ./drivers/misc/habanalabs/common/sysfs.c:347:60-61: WARNING opportunity > for kobj_to_dev() > > Reported-by: Abaci Robot > Signed-off-by: Yang Li > --- > drivers/misc/habanalabs/common/sysfs.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/misc/habanalabs/common/sysfs.c > b/drivers/misc/habanalabs/common/sysfs.c > index 4366d8f..79c1ddf 100644 > --- a/drivers/misc/habanalabs/common/sysfs.c > +++ b/drivers/misc/habanalabs/common/sysfs.c > @@ -344,7 +344,7 @@ static ssize_t eeprom_read_handler(struct file *filp, > struct kobject *kobj, > struct bin_attribute *attr, char *buf, loff_t offset, > size_t max_size) > { > - struct device *dev = container_of(kobj, struct device, kobj); > + struct device *dev = kobj_to_dev(kobj); > struct hl_device *hdev = dev_get_drvdata(dev); > char *data; > int rc; > -- > 1.8.3.1 > Reviewed-by: Oded Gabbay Applied to -next. Thanks, Oded
[PATCH 2/2] habanalabs: update hl_boot_if.h
From: Ohad Sharabi Update to the latest version of the file as supplied by the F/W. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/include/common/hl_boot_if.h | 11 +++ 1 file changed, 11 insertions(+) diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index e87f5a98e193..d17185b6aea9 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -170,6 +170,15 @@ * is set to the PI counter. * Initialized in: linux * + * CPU_BOOT_DEV_STS0_FW_LD_COM_EN Flexible FW loading communication + * protocol is enabled. + * Initialized in: preboot + * + * CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN FW iATU configuration is enabled. + * This bit if set, means the iATU has been + * configured and is ready for use. + * Initialized in: ppboot + * * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. * This is a main indication that the * running FW populates the device status @@ -195,6 +204,8 @@ #define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << 13) #define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << 14) #define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN(1 << 15) +#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << 16) +#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN (1 << 17) #define CPU_BOOT_DEV_STS0_ENABLED (1 << 31) enum cpu_boot_status { -- 2.25.1
[PATCH 1/2] habanalabs: skip DISABLE PCI packet to FW on heartbeat
From: Ohad Sharabi if reset is due to heartbeat, device CPU is no responsive in which case no point sending PCI disable message to it. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../habanalabs/common/command_submission.c| 4 +- drivers/misc/habanalabs/common/debugfs.c | 2 +- drivers/misc/habanalabs/common/device.c | 25 +++- drivers/misc/habanalabs/common/habanalabs.h | 20 +- drivers/misc/habanalabs/common/sysfs.c| 4 +- drivers/misc/habanalabs/gaudi/gaudi.c | 40 --- drivers/misc/habanalabs/goya/goya.c | 8 ++-- 7 files changed, 59 insertions(+), 44 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 7d0d3e61a6e7..34a98e3c3bf8 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -620,7 +620,7 @@ static void cs_timedout(struct work_struct *work) cs_put(cs); if (hdev->reset_on_lockup) - hl_device_reset(hdev, false, false); + hl_device_reset(hdev, 0); else hdev->needs_reset = true; } @@ -1473,7 +1473,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, out: if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset)) - hl_device_reset(hdev, false, false); + hl_device_reset(hdev, 0); return rc; } diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index d4a49d6d1753..94d574953dd0 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -887,7 +887,7 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf, hdev->stop_on_err = value ? 1 : 0; - hl_device_reset(hdev, false, false); + hl_device_reset(hdev, 0); return count; } diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 2cbe9d04c229..756d7f2a7a2d 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -72,7 +72,7 @@ static void hpriv_release(struct kref *ref) kfree(hpriv); if (hdev->reset_upon_device_release) - hl_device_reset(hdev, false, false); + hl_device_reset(hdev, 0); } void hl_hpriv_get(struct hl_fpriv *hpriv) @@ -276,7 +276,7 @@ static void device_hard_reset_pending(struct work_struct *work) struct hl_device *hdev = device_reset_work->hdev; int rc; - rc = hl_device_reset(hdev, true, true); + rc = hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD); if ((rc == -EBUSY) && !hdev->device_fini_pending) { dev_info(hdev->dev, "Could not reset device. will try again in %u seconds", @@ -478,7 +478,7 @@ static void hl_device_heartbeat(struct work_struct *work) goto reschedule; dev_err(hdev->dev, "Device heartbeat failed!\n"); - hl_device_reset(hdev, true, false); + hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_HEARTBEAT); return; @@ -802,7 +802,7 @@ int hl_device_resume(struct hl_device *hdev) hdev->disabled = false; atomic_set(&hdev->in_reset, 0); - rc = hl_device_reset(hdev, true, false); + rc = hl_device_reset(hdev, HL_RESET_HARD); if (rc) { dev_err(hdev->dev, "Failed to reset device during resume\n"); goto disable_device; @@ -898,9 +898,7 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout) * hl_device_reset - reset the device * * @hdev: pointer to habanalabs device structure - * @hard_reset: should we do hard reset to all engines or just reset the - * compute/dma engines - * @from_hard_reset_thread: is the caller the hard-reset thread + * @flags: reset flags. * * Block future CS and wait for pending CS to be enqueued * Call ASIC H/W fini @@ -912,10 +910,10 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout) * * Returns 0 for success or an error on failure. */ -int hl_device_reset(struct hl_device *hdev, bool hard_reset, - bool from_hard_reset_thread) +int hl_device_reset(struct hl_device *hdev, u32 flags) { u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; + bool hard_reset, from_hard_reset_thread; int i, rc; if (!hdev->init_done) { @@ -924,6 +922,9 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, return 0; } + hard_reset = (flags & HL_RESET_HARD) != 0; + from_hard_reset_thread = (flags & HL_RESET_FROM_RESET_THREAD) != 0; + if ((
[PATCH 4/4] habanalabs: replace GFP_ATOMIC with GFP_KERNEL
From: Ofir Bitton As there are incorrect assumptions in which some of the initialization and data path flows cannot sleep, most allocations are being done using GFP_ATOMIC. We modify the code to use GFP_ATOMIC only when realy needed, as sleepable flow should use GFP_KERNEL. In addition add a fallback to allocate memory using GFP_KERNEL, once ATOMIC allocation fails. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../misc/habanalabs/common/command_buffer.c | 12 +++--- .../habanalabs/common/command_submission.c| 22 ++- drivers/misc/habanalabs/common/device.c | 2 +- drivers/misc/habanalabs/common/memory.c | 10 +++-- drivers/misc/habanalabs/gaudi/gaudi.c | 2 +- drivers/misc/habanalabs/goya/goya.c | 2 +- 6 files changed, 36 insertions(+), 14 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index d9adb9a5e4d8..719168c980a4 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -181,7 +181,7 @@ static void cb_release(struct kref *ref) static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, int ctx_id, bool internal_cb) { - struct hl_cb *cb; + struct hl_cb *cb = NULL; u32 cb_offset; void *p; @@ -193,9 +193,10 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, * the kernel's copy. Hence, we must never sleep in this code section * and must use GFP_ATOMIC for all memory allocations. */ - if (ctx_id == HL_KERNEL_ASID_ID) + if (ctx_id == HL_KERNEL_ASID_ID && !hdev->disabled) cb = kzalloc(sizeof(*cb), GFP_ATOMIC); - else + + if (!cb) cb = kzalloc(sizeof(*cb), GFP_KERNEL); if (!cb) @@ -214,6 +215,9 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, } else if (ctx_id == HL_KERNEL_ASID_ID) { p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, GFP_ATOMIC); + if (!p) + p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, + cb_size, &cb->bus_address, GFP_KERNEL); } else { p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size, &cb->bus_address, @@ -310,6 +314,8 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, spin_lock(&mgr->cb_lock); rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC); + if (rc < 0) + rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_KERNEL); spin_unlock(&mgr->cb_lock); if (rc < 0) { diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 98fdb98ff4e4..7d0d3e61a6e7 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -638,6 +638,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cntr = &hdev->aggregated_cs_counters; cs = kzalloc(sizeof(*cs), GFP_ATOMIC); + if (!cs) + cs = kzalloc(sizeof(*cs), GFP_KERNEL); + if (!cs) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt); @@ -657,6 +660,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, spin_lock_init(&cs->job_lock); cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC); + if (!cs_cmpl) + cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_KERNEL); + if (!cs_cmpl) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt); @@ -698,6 +704,10 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); + if (!cs->jobs_in_queue_cnt) + cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, + sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL); + if (!cs->jobs_in_queue_cnt) { atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt); @@ -927,6 +937,9 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, struct hl_cs_job *job; job = kzalloc(sizeof(*job), GFP_ATOMIC); + if (!job) + j
[PATCH 3/4] habanalabs/gaudi: update extended async event header
From: Ofir Bitton Update to the latest definition of the firmware Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../include/gaudi/gaudi_async_ids_map_extended.h | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h index 737176ba06fb..9106395eb920 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h @@ -301,10 +301,10 @@ static struct gaudi_async_events_ids_map gaudi_irq_map_table[] = { { .fc_id = 274, .cpu_id = 128, .valid = 0, .name = "" }, { .fc_id = 275, .cpu_id = 128, .valid = 0, .name = "" }, { .fc_id = 276, .cpu_id = 128, .valid = 0, .name = "" }, - { .fc_id = 277, .cpu_id = 129, .valid = 0, .name = "" }, - { .fc_id = 278, .cpu_id = 129, .valid = 0, .name = "" }, - { .fc_id = 279, .cpu_id = 129, .valid = 0, .name = "" }, - { .fc_id = 280, .cpu_id = 129, .valid = 0, .name = "" }, + { .fc_id = 277, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_0" }, + { .fc_id = 278, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_1" }, + { .fc_id = 279, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_2" }, + { .fc_id = 280, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_3" }, { .fc_id = 281, .cpu_id = 130, .valid = 0, .name = "" }, { .fc_id = 282, .cpu_id = 131, .valid = 0, .name = "" }, { .fc_id = 283, .cpu_id = 132, .valid = 0, .name = "" }, @@ -681,7 +681,7 @@ static struct gaudi_async_events_ids_map gaudi_irq_map_table[] = { { .fc_id = 654, .cpu_id = 503, .valid = 0, .name = "" }, { .fc_id = 655, .cpu_id = 504, .valid = 0, .name = "" }, { .fc_id = 656, .cpu_id = 505, .valid = 0, .name = "" }, - { .fc_id = 657, .cpu_id = 506, .valid = 0, .name = "" }, + { .fc_id = 657, .cpu_id = 506, .valid = 1, .name = "PKT_QUEUE_ASYNC" }, { .fc_id = 658, .cpu_id = 507, .valid = 1, .name = "FIX_POWER_ENV_S" }, { .fc_id = 659, .cpu_id = 508, .valid = 1, .name = "FIX_POWER_ENV_E" }, { .fc_id = 660, .cpu_id = 509, .valid = 1, -- 2.25.1
[PATCH 2/4] habanalabs: return current power via INFO IOCTL
From: Sagiv Ozeri Add driver implementation for reading the current power from the device CPU F/W. Signed-off-by: Sagiv Ozeri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 23 +++ drivers/misc/habanalabs/common/habanalabs.h | 1 + .../misc/habanalabs/common/habanalabs_ioctl.c | 22 ++ .../misc/habanalabs/include/common/cpucp_if.h | 5 include/uapi/misc/habanalabs.h| 9 5 files changed, 60 insertions(+) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 6f3692bf5eff..2a58edaf984a 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -565,6 +565,29 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, return rc; } +int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power) +{ + struct cpucp_packet pkt; + u64 result; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + HL_CPUCP_INFO_TIMEOUT_USEC, &result); + if (rc) { + dev_err(hdev->dev, "Failed to read power, error %d\n", rc); + return rc; + } + + *power = result; + + return rc; +} + static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) { /* Some of the status codes below are deprecated in newer f/w diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 9129582e9339..33ada9425a62 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2363,6 +2363,7 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy); int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index, u16 *pll_freq_arr); +int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power); int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, u32 cpu_security_boot_status_reg, u32 boot_err0_reg, diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 95a0b144604d..3f762e021428 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -444,6 +444,25 @@ static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args) min((size_t) max_size, sizeof(freq_info))) ? -EFAULT : 0; } +static int power_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + struct hl_device *hdev = hpriv->hdev; + u32 max_size = args->return_size; + struct hl_power_info power_info = {0}; + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + rc = hl_fw_cpucp_power_get(hdev, &power_info.power); + if (rc) + return rc; + + return copy_to_user(out, &power_info, + min((size_t) max_size, sizeof(power_info))) ? -EFAULT : 0; +} + static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, struct device *dev) { @@ -524,6 +543,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_PLL_FREQUENCY: return pll_frequency_info(hpriv, args); + case HL_INFO_POWER: + return power_info(hpriv, args); + default: dev_err(dev, "Invalid request %d\n", args->op); rc = -ENOTTY; diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index bf4e7900d8c8..6ba480a316ce 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -296,6 +296,9 @@ enum pq_init_status { * The result is composed of 4 outputs, each is 16-bit * frequency in MHz. * + * CPUCP_PACKET_POWER_GET + * Fetch the present power consumption of the device (Current * Voltage). + * */ enum cpucp_packet_id { @@ -329,6 +332,8 @@ enum cpucp_packet_id { CPUCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */ CPUCP_PACKET_TOTAL_ENERGY_GET, /* internal */ CPUCP_PACKET_PLL_INFO_GET, /* internal */ + CPUCP_PACKET_NIC_STATUS,/* internal */ + CPUCP_PACKET_POWER_GET, /* internal */ }; #define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 diff --git a/in
[PATCH 1/4] habanalabs: support HW blocks vm show
From: Sagiv Ozeri Improve "vm" debugfs node to print also the virtual addresses which are currently mapped to HW blocks in the device. Signed-off-by: Sagiv Ozeri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../ABI/testing/debugfs-driver-habanalabs | 2 +- drivers/misc/habanalabs/common/context.c | 14 +++- drivers/misc/habanalabs/common/debugfs.c | 16 + drivers/misc/habanalabs/common/habanalabs.h | 24 +++ drivers/misc/habanalabs/common/memory.c | 65 +-- 5 files changed, 113 insertions(+), 8 deletions(-) diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs index d447a611c41b..f9e233cbdc37 100644 --- a/Documentation/ABI/testing/debugfs-driver-habanalabs +++ b/Documentation/ABI/testing/debugfs-driver-habanalabs @@ -174,7 +174,7 @@ Date: Jan 2019 KernelVersion: 5.1 Contact:ogab...@kernel.org Description:Displays a list with information about all the active virtual -address mappings per ASID +address mappings per ASID and all user mappings of HW blocks What: /sys/kernel/debug/habanalabs/hl/stop_on_err Date: Mar 2020 diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index cda871afb8f4..62d705889ca8 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -20,6 +20,11 @@ static void hl_ctx_fini(struct hl_ctx *ctx) */ hl_pending_cb_list_flush(ctx); + /* Release all allocated HW block mapped list entries and destroy +* the mutex. +*/ + hl_hw_block_mem_fini(ctx); + /* * If we arrived here, there are no jobs waiting for this context * on its queues so we can safely remove it. @@ -160,13 +165,15 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) if (!ctx->cs_pending) return -ENOMEM; + hl_hw_block_mem_init(ctx); + if (is_kernel_ctx) { ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */ rc = hl_vm_ctx_init(ctx); if (rc) { dev_err(hdev->dev, "Failed to init mem ctx module\n"); rc = -ENOMEM; - goto err_free_cs_pending; + goto err_hw_block_mem_fini; } rc = hdev->asic_funcs->ctx_init(ctx); @@ -179,7 +186,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) if (!ctx->asid) { dev_err(hdev->dev, "No free ASID, failed to create context\n"); rc = -ENOMEM; - goto err_free_cs_pending; + goto err_hw_block_mem_fini; } rc = hl_vm_ctx_init(ctx); @@ -214,7 +221,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) err_asid_free: if (ctx->asid != HL_KERNEL_ASID_ID) hl_asid_free(hdev, ctx->asid); -err_free_cs_pending: +err_hw_block_mem_fini: + hl_hw_block_mem_fini(ctx); kfree(ctx->cs_pending); return rc; diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index df847a6d19f4..d4a49d6d1753 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -229,6 +229,7 @@ static int vm_show(struct seq_file *s, void *data) { struct hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; + struct hl_vm_hw_block_list_node *lnode; struct hl_ctx *ctx; struct hl_vm *vm; struct hl_vm_hash_node *hnode; @@ -272,6 +273,21 @@ static int vm_show(struct seq_file *s, void *data) } mutex_unlock(&ctx->mem_hash_lock); + if (ctx->asid != HL_KERNEL_ASID_ID && + !list_empty(&ctx->hw_block_mem_list)) { + seq_puts(s, "\nhw_block mappings:\n\n"); + seq_puts(s, "virtual addresssizeHW block id\n"); + seq_puts(s, "---\n"); + mutex_lock(&ctx->hw_block_list_lock); + list_for_each_entry(lnode, &ctx->hw_block_mem_list, + node) { + seq_printf(s, + "0x%-14lx %-6u %-9u\n", + lnode->vaddr, lnode->size, lnode->id); + } + mutex_unlock(&ctx-&
[PATCH] habanalabs: use a single FW loading bringup flag
From: Ofir Bitton For simplicity, use a single bringup flag indicating which FW binaries should loaded to device. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c| 10 +++--- drivers/misc/habanalabs/common/habanalabs.h | 11 +++ drivers/misc/habanalabs/common/habanalabs_drv.c | 3 +-- drivers/misc/habanalabs/gaudi/gaudi.c | 2 +- drivers/misc/habanalabs/goya/goya.c | 2 +- 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 09706c571e95..6f3692bf5eff 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -623,7 +623,11 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 status, security_status; int rc; - if (!hdev->cpu_enable) + /* pldm was added for cases in which we use preboot on pldm and want +* to load boot fit, but we can't wait for preboot because it runs +* very slowly +*/ + if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) || hdev->pldm) return 0; /* Need to check two possible scenarios: @@ -710,7 +714,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 status; int rc; - if (!(hdev->fw_loading & FW_TYPE_BOOT_CPU)) + if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) return 0; dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n", @@ -801,7 +805,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, goto out; } - if (!(hdev->fw_loading & FW_TYPE_LINUX)) { + if (!(hdev->fw_components & FW_TYPE_LINUX)) { dev_info(hdev->dev, "Skip loading Linux F/W\n"); goto out; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index ee94953f9ea2..442eca71fd2a 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -171,15 +171,19 @@ enum hl_fw_component { }; /** - * enum hl_fw_types - F/W types to load + * enum hl_fw_types - F/W types present in the system * @FW_TYPE_LINUX: Linux image for device CPU * @FW_TYPE_BOOT_CPU: Boot image for device CPU + * @FW_TYPE_PREBOOT_CPU: Indicates pre-loaded CPUs are present in the system + * (preboot, ppboot etc...) * @FW_TYPE_ALL_TYPES: Mask for all types */ enum hl_fw_types { FW_TYPE_LINUX = 0x1, FW_TYPE_BOOT_CPU = 0x2, - FW_TYPE_ALL_TYPES = (FW_TYPE_LINUX | FW_TYPE_BOOT_CPU) + FW_TYPE_PREBOOT_CPU = 0x4, + FW_TYPE_ALL_TYPES = + (FW_TYPE_LINUX | FW_TYPE_BOOT_CPU | FW_TYPE_PREBOOT_CPU) }; /** @@ -2066,10 +2070,9 @@ struct hl_device { /* Parameters for bring-up */ u64 nic_ports_mask; - u64 fw_loading; + u64 fw_components; u8 mmu_enable; u8 mmu_huge_page_opt; - u8 cpu_enable; u8 reset_pcilink; u8 cpu_queues_enable; u8 pldm; diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index f695ea6253c1..59896566dca1 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -234,8 +234,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) static void set_driver_behavior_per_device(struct hl_device *hdev) { - hdev->cpu_enable = 1; - hdev->fw_loading = FW_TYPE_ALL_TYPES; + hdev->fw_components = FW_TYPE_ALL_TYPES; hdev->cpu_queues_enable = 1; hdev->heartbeat = 1; hdev->mmu_enable = 1; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index c1f00237273c..4f39737b4808 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -3701,7 +3701,7 @@ static int gaudi_init_cpu(struct hl_device *hdev) struct gaudi_device *gaudi = hdev->asic_specific; int rc; - if (!hdev->cpu_enable) + if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) return 0; if (gaudi->hw_cap_initialized & HW_CAP_CPU) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index a40c428fed94..20abed6ef7e6 100644 --- a/drivers/misc/habanalabs/goya/goya.c +
[PATCH 2/3] habanalabs: wait for interrupt support
From: Ofir Bitton In order to support command submissions from user space, the driver need to add support for user interrupt completions. The driver will allow multiple user threads to wait for an interrupt and perform a comparison with a given user address once interrupt expires. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../habanalabs/common/command_submission.c| 213 +- drivers/misc/habanalabs/common/device.c | 44 ++-- drivers/misc/habanalabs/common/habanalabs.h | 28 ++- .../misc/habanalabs/common/habanalabs_ioctl.c | 2 +- drivers/misc/habanalabs/common/irq.c | 22 +- include/uapi/misc/habanalabs.h| 42 +++- 6 files changed, 322 insertions(+), 29 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 7bd4a03b3429..138cb77420bd 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -778,6 +778,44 @@ void hl_pending_cb_list_flush(struct hl_ctx *ctx) } } +static void +wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt) +{ + struct hl_user_pending_interrupt *pend; + + spin_lock(&interrupt->wait_list_lock); + list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) { + pend->fence.error = -EIO; + complete_all(&pend->fence.completion); + } + spin_unlock(&interrupt->wait_list_lock); +} + +void hl_release_pending_user_interrupts(struct hl_device *hdev) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_user_interrupt *interrupt; + int i; + + if (!prop->user_interrupt_count) + return; + + /* We iterate through the user interrupt requests and waking up all +* user threads waiting for interrupt completion. We iterate the +* list under a lock, this is why all user threads, once awake, +* will wait on the same lock and will release the waiting object upon +* unlock. +*/ + + for (i = 0 ; i < prop->user_interrupt_count ; i++) { + interrupt = &hdev->user_interrupt[i]; + wake_pending_user_interrupt_threads(interrupt); + } + + interrupt = &hdev->common_user_interrupt; + wake_pending_user_interrupt_threads(interrupt); +} + static void job_wq_completion(struct work_struct *work) { struct hl_cs_job *job = container_of(work, struct hl_cs_job, @@ -1818,7 +1856,7 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, return rc; } -int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) +static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) { struct hl_device *hdev = hpriv->hdev; union hl_wait_cs_args *args = data; @@ -1873,3 +1911,176 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) return 0; } + +static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, + u32 timeout_us, u64 user_address, + u32 target_value, u16 interrupt_offset, + enum hl_cs_wait_status *status) +{ + struct hl_user_pending_interrupt *pend; + struct hl_user_interrupt *interrupt; + unsigned long timeout; + long completion_rc; + u32 completion_value; + int rc = 0; + + if (timeout_us == MAX_SCHEDULE_TIMEOUT) + timeout = timeout_us; + else + timeout = usecs_to_jiffies(timeout_us); + + hl_ctx_get(hdev, ctx); + + pend = kmalloc(sizeof(*pend), GFP_ATOMIC); + if (!pend) { + hl_ctx_put(ctx); + return -ENOMEM; + } + + hl_fence_init(&pend->fence, ULONG_MAX); + + if (interrupt_offset == HL_COMMON_USER_INTERRUPT_ID) + interrupt = &hdev->common_user_interrupt; + else + interrupt = &hdev->user_interrupt[interrupt_offset]; + + spin_lock(&interrupt->wait_list_lock); + if (!hl_device_operational(hdev, NULL)) { + rc = -EPERM; + goto unlock_and_free_fence; + } + + if (copy_from_user(&completion_value, (void *) user_address, 4)) { + dev_err(hdev->dev, + "Failed to copy completion value from user\n"); + rc = -EFAULT; + goto unlock_and_free_fence; + } + + if (completion_value >= target_value) + *status = CS_WAIT_STATUS_COMPLETED; + else + *status = CS_WAIT_STATUS_BUSY; + + if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED)) + goto unlock_and_free_fence; + + /* Add pending user in
[PATCH 3/3] habanalabs: use correct define for 32-bit max value
Timeout in wait for interrupt is in 32-bit variable so we need to use the correct maximum value to compare. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 138cb77420bd..98fdb98ff4e4 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -1924,7 +1924,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u32 completion_value; int rc = 0; - if (timeout_us == MAX_SCHEDULE_TIMEOUT) + if (timeout_us == U32_MAX) timeout = timeout_us; else timeout = usecs_to_jiffies(timeout_us); -- 2.25.1
[PATCH 1/3] habanalabs: enable all IRQs for user interrupt support
From: Ofir Bitton In order to support user interrupts, driver must enable all MSI-X interrupts for any case user will trigger them. We differentiate between a valid user interrupt and a non valid one. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 20 +-- drivers/misc/habanalabs/common/habanalabs.h | 16 + drivers/misc/habanalabs/common/irq.c| 40 + 3 files changed, 74 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 6948a1c54083..06395c79f07d 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1223,7 +1223,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, */ int hl_device_init(struct hl_device *hdev, struct class *hclass) { - int i, rc, cq_cnt, cq_ready_cnt; + int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt; char *name; bool add_cdev_sysfs_on_err = false; @@ -1312,6 +1312,19 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) hdev->completion_queue[i].cq_idx = i; } + user_interrupt_cnt = hdev->asic_prop.user_interrupt_count; + + if (user_interrupt_cnt) { + hdev->user_interrupt = kcalloc(user_interrupt_cnt, + sizeof(*hdev->user_interrupt), + GFP_KERNEL); + + if (!hdev->user_interrupt) { + rc = -ENOMEM; + goto cq_fini; + } + } + /* * Initialize the event queue. Must be done before hw_init, * because there the address of the event queue is being @@ -1320,7 +1333,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) rc = hl_eq_init(hdev, &hdev->event_queue); if (rc) { dev_err(hdev->dev, "failed to initialize event queue\n"); - goto cq_fini; + goto user_interrupts_fini; } /* MMU S/W must be initialized before kernel context is created */ @@ -1458,6 +1471,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) hl_mmu_fini(hdev); eq_fini: hl_eq_fini(hdev, &hdev->event_queue); +user_interrupts_fini: + kfree(hdev->user_interrupt); cq_fini: for (i = 0 ; i < cq_ready_cnt ; i++) hl_cq_fini(hdev, &hdev->completion_queue[i]); @@ -1595,6 +1610,7 @@ void hl_device_fini(struct hl_device *hdev) for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) hl_cq_fini(hdev, &hdev->completion_queue[i]); kfree(hdev->completion_queue); + kfree(hdev->user_interrupt); hl_hw_queues_destroy(hdev); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 046bb44f70f9..bb6cb88d7257 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -412,6 +412,7 @@ struct hl_mmu_properties { * @first_available_user_msix_interrupt: first available msix interrupt * reserved for the user * @first_available_cq: first available CQ for the user. + * @user_interrupt_count: number of user interrupts. * @tpc_enabled_mask: which TPCs are enabled. * @completion_queues_count: number of completion queues. * @fw_security_disabled: true if security measures are disabled in firmware, @@ -475,6 +476,7 @@ struct asic_fixed_properties { u16 first_available_user_mon[HL_MAX_DCORES]; u16 first_available_user_msix_interrupt; u16 first_available_cq[HL_MAX_DCORES]; + u16 user_interrupt_count; u8 tpc_enabled_mask; u8 completion_queues_count; u8 fw_security_disabled; @@ -689,6 +691,16 @@ struct hl_cq { atomic_tfree_slots_cnt; }; +/** + * struct hl_user_interrupt - holds user interrupt information + * @hdev: pointer to the device structure + * @interrupt_id: msix interrupt id + */ +struct hl_user_interrupt { + struct hl_device*hdev; + u32 interrupt_id; +}; + /** * struct hl_eq - describes the event queue (single one per device) * @hdev: pointer to the device structure @@ -1821,6 +1833,7 @@ struct hl_mmu_funcs { * @asic_name: ASIC specific name. * @asic_type: ASIC specific type. * @completion_queue: array of hl_cq. + * @user_interrupt: array of hl_user_interrupt. * @cq_wq: work queues of completion queues for executing work in process * context. * @eq_wq: w
[PATCH 1/2] habanalabs: change default CS timeout to 30 seconds
Because our graph contains network operations, we need to account for delay in the network. 5 seconds timeout per CS is not enough to account for that. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 032d114f01ea..f695ea6253c1 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -27,13 +27,13 @@ static struct class *hl_class; static DEFINE_IDR(hl_devs_idr); static DEFINE_MUTEX(hl_devs_idr_lock); -static int timeout_locked = 5; +static int timeout_locked = 30; static int reset_on_lockup = 1; static int memory_scrub = 1; module_param(timeout_locked, int, 0444); MODULE_PARM_DESC(timeout_locked, - "Device lockup timeout in seconds (0 = disabled, default 5s)"); + "Device lockup timeout in seconds (0 = disabled, default 30s)"); module_param(reset_on_lockup, int, 0444); MODULE_PARM_DESC(reset_on_lockup, -- 2.25.1
[PATCH 2/2] habanalabs: reset device in case of sync error
From: Ohad Sharabi As the F/wW is the first to detect out of sync event, a new event is added to notify the driver on such event. In which case the driver performs hard reset. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 18 +++ drivers/misc/habanalabs/goya/goya.c | 23 +++ .../misc/habanalabs/include/common/cpucp_if.h | 9 .../include/gaudi/gaudi_async_events.h| 1 + .../include/goya/goya_async_events.h | 1 + 5 files changed, 52 insertions(+) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 9152242778f5..c1f00237273c 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7097,6 +7097,15 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, } } +static void gaudi_print_out_of_sync_info(struct hl_device *hdev, + struct cpucp_pkt_sync_err *sync_err) +{ + struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; + + dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n", + sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci)); +} + static int gaudi_soft_reset_late_init(struct hl_device *hdev) { struct gaudi_device *gaudi = hdev->asic_specific; @@ -7552,6 +7561,15 @@ static void gaudi_handle_eqe(struct hl_device *hdev, event_type, cause); break; + case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: + gaudi_print_irq_info(hdev, event_type, false); + gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); + if (hdev->hard_reset_on_fw_events) + hl_device_reset(hdev, true, false); + else + hl_fw_unmask_irq(hdev, event_type); + break; + default: dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", event_type); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index ed566c52ccaa..a40c428fed94 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -4401,6 +4401,8 @@ static const char *_goya_get_event_desc(u16 event_type) return "THERMAL_ENV_S"; case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E: return "THERMAL_ENV_E"; + case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC: + return "QUEUE_OUT_OF_SYNC"; default: return "N/A"; } @@ -4483,6 +4485,9 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size) index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0; snprintf(desc, size, _goya_get_event_desc(event_type), index); break; + case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC: + snprintf(desc, size, _goya_get_event_desc(event_type)); + break; default: snprintf(desc, size, _goya_get_event_desc(event_type)); break; @@ -4534,6 +4539,15 @@ static void goya_print_mmu_error_info(struct hl_device *hdev) } } +static void goya_print_out_of_sync_info(struct hl_device *hdev, + struct cpucp_pkt_sync_err *sync_err) +{ + struct hl_hw_queue *q = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ]; + + dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n", + sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci)); +} + static void goya_print_irq_info(struct hl_device *hdev, u16 event_type, bool razwi) { @@ -4754,6 +4768,15 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) goya_unmask_irq(hdev, event_type); break; + case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC: + goya_print_irq_info(hdev, event_type, false); + goya_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); + if (hdev->hard_reset_on_fw_events) + hl_device_reset(hdev, true, false); + else + hl_fw_unmask_irq(hdev, event_type); + break; + default: dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", event_type); diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index b77c1c16c32c..bf4e7900d8c8 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/
[PATCH 5/5] habanalabs: print if device is used on FD close
Notify to the user that although he closed the FD, the device is still in use because there are live CS and/or memory mappings (mmaps). Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 8 +--- drivers/misc/habanalabs/common/habanalabs.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 48d301a03d62..6948a1c54083 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -80,9 +80,9 @@ void hl_hpriv_get(struct hl_fpriv *hpriv) kref_get(&hpriv->refcount); } -void hl_hpriv_put(struct hl_fpriv *hpriv) +int hl_hpriv_put(struct hl_fpriv *hpriv) { - kref_put(&hpriv->refcount, hpriv_release); + return kref_put(&hpriv->refcount, hpriv_release); } /* @@ -103,7 +103,9 @@ static int hl_device_release(struct inode *inode, struct file *filp) filp->private_data = NULL; - hl_hpriv_put(hpriv); + if (!hl_hpriv_put(hpriv)) + dev_warn(hdev->dev, + "Device is still in use because there are live CS and/or memory mappings\n"); return 0; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 9ba48f322964..046bb44f70f9 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2182,7 +2182,7 @@ int hl_device_resume(struct hl_device *hdev); int hl_device_reset(struct hl_device *hdev, bool hard_reset, bool from_hard_reset_thread); void hl_hpriv_get(struct hl_fpriv *hpriv); -void hl_hpriv_put(struct hl_fpriv *hpriv); +int hl_hpriv_put(struct hl_fpriv *hpriv); int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq); uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms); -- 2.25.1
[PATCH 3/5] habanalabs: fail reset if device is not idle
After any reset (soft or hard) the device (the engines/QMANs) should be idle. If they are not idle, fail the reset. If it is soft-reset, the driver will try to do hard-reset automatically. If it is hard-reset, the driver will make the device non-operational. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 25 +++-- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 8cc3264ae378..48d301a03d62 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -71,21 +71,8 @@ static void hpriv_release(struct kref *ref) kfree(hpriv); - if (hdev->reset_upon_device_release) { - u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; - - /* We try soft reset first */ + if (hdev->reset_upon_device_release) hl_device_reset(hdev, false, false); - - /* If device is not idle perform hard reset */ - if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask, - HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) { - dev_info(hdev->dev, - "device is not idle (mask %#llx %#llx) after soft reset, performing hard reset", - idle_mask[0], idle_mask[1]); - hl_device_reset(hdev, true, false); - } - } } void hl_hpriv_get(struct hl_fpriv *hpriv) @@ -921,6 +908,7 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout) int hl_device_reset(struct hl_device *hdev, bool hard_reset, bool from_hard_reset_thread) { + u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; int i, rc; if (!hdev->init_done) { @@ -1140,6 +1128,15 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, goto out_err; } + /* If device is not idle fail the reset process */ + if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask, + HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) { + dev_err(hdev->dev, + "device is not idle (mask %#llx %#llx) after reset\n", + idle_mask[0], idle_mask[1]); + goto out_err; + } + /* Check that the communication with the device is working */ rc = hdev->asic_funcs->test_queues(hdev); if (rc) { -- 2.25.1
[PATCH 4/5] habanalabs: reset_upon_device_release is for bring-up
Move the field to correct location in structure and remove comment. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 706361a81410..9ba48f322964 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1920,7 +1920,6 @@ struct hl_mmu_funcs { * @device_fini_pending: true if device_fini was called and might be * waiting for the reset thread to finish * @supports_staged_submission: true if staged submissions are supported - * @reset_upon_device_release: true if reset is required upon device release */ struct hl_device { struct pci_dev *pdev; @@ -2027,7 +2026,6 @@ struct hl_device { u8 process_kill_trial_cnt; u8 device_fini_pending; u8 supports_staged_submission; - u8 reset_upon_device_release; /* Parameters for bring-up */ u64 nic_ports_mask; @@ -2045,6 +2043,7 @@ struct hl_device { u8 bmc_enable; u8 rl_enable; u8 reset_on_preboot_fail; + u8 reset_upon_device_release; }; -- 2.25.1
[PATCH 2/5] habanalabs: reset after device is actually released
The device is actually released only after the refcnt of the hpriv structure is 0, which means all its contexts were closed. If we reset the device while a context is still open, there are possibilities for unexpected behavior and crashes. For example, if the process has a mapping of a register block that is now currently being reset, and the process writes/reads to that block during the reset, the device can get stuck. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 32 - 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index ed1838c15c78..8cc3264ae378 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -70,6 +70,22 @@ static void hpriv_release(struct kref *ref) mutex_unlock(&hdev->fpriv_list_lock); kfree(hpriv); + + if (hdev->reset_upon_device_release) { + u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; + + /* We try soft reset first */ + hl_device_reset(hdev, false, false); + + /* If device is not idle perform hard reset */ + if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask, + HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) { + dev_info(hdev->dev, + "device is not idle (mask %#llx %#llx) after soft reset, performing hard reset", + idle_mask[0], idle_mask[1]); + hl_device_reset(hdev, true, false); + } + } } void hl_hpriv_get(struct hl_fpriv *hpriv) @@ -98,22 +114,6 @@ static int hl_device_release(struct inode *inode, struct file *filp) hl_cb_mgr_fini(hdev, &hpriv->cb_mgr); hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr); - if (hdev->reset_upon_device_release) { - u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; - - /* We try soft reset first */ - hl_device_reset(hdev, false, false); - - /* If device is not idle perform hard reset */ - if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask, - HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) { - dev_info(hdev->dev, - "device is not idle (mask %#llx %#llx) after soft reset, performing hard reset", - idle_mask[0], idle_mask[1]); - hl_device_reset(hdev, true, false); - } - } - filp->private_data = NULL; hl_hpriv_put(hpriv); -- 2.25.1
[PATCH 1/5] habanalabs: add reset support when user closes FD
From: Ofir Bitton In order to support command submissions that are done directly from user space, the driver must perform soft reset once user closes its FD. In case the soft reset fails or device is not idle, a hard reset should be performed. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 21 +++-- drivers/misc/habanalabs/common/habanalabs.h | 2 ++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 15fcb5c31c4b..ed1838c15c78 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -93,9 +93,26 @@ void hl_hpriv_put(struct hl_fpriv *hpriv) static int hl_device_release(struct inode *inode, struct file *filp) { struct hl_fpriv *hpriv = filp->private_data; + struct hl_device *hdev = hpriv->hdev; - hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); - hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); + hl_cb_mgr_fini(hdev, &hpriv->cb_mgr); + hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr); + + if (hdev->reset_upon_device_release) { + u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; + + /* We try soft reset first */ + hl_device_reset(hdev, false, false); + + /* If device is not idle perform hard reset */ + if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask, + HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) { + dev_info(hdev->dev, + "device is not idle (mask %#llx %#llx) after soft reset, performing hard reset", + idle_mask[0], idle_mask[1]); + hl_device_reset(hdev, true, false); + } + } filp->private_data = NULL; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index dc9f5a83dfc9..706361a81410 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1920,6 +1920,7 @@ struct hl_mmu_funcs { * @device_fini_pending: true if device_fini was called and might be * waiting for the reset thread to finish * @supports_staged_submission: true if staged submissions are supported + * @reset_upon_device_release: true if reset is required upon device release */ struct hl_device { struct pci_dev *pdev; @@ -2026,6 +2027,7 @@ struct hl_device { u8 process_kill_trial_cnt; u8 device_fini_pending; u8 supports_staged_submission; + u8 reset_upon_device_release; /* Parameters for bring-up */ u64 nic_ports_mask; -- 2.25.1
[PATCH 2/2] habanalabs: Disable file operations after device is removed
From: Tomer Tayar A device can be removed from the PCI subsystem while a process holds the file descriptor opened. In such a case, the driver attempts to kill the process, but as it is still possible that the process will be alive after this step, the device removal will complete, and we will end up with a process object that points to a device object which was already released. To prevent the usage of this released device object, disable the following file operations for this process object, and avoid the cleanup steps when the file descriptor is eventually closed. The latter is just a best effort, as memory leak will occur. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 40 --- .../misc/habanalabs/common/habanalabs_ioctl.c | 12 ++ 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 9ecd805f0e88..334009e83823 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -93,12 +93,19 @@ void hl_hpriv_put(struct hl_fpriv *hpriv) static int hl_device_release(struct inode *inode, struct file *filp) { struct hl_fpriv *hpriv = filp->private_data; + struct hl_device *hdev = hpriv->hdev; + + filp->private_data = NULL; + + if (!hdev) { + pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n"); + put_pid(hpriv->taskpid); + return 0; + } hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); - filp->private_data = NULL; - hl_hpriv_put(hpriv); return 0; @@ -107,16 +114,19 @@ static int hl_device_release(struct inode *inode, struct file *filp) static int hl_device_release_ctrl(struct inode *inode, struct file *filp) { struct hl_fpriv *hpriv = filp->private_data; - struct hl_device *hdev; + struct hl_device *hdev = hpriv->hdev; filp->private_data = NULL; - hdev = hpriv->hdev; + if (!hdev) { + pr_err("Closing FD after device was removed\n"); + goto out; + } mutex_lock(&hdev->fpriv_list_lock); list_del(&hpriv->dev_node); mutex_unlock(&hdev->fpriv_list_lock); - +out: put_pid(hpriv->taskpid); kfree(hpriv); @@ -136,8 +146,14 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp) static int hl_mmap(struct file *filp, struct vm_area_struct *vma) { struct hl_fpriv *hpriv = filp->private_data; + struct hl_device *hdev = hpriv->hdev; unsigned long vm_pgoff; + if (!hdev) { + pr_err_ratelimited("Trying to mmap after device was removed! Please close FD\n"); + return -ENODEV; + } + vm_pgoff = vma->vm_pgoff; vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff); @@ -885,6 +901,16 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout) return -EBUSY; } +static void device_disable_open_processes(struct hl_device *hdev) +{ + struct hl_fpriv *hpriv; + + mutex_lock(&hdev->fpriv_list_lock); + list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) + hpriv->hdev = NULL; + mutex_unlock(&hdev->fpriv_list_lock); +} + /* * hl_device_reset - reset the device * @@ -1558,8 +1584,10 @@ void hl_device_fini(struct hl_device *hdev) HL_PENDING_RESET_LONG_SEC); rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC); - if (rc) + if (rc) { dev_crit(hdev->dev, "Failed to kill all open processes\n"); + device_disable_open_processes(hdev); + } hl_cb_pool_fini(hdev); diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 03af61cecd37..083a30969c5f 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -5,6 +5,8 @@ * All Rights Reserved. */ +#define pr_fmt(fmt)"habanalabs: " fmt + #include #include "habanalabs.h" @@ -682,6 +684,11 @@ long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) const struct hl_ioctl_desc *ioctl = NULL; unsigned int nr = _IOC_NR(cmd); + if (!hdev) { + pr_err_ratelimited("Sending ioctl after device was removed! Please close FD\n"); + return -ENODEV; + } + if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) { ioctl = &hl_ioctls[nr];
[PATCH 1/2] habanalabs: Call put_pid() when releasing control device
From: Tomer Tayar The refcount of the "hl_fpriv" structure is not used for the control device, and thus hl_hpriv_put() is not called when releasing this device. This results with no call to put_pid(), so add it explicitly in hl_device_release_ctrl(). Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 15fcb5c31c4b..9ecd805f0e88 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -117,6 +117,8 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp) list_del(&hpriv->dev_node); mutex_unlock(&hdev->fpriv_list_lock); + put_pid(hpriv->taskpid); + kfree(hpriv); return 0; -- 2.25.1
[PATCH] habanalabs: set max asid to 2
From: farah kassabri currently we support only 2 asids in all asics. asid 0 for driver, and asic 1 for user. no need to setup 1024 asids configurations at init phase. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/include/gaudi/gaudi.h | 2 +- drivers/misc/habanalabs/include/goya/goya.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi.h b/drivers/misc/habanalabs/include/gaudi/gaudi.h index f9ea897ae42c..ffae107b1693 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi.h @@ -38,7 +38,7 @@ #define QMAN_PQ_ENTRY_SIZE 16 /* Bytes */ -#define MAX_ASID 1024 +#define MAX_ASID 2 #define PROT_BITS_OFFS 0xF80 diff --git a/drivers/misc/habanalabs/include/goya/goya.h b/drivers/misc/habanalabs/include/goya/goya.h index 43d241891e45..1b4ca435021d 100644 --- a/drivers/misc/habanalabs/include/goya/goya.h +++ b/drivers/misc/habanalabs/include/goya/goya.h @@ -30,7 +30,7 @@ #define QMAN_PQ_ENTRY_SIZE 16 /* Bytes */ -#define MAX_ASID 1024 +#define MAX_ASID 2 #define PROT_BITS_OFFS 0xF80 -- 2.25.1
[PATCH] habanalabs: mark hl_eq_inc_ptr() as static
hl_eq_inc_ptr() is not called from anywhere outside irq.c so mark it as static Reported-by: kernel test robot Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c index de53fb5f978a..44a0522b59b9 100644 --- a/drivers/misc/habanalabs/common/irq.c +++ b/drivers/misc/habanalabs/common/irq.c @@ -47,7 +47,7 @@ inline u32 hl_cq_inc_ptr(u32 ptr) * Increment ptr by 1. If it reaches the number of event queue * entries, set it to 0 */ -inline u32 hl_eq_inc_ptr(u32 ptr) +static inline u32 hl_eq_inc_ptr(u32 ptr) { ptr++; if (unlikely(ptr == HL_EQ_LENGTH)) -- 2.25.1
Re: [PATCH] drivers: habanalabs: remove unused dentry pointer for debugfs files
On Tue, Feb 16, 2021 at 5:08 PM Greg Kroah-Hartman wrote: > > The dentry for the created debugfs file was being saved, but never used > anywhere. As the pointer isn't needed for anything, and the debugfs > files are being properly removed by removing the parent directory, > remove the saved pointer as well, saving a tiny bit of memory and logic. > > Cc: Oded Gabbay > Cc: Arnd Bergmann > Cc: Tomer Tayar > Cc: Moti Haimovski > Cc: Omer Shpigelman > Cc: Ofir Bitton > Cc: linux-kernel@vger.kernel.org > Signed-off-by: Greg Kroah-Hartman > --- > drivers/misc/habanalabs/common/debugfs.c| 5 + > drivers/misc/habanalabs/common/habanalabs.h | 2 -- > 2 files changed, 1 insertion(+), 6 deletions(-) > > diff --git a/drivers/misc/habanalabs/common/debugfs.c > b/drivers/misc/habanalabs/common/debugfs.c > index cef716643979..770b0131397d 100644 > --- a/drivers/misc/habanalabs/common/debugfs.c > +++ b/drivers/misc/habanalabs/common/debugfs.c > @@ -965,7 +965,6 @@ void hl_debugfs_add_device(struct hl_device *hdev) > struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; > int count = ARRAY_SIZE(hl_debugfs_list); > struct hl_debugfs_entry *entry; > - struct dentry *ent; > int i; > > dev_entry->hdev = hdev; > @@ -1072,13 +1071,11 @@ void hl_debugfs_add_device(struct hl_device *hdev) > &hl_stop_on_err_fops); > > for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) { > - > - ent = debugfs_create_file(hl_debugfs_list[i].name, > + debugfs_create_file(hl_debugfs_list[i].name, > 0444, > dev_entry->root, > entry, > &hl_debugfs_fops); > - entry->dent = ent; > entry->info_ent = &hl_debugfs_list[i]; > entry->dev_entry = dev_entry; > } > diff --git a/drivers/misc/habanalabs/common/habanalabs.h > b/drivers/misc/habanalabs/common/habanalabs.h > index 60e16dc4bcac..48937e9eed83 100644 > --- a/drivers/misc/habanalabs/common/habanalabs.h > +++ b/drivers/misc/habanalabs/common/habanalabs.h > @@ -1393,12 +1393,10 @@ struct hl_info_list { > > /** > * struct hl_debugfs_entry - debugfs dentry wrapper. > - * @dent: base debugfs entry structure. > * @info_ent: dentry realted ops. > * @dev_entry: ASIC specific debugfs manager. > */ > struct hl_debugfs_entry { > - struct dentry *dent; > const struct hl_info_list *info_ent; > struct hl_dbg_device_entry *dev_entry; > }; > -- > 2.30.1 > This patch is: Reviewed-by: Oded Gabbay Thanks, Oded
[git pull] habanalabs second pull request for kernel 5.12
Hi Greg, This is habanalabs second pull request for the merge window of kernel 5.12. It contains important fixes, especially in the firmware-related code. Details are in the tag. Thanks, Oded The following changes since commit 369aea84595189200a2e6b028f556a7efa0ec489: mei: implement client dma setup. (2021-02-06 15:48:11 +0100) are available in the Git repository at: https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git tags/misc-habanalabs-next-2021-02-08 for you to fetch changes up to da5dfbb97a82ff698e1dc7b229d4d4f5759dad2b: habanalabs/gaudi: don't enable clock gating on DMA5 (2021-02-08 18:20:08 +0200) This tag contains the following changes for 5.12-rc1: - Improve communication protocol with device CPU CP application. The change prevents random (rare) out-of-sync errors. - Notify F/W to start sending events only after initialization of device is done. This fixes the issue where fatal events were received but ignored. - Fix integer handling (static analysis warning). - Always fetch HBM ECC errors from F/W (if available). - Minor fix in GAUDI-specific initialization code. Oded Gabbay (4): habanalabs: fix integer handling issue habanalabs: enable F/W events after init done habanalabs: return block size + block ID habanalabs/gaudi: don't enable clock gating on DMA5 Ofir Bitton (2): habanalabs: improve communication protocol with cpucp habanalabs: support fetching first available user CQ Ohad Sharabi (2): habanalabs/gaudi: use HBM_ECC_EN bit for ECC ERR habanalabs: update security map after init CPU Qs drivers/misc/habanalabs/common/device.c| 23 -- drivers/misc/habanalabs/common/firmware_if.c | 14 ++- drivers/misc/habanalabs/common/habanalabs.h| 15 +-- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 3 +- drivers/misc/habanalabs/common/memory.c| 19 + drivers/misc/habanalabs/common/mmu/mmu.c | 2 +- drivers/misc/habanalabs/gaudi/gaudi.c | 49 ++ drivers/misc/habanalabs/goya/goya.c| 27 +--- .../misc/habanalabs/include/common/hl_boot_if.h| 5 +++ include/uapi/misc/habanalabs.h | 30 ++--- 10 files changed, 148 insertions(+), 39 deletions(-)
[PATCH 3/3] habanalabs/gaudi: don't enable clock gating on DMA5
Graph Compiler uses DMA5 in a non-standard way and it requires the driver to disable clock gating on that DMA. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 35342edd4a02..9152242778f5 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -3460,6 +3460,12 @@ static void gaudi_set_clock_gating(struct hl_device *hdev) enable = !!(hdev->clock_gating_mask & (BIT_ULL(gaudi_dma_assignment[i]))); + /* GC sends work to DMA engine through Upper CP in DMA5 so +* we need to not enable clock gating in that DMA +*/ + if (i == GAUDI_HBM_DMA_4) + enable = 0; + qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, enable ? QMAN_CGM1_PWR_GATE_EN : 0); -- 2.25.1
[PATCH 1/3] habanalabs: update security map after init CPU Qs
From: Ohad Sharabi when reading CPU_BOOT_DEV_STS0 reg after FW reports SRAM AVAILABLE the value in the register might not yet be updated by FW. to overcome this issue another "up-to-date" read of this register is done at the end of CPU queues init. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 7 ++- drivers/misc/habanalabs/common/habanalabs.h | 3 --- drivers/misc/habanalabs/gaudi/gaudi.c| 6 +- drivers/misc/habanalabs/goya/goya.c | 6 +- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 31b52a223f02..09706c571e95 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -125,7 +125,8 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, goto out; } - if (hdev->asic_prop.fw_cpucp_ack_with_pi) + if (hdev->asic_prop.fw_app_security_map & + CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN) expected_ack_val = queue->pi; else expected_ack_val = CPUCP_PACKET_FENCE_VAL; @@ -786,10 +787,6 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) prop->hard_reset_done_by_fw = true; - if (prop->fw_boot_cpu_security_map & - CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN) - prop->fw_cpucp_ack_with_pi = true; - dev_dbg(hdev->dev, "Firmware boot CPU security status %#x\n", prop->fw_boot_cpu_security_map); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 18ed3a6000b0..46c37b0c704a 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -420,8 +420,6 @@ struct hl_mmu_properties { *from BOOT_DEV_STS0 * @dram_supports_virtual_memory: is there an MMU towards the DRAM * @hard_reset_done_by_fw: true if firmware is handling hard reset flow - * @fw_cpucp_ack_with_pi: true if cpucp is acking messages with the PQ PI - *instead of a magic number * @num_functional_hbms: number of functional HBMs in each DCORE. */ struct asic_fixed_properties { @@ -483,7 +481,6 @@ struct asic_fixed_properties { u8 fw_security_status_valid; u8 dram_supports_virtual_memory; u8 hard_reset_done_by_fw; - u8 fw_cpucp_ack_with_pi; u8 num_functional_hbms; }; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 6905857b363b..f937d90786b2 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -536,7 +536,6 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) prop->fw_security_disabled = true; prop->fw_security_status_valid = false; prop->hard_reset_done_by_fw = false; - prop->fw_cpucp_ack_with_pi = false; return 0; } @@ -3727,6 +3726,7 @@ static int gaudi_init_cpu(struct hl_device *hdev) static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) { struct gaudi_device *gaudi = hdev->asic_specific; + struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_eq *eq; u32 status; struct hl_hw_queue *cpu_pq = @@ -3783,6 +3783,10 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) return -EIO; } + /* update FW application security bits */ + if (prop->fw_security_status_valid) + prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0); + gaudi->hw_cap_initialized |= HW_CAP_CPU_Q; return 0; } diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index af6a5760924c..c30524660761 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -464,7 +464,6 @@ int goya_get_fixed_properties(struct hl_device *hdev) prop->fw_security_disabled = true; prop->fw_security_status_valid = false; prop->hard_reset_done_by_fw = false; - prop->fw_cpucp_ack_with_pi = false; return 0; } @@ -1189,6 +1188,7 @@ static int goya_stop_external_queues(struct hl_device *hdev) int goya_init_cpu_queues(struct hl_device *hdev) { struct goya_device *goya = hdev->asic_specific; + struct asic_fixed_properties *prop =
[PATCH 2/3] habanalabs: return block size + block ID
When user gives us a block address to get its ID to mmap it, he also needs to get from us the block size to pass to the driver in the mmap function. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 4 ++- drivers/misc/habanalabs/common/memory.c | 19 +-- drivers/misc/habanalabs/gaudi/gaudi.c | 2 +- drivers/misc/habanalabs/goya/goya.c | 2 +- include/uapi/misc/habanalabs.h | 27 - 5 files changed, 37 insertions(+), 17 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 46c37b0c704a..dc9f5a83dfc9 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -862,6 +862,8 @@ enum div_select_defs { * showing it to users. * @ack_protection_bits_errors: ack and dump all security violations * @get_hw_block_id: retrieve a HW block id to be used by the user to mmap it. + * also returns the size of the block if caller supplies + * a valid pointer for it * @hw_block_mmap: mmap a HW block with a given id. * @enable_events_from_fw: send interrupt to firmware to notify them the * driver is ready to receive asynchronous events. This @@ -980,7 +982,7 @@ struct hl_asic_funcs { u64 (*descramble_addr)(struct hl_device *hdev, u64 addr); void (*ack_protection_bits_errors)(struct hl_device *hdev); int (*get_hw_block_id)(struct hl_device *hdev, u64 block_addr, - u32 *block_id); + u32 *block_size, u32 *block_id); int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma, u32 block_id, u32 block_size); void (*enable_events_from_fw)(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 7171e8820a2d..7cadf75ebb81 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -1289,12 +1289,13 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, return rc; } -static int map_block(struct hl_device *hdev, u64 address, u64 *handle) +static int map_block(struct hl_device *hdev, u64 address, u64 *handle, + u32 *size) { u32 block_id = 0; int rc; - rc = hdev->asic_funcs->get_hw_block_id(hdev, address, &block_id); + rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id); *handle = block_id | HL_MMAP_TYPE_BLOCK; *handle <<= PAGE_SHIFT; @@ -1371,7 +1372,7 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) struct hl_device *hdev = hpriv->hdev; struct hl_ctx *ctx = hpriv->ctx; u64 block_handle, device_addr = 0; - u32 handle = 0; + u32 handle = 0, block_size; int rc; switch (args->in.op) { @@ -1416,8 +1417,9 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args) case HL_MEM_OP_MAP_BLOCK: rc = map_block(hdev, args->in.map_block.block_addr, - &block_handle); - args->out.handle = block_handle; + &block_handle, &block_size); + args->out.block_handle = block_handle; + args->out.block_size = block_size; break; default: @@ -1437,7 +1439,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) struct hl_device *hdev = hpriv->hdev; struct hl_ctx *ctx = hpriv->ctx; u64 block_handle, device_addr = 0; - u32 handle = 0; + u32 handle = 0, block_size; int rc; if (!hl_device_operational(hdev, &status)) { @@ -1524,8 +1526,9 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) case HL_MEM_OP_MAP_BLOCK: rc = map_block(hdev, args->in.map_block.block_addr, - &block_handle); - args->out.handle = block_handle; + &block_handle, &block_size); + args->out.block_handle = block_handle; + args->out.block_size = block_size; break; default: diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index f937d90786b2..35342edd4a02 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -8490,7 +8490,7 @@ static u64 gaudi_get_device_time(struct hl_device *hdev) } static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr, - u32 *block_id) + u32 *block_size,
[PATCH 1/4] habanalabs: improve communication protocol with cpucp
From: Ofir Bitton Current messaging communictaion protocol with cpucp can get out of sync due to coherency issues. In order to improve the protocol reliability, we modify the protocol to expect a different acknowledgment for every packet sent to cpucp. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c| 17 +++-- drivers/misc/habanalabs/common/habanalabs.h | 3 +++ drivers/misc/habanalabs/gaudi/gaudi.c | 6 +- drivers/misc/habanalabs/goya/goya.c | 6 +- .../misc/habanalabs/include/common/hl_boot_if.h | 5 + 5 files changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index ba6920f2b4ab..31b52a223f02 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -90,9 +90,10 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode) int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, u16 len, u32 timeout, u64 *result) { + struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id]; struct cpucp_packet *pkt; dma_addr_t pkt_dma_addr; - u32 tmp; + u32 tmp, expected_ack_val; int rc = 0; pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len, @@ -115,14 +116,22 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, goto out; } + /* set fence to a non valid value */ + pkt->fence = UINT_MAX; + rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr); if (rc) { dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc); goto out; } + if (hdev->asic_prop.fw_cpucp_ack_with_pi) + expected_ack_val = queue->pi; + else + expected_ack_val = CPUCP_PACKET_FENCE_VAL; + rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp, - (tmp == CPUCP_PACKET_FENCE_VAL), 1000, + (tmp == expected_ack_val), 1000, timeout, true); hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); @@ -777,6 +786,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) prop->hard_reset_done_by_fw = true; + if (prop->fw_boot_cpu_security_map & + CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN) + prop->fw_cpucp_ack_with_pi = true; + dev_dbg(hdev->dev, "Firmware boot CPU security status %#x\n", prop->fw_boot_cpu_security_map); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 30f32f2edb8a..3c54010f7ab9 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -419,6 +419,8 @@ struct hl_mmu_properties { *from BOOT_DEV_STS0 * @dram_supports_virtual_memory: is there an MMU towards the DRAM * @hard_reset_done_by_fw: true if firmware is handling hard reset flow + * @fw_cpucp_ack_with_pi: true if cpucp is acking messages with the PQ PI + *instead of a magic number * @num_functional_hbms: number of functional HBMs in each DCORE. */ struct asic_fixed_properties { @@ -479,6 +481,7 @@ struct asic_fixed_properties { u8 fw_security_status_valid; u8 dram_supports_virtual_memory; u8 hard_reset_done_by_fw; + u8 fw_cpucp_ack_with_pi; u8 num_functional_hbms; }; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 52fcaf25531a..006c34ae35c2 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -533,6 +533,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) prop->fw_security_disabled = true; prop->fw_security_status_valid = false; prop->hard_reset_done_by_fw = false; + prop->fw_cpucp_ack_with_pi = false; return 0; } @@ -4438,9 +4439,12 @@ static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) /* ring the doorbell */ WREG32(db_reg_offset, db_value); - if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) + if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) { + /* make sure device CPU will read latest data from host *
[PATCH 2/4] habanalabs: support fetching first available user CQ
From: Ofir Bitton User must be aware of the available CQs when it needs to use them. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 2 ++ drivers/misc/habanalabs/common/habanalabs_ioctl.c | 3 ++- drivers/misc/habanalabs/gaudi/gaudi.c | 3 +++ drivers/misc/habanalabs/goya/goya.c | 3 +++ include/uapi/misc/habanalabs.h| 3 +++ 5 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 3c54010f7ab9..98163317ec43 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -411,6 +411,7 @@ struct hl_mmu_properties { * @first_available_user_mon: first monitor available for the user * @first_available_user_msix_interrupt: first available msix interrupt * reserved for the user + * @first_available_cq: first available CQ for the user. * @tpc_enabled_mask: which TPCs are enabled. * @completion_queues_count: number of completion queues. * @fw_security_disabled: true if security measures are disabled in firmware, @@ -475,6 +476,7 @@ struct asic_fixed_properties { u16 first_available_user_sob[HL_MAX_DCORES]; u16 first_available_user_mon[HL_MAX_DCORES]; u16 first_available_user_msix_interrupt; + u16 first_available_cq[HL_MAX_DCORES]; u8 tpc_enabled_mask; u8 completion_queues_count; u8 fw_security_disabled; diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index e86f46d4b613..03af61cecd37 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -397,7 +397,8 @@ static int sync_manager_info(struct hl_fpriv *hpriv, struct hl_info_args *args) prop->first_available_user_sob[args->dcore_id]; sm_info.first_available_monitor = prop->first_available_user_mon[args->dcore_id]; - + sm_info.first_available_cq = + prop->first_available_cq[args->dcore_id]; return copy_to_user(out, &sm_info, min_t(size_t, (size_t) max_size, sizeof(sm_info))) ? -EFAULT : 0; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 006c34ae35c2..8fc0de3cf3a9 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -529,6 +529,9 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) prop->first_available_user_msix_interrupt = USHRT_MAX; + for (i = 0 ; i < HL_MAX_DCORES ; i++) + prop->first_available_cq[i] = USHRT_MAX; + /* disable fw security for now, set it in a later stage */ prop->fw_security_disabled = true; prop->fw_security_status_valid = false; diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 53db7e966866..d26b405f0c17 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -457,6 +457,9 @@ int goya_get_fixed_properties(struct hl_device *hdev) prop->first_available_user_msix_interrupt = USHRT_MAX; + for (i = 0 ; i < HL_MAX_DCORES ; i++) + prop->first_available_cq[i] = USHRT_MAX; + /* disable fw security for now, set it in a later stage */ prop->fw_security_disabled = true; prop->fw_security_status_valid = false; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index ebde42b37b43..64ae83b5f8e5 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -414,10 +414,13 @@ struct hl_pll_frequency_info { * struct hl_info_sync_manager - sync manager information * @first_available_sync_object: first available sob * @first_available_monitor: first available monitor + * @first_available_cq: first available cq */ struct hl_info_sync_manager { __u32 first_available_sync_object; __u32 first_available_monitor; + __u32 first_available_cq; + __u32 reserved; }; /** -- 2.25.1
[PATCH 4/4] habanalabs: enable F/W events after init done
Only after the initialization of the device is done, the driver is ready to receive events from the F/W. The driver can't handle events before that because of races so it will ignore events. In case of a fatal event, the driver won't know about it and the device will be operational although it shouldn't be. Same logic should be applied after hard-reset. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 23 + drivers/misc/habanalabs/common/habanalabs.h | 9 ++-- drivers/misc/habanalabs/gaudi/gaudi.c | 10 ++--- drivers/misc/habanalabs/goya/goya.c | 12 +++ 4 files changed, 41 insertions(+), 13 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 59219c862ca0..15fcb5c31c4b 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1159,12 +1159,20 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, atomic_set(&hdev->in_reset, 0); hdev->needs_reset = false; - if (hard_reset) + dev_notice(hdev->dev, "Successfully finished resetting the device\n"); + + if (hard_reset) { hdev->hard_reset_cnt++; - else - hdev->soft_reset_cnt++; - dev_warn(hdev->dev, "Successfully finished resetting the device\n"); + /* After reset is done, we are ready to receive events from +* the F/W. We can't do it before because we will ignore events +* and if those events are fatal, we won't know about it and +* the device will be operational although it shouldn't be +*/ + hdev->asic_funcs->enable_events_from_fw(hdev); + } else { + hdev->soft_reset_cnt++; + } return 0; @@ -1415,6 +1423,13 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) hdev->init_done = true; + /* After initialization is done, we are ready to receive events from +* the F/W. We can't do it before because we will ignore events and if +* those events are fatal, we won't know about it and the device will +* be operational although it shouldn't be +*/ + hdev->asic_funcs->enable_events_from_fw(hdev); + return 0; release_ctx: diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 98163317ec43..18ed3a6000b0 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -860,12 +860,16 @@ enum div_select_defs { * and place them in the relevant cs jobs * @collective_wait_create_jobs: allocate collective wait cs jobs * @scramble_addr: Routine to scramble the address prior of mapping it - * in the MMU. + * in the MMU. * @descramble_addr: Routine to de-scramble the address prior of - * showing it to users. + * showing it to users. * @ack_protection_bits_errors: ack and dump all security violations * @get_hw_block_id: retrieve a HW block id to be used by the user to mmap it. * @hw_block_mmap: mmap a HW block with a given id. + * @enable_events_from_fw: send interrupt to firmware to notify them the + * driver is ready to receive asynchronous events. This + * function should be called during the first init and + * after every hard-reset of the device */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -982,6 +986,7 @@ struct hl_asic_funcs { u32 *block_id); int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma, u32 block_id, u32 block_size); + void (*enable_events_from_fw)(struct hl_device *hdev); }; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index b929e602fa3d..6905857b363b 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -1383,8 +1383,6 @@ static int gaudi_late_init(struct hl_device *hdev) return rc; } - WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER); - rc = gaudi_fetch_psoc_frequency(hdev); if (rc) { dev_err(hdev->dev, "Failed to fetch psoc frequency\n"); @@ -8500,6 +8498,11 @@ static int gaudi_block_mmap(struct hl_device *hdev, return -EPERM; } +static void gaudi_enable_events_from_fw(struct hl_device *hdev) +{ + WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER); +} + static const struct hl_asic_funcs gaudi_funcs = { .early_init = gaudi_e
[PATCH 3/4] habanalabs/gaudi: use HBM_ECC_EN bit for ECC ERR
From: Ohad Sharabi driver should use ECC info from FW only if HBM ECC CAP is set. otherwise, try to fetch the data from MC regs only if security is disabled. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 18 +++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 8fc0de3cf3a9..b929e602fa3d 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7105,7 +7105,9 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch; int err = 0; - if (!hdev->asic_prop.fw_security_disabled) { + if (hdev->asic_prop.fw_security_status_valid && + (hdev->asic_prop.fw_app_security_map & + CPU_BOOT_DEV_STS0_HBM_ECC_EN)) { if (!hbm_ecc_data) { dev_err(hdev->dev, "No FW ECC data"); return 0; @@ -7127,14 +7129,24 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); dev_err(hdev->dev, - "HBM%d pc%d ECC: TYPE=%d, WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", - device, ch, type, wr_par, rd_par, ca_par, serr, derr); + "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", + device, ch, wr_par, rd_par, ca_par, serr, derr); + dev_err(hdev->dev, + "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n", + device, ch, hbm_ecc_data->first_addr, type, + hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt, + hbm_ecc_data->dec_cnt); err = 1; return 0; } + if (!hdev->asic_prop.fw_security_disabled) { + dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n"); + return 0; + } + base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET; for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) { val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x); -- 2.25.1
[git pull] habanalabs pull request for kernel 5.12
Hi Greg, This is habanalabs pull request for the merge window of kernel 5.12. It contains changes and new features, support for new firmware. Details are in the tag. Thanks, Oded The following changes since commit 0fc99422bc034de018607ef6b70f92d4bc4a236d: firmware: xilinx: Remove PM_API_MAX value (2021-01-26 19:38:54 +0100) are available in the Git repository at: https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git tags/misc-habanalabs-next-2021-01-27 for you to fetch changes up to f1aebf5e3d606a2a910eed54bc8d17655f12b606: habanalabs: update to latest hl_boot_if.h spec from F/W (2021-01-27 21:03:51 +0200) This tag contains habanalabs driver changes for v5.12: - Add feature called "staged command submissions". In this feature, the driver allows the user to submit multiple command submissions that describe a single pass on the deep learning graph. The driver tracks the completion of the entire pass by the last stage CS. - Update code to support the latest firmware image - Optimizations and improvements to MMU code: - Support page size that is not power-of-2 - Make the locks scheme simpler - mmap areas in device configuration space to userspace - Security fixes: - Make ETR non-secured - Remove access to kernel memory through debug-fs interface - Remove access through PCI bar to SyncManager register block in Gaudi - Many small bug fixes Alon Mizrahi (2): habanalabs: replace WARN/WARN_ON with dev_crit in driver habanalabs: return dram virtual address in info ioctl Christophe JAILLET (1): habanalabs: Use 'dma_set_mask_and_coherent()' Moti Haimovski (2): habanalabs: report dram_page_size in hw_ip_info ioctl habanalabs: support non power-of-2 DRAM phys page sizes Oded Gabbay (8): habanalabs: update firmware boot interface habanalabs: add ASIC property of functional HBMs habanalabs: update to latest hl_boot_if.h habanalabs: update email address in sysfs/debugfs docs CREDITS: update email address and home address habanalabs: update SyncManager interrupt handling habanalabs/gaudi: unmask HBM interrupts after handling habanalabs: update to latest hl_boot_if.h spec from F/W Ofir Bitton (20): habanalabs: Init the VM module for kernel context habanalabs/gaudi: support CS with no completion habanalabs: allow user to pass a staged submission seq habanalabs/gaudi: remove duplicated gaudi packets masks habanalabs/gaudi: add debug prints for security status habanalabs/gaudi: set uninitialized symbol habanalabs: remove access to kernel memory using debugfs habanalabs: report correct dram size in info ioctl habanalabs: read device boot errors after cpucp is up habanalabs: separate common code to dedicated folders habanalabs: increment ctx ref from within a cs allocation habanalabs: add driver support for internal cb scheduling habanalabs/gaudi: remove PCI access to SM block habanalabs/gaudi: print sync manager SEI interrupt info habanalabs: ignore F/W BMC errors in case no BMC present habanalabs: add security violations dump to debugfs habanalabs: add user available interrupt to hw_ip habanalabs: add new mem ioctl op for mapping hw blocks habanalabs: add CS completion and timeout properties habanalabs: staged submission support Ohad Sharabi (4): habanalabs: refactor MMU locks code habanalabs/goya: move mmu_prepare to context init habanalabs: modify device_idle interface habanalabs: fix ETR security issue Omer Shpigelman (2): habanalabs: kernel doc format in memory functions habanalabs: modify memory functions signatures farah kassabri (2): habanalabs: always try to use the hint address habanalabs: fix MMU debugfs related nodes CREDITS| 8 +- .../ABI/testing/debugfs-driver-habanalabs | 50 +- Documentation/ABI/testing/sysfs-driver-habanalabs | 58 +- drivers/misc/habanalabs/common/Makefile| 10 +- drivers/misc/habanalabs/common/asid.c | 6 +- drivers/misc/habanalabs/common/command_buffer.c| 8 +- .../misc/habanalabs/common/command_submission.c| 473 ++-- drivers/misc/habanalabs/common/context.c | 33 +- drivers/misc/habanalabs/common/debugfs.c | 43 +- drivers/misc/habanalabs/common/device.c| 23 +- drivers/misc/habanalabs/common/firmware_if.c | 143 +++-- drivers/misc/habanalabs/common/habanalabs.h| 100 +++- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 22 +- drivers/misc/habanalabs/common/hw_queue.c | 51 +- drivers/misc/habanalabs/common/memory.c| 614 + drivers/mi
[PATCH 2/3] habanalabs/gaudi: unmask HBM interrupts after handling
As the driver does with all interrupts, we need to tell F/W to unmask the HBM interrupts after the driver handled them. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index a9bd5aef6c02..52fcaf25531a 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7377,6 +7377,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, gaudi_hbm_read_interrupts(hdev, gaudi_hbm_event_to_dev(event_type), &eq_entry->hbm_ecc_data); + hl_fw_unmask_irq(hdev, event_type); break; case GAUDI_EVENT_TPC0_DEC: -- 2.25.1
[PATCH 1/3] habanalabs: update SyncManager interrupt handling
The firmware provides more information about SyncManager events. Adjust the code to the latest firmware interface file. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 14 +++--- drivers/misc/habanalabs/include/common/cpucp_if.h | 11 +-- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 1348016309e3..a9bd5aef6c02 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -6860,24 +6860,24 @@ static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type, u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0; switch (sei_data->sei_cause) { - case GAUDI_SM_SEI_SO_OVERFLOW: + case SM_SEI_SO_OVERFLOW: dev_err(hdev->dev, "SM %u SEI Error: SO %u overflow/underflow", - index, le16_to_cpu(sei_data->sei_log)); + index, le32_to_cpu(sei_data->sei_log)); break; - case GAUDI_SM_SEI_LBW_4B_UNALIGNED: + case SM_SEI_LBW_4B_UNALIGNED: dev_err(hdev->dev, "SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x", - index, le16_to_cpu(sei_data->sei_log)); + index, le32_to_cpu(sei_data->sei_log)); break; - case GAUDI_SM_SEI_AXI_RESPONSE_ERR: + case SM_SEI_AXI_RESPONSE_ERR: dev_err(hdev->dev, "SM %u SEI Error: AXI ID %u response error", - index, le16_to_cpu(sei_data->sei_log)); + index, le32_to_cpu(sei_data->sei_log)); break; default: dev_err(hdev->dev, "Unknown SM SEI cause %u", - le16_to_cpu(sei_data->sei_log)); + le32_to_cpu(sei_data->sei_log)); break; } } diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index d75d1077461b..b77c1c16c32c 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -58,10 +58,17 @@ struct hl_eq_ecc_data { __u8 pad[7]; }; +enum hl_sm_sei_cause { + SM_SEI_SO_OVERFLOW, + SM_SEI_LBW_4B_UNALIGNED, + SM_SEI_AXI_RESPONSE_ERR +}; + struct hl_eq_sm_sei_data { - __le16 sei_log; + __le32 sei_log; + /* enum hl_sm_sei_cause */ __u8 sei_cause; - __u8 pad[5]; + __u8 pad[3]; }; struct hl_eq_entry { -- 2.25.1
[PATCH 3/3] habanalabs: update to latest hl_boot_if.h spec from F/W
It adds the definition for indication that the F/W handles HBM ECC events. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/include/common/hl_boot_if.h | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index e29c77bdea07..57785478a4ef 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -69,8 +69,9 @@ * image has failed to match expected * checksum. Trying to program image again * might solve this. + * * CPU_BOOT_ERR0_PLL_FAIL PLL settings failed, meaning that one - * of the PLLs remained in REF_CLK + * of the PLLs remains in REF_CLK * * CPU_BOOT_ERR0_ENABLED Error registers enabled. * This is a main indication that the @@ -161,6 +162,10 @@ * FW initialized Clock Gating. * Initialized in: preboot * + * CPU_BOOT_DEV_STS0_HBM_ECC_ENHBM ECC handling Enabled. + * FW handles HBM ECC indications. + * Initialized in: linux + * * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. * This is a main indication that the * running FW populates the device status @@ -184,6 +189,7 @@ #define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << 11) #define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << 12) #define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << 13) +#define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << 14) #define CPU_BOOT_DEV_STS0_ENABLED (1 << 31) enum cpu_boot_status { -- 2.25.1
[PATCH 3/3] habanalabs: fix ETR security issue
From: Ohad Sharabi ETR should always be non-secured as it is used by the users to record profiling/trace data. This patch fixes the configuration to match those requirements. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../misc/habanalabs/gaudi/gaudi_coresight.c| 18 +++--- drivers/misc/habanalabs/goya/goya_coresight.c | 11 +-- .../habanalabs/include/gaudi/gaudi_masks.h | 5 - .../include/goya/asic_reg/goya_masks.h | 5 - 4 files changed, 32 insertions(+), 7 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c index 88a09d42e111..6e56fa1c6c69 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c @@ -634,9 +634,21 @@ static int gaudi_config_etr(struct hl_device *hdev, WREG32(mmPSOC_ETR_BUFWM, 0x3FFC); WREG32(mmPSOC_ETR_RSZ, input->buffer_size); WREG32(mmPSOC_ETR_MODE, input->sink_mode); - /* Workaround for H3 #HW-2075 bug: use small data chunks */ - WREG32(mmPSOC_ETR_AXICTL, (is_host ? 0 : 0x700) | - PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT); + if (hdev->asic_prop.fw_security_disabled) { + /* make ETR not privileged */ + val = FIELD_PREP( + PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK, 0); + /* make ETR non-secured (inverted logic) */ + val |= FIELD_PREP( + PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK, 1); + /* +* Workaround for H3 #HW-2075 bug: use small data +* chunks +*/ + val |= FIELD_PREP(PSOC_ETR_AXICTL_WRBURSTLEN_MASK, + is_host ? 0 : 7); + WREG32(mmPSOC_ETR_AXICTL, val); + } WREG32(mmPSOC_ETR_DBALO, lower_32_bits(input->buffer_address)); WREG32(mmPSOC_ETR_DBAHI, diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c index 6fa03933b438..6b7445cca580 100644 --- a/drivers/misc/habanalabs/goya/goya_coresight.c +++ b/drivers/misc/habanalabs/goya/goya_coresight.c @@ -434,8 +434,15 @@ static int goya_config_etr(struct hl_device *hdev, WREG32(mmPSOC_ETR_BUFWM, 0x3FFC); WREG32(mmPSOC_ETR_RSZ, input->buffer_size); WREG32(mmPSOC_ETR_MODE, input->sink_mode); - WREG32(mmPSOC_ETR_AXICTL, - 0x700 | PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT); + if (hdev->asic_prop.fw_security_disabled) { + /* make ETR not privileged */ + val = FIELD_PREP(PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK, 0); + /* make ETR non-secured (inverted logic) */ + val |= FIELD_PREP(PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK, 1); + /* burst size 8 */ + val |= FIELD_PREP(PSOC_ETR_AXICTL_WRBURSTLEN_MASK, 7); + WREG32(mmPSOC_ETR_AXICTL, val); + } WREG32(mmPSOC_ETR_DBALO, lower_32_bits(input->buffer_address)); WREG32(mmPSOC_ETR_DBAHI, diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h b/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h index b9b90d079e23..b53aeda9a982 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h @@ -388,7 +388,10 @@ enum axi_id { #define RAZWI_INITIATOR_ID_X_Y_TPC6RAZWI_INITIATOR_ID_X_Y(7, 6) #define RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5 RAZWI_INITIATOR_ID_X_Y(8, 6) -#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT 1 +#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT 1 +#define PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK 0x1 +#define PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK 0x2 +#define PSOC_ETR_AXICTL_WRBURSTLEN_MASK0xF00 /* STLB_CACHE_INV */ #define STLB_CACHE_INV_PRODUCER_INDEX_SHIFT 0 diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h index 067489bd048e..9ff3cb245580 100644 --- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h +++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h @@ -259,6 +259,9 @@ #define DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT #define DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT -#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT
[PATCH 2/3] habanalabs: staged submission support
From: Ofir Bitton We introduce a new mechanism named Staged Submission. This mechanism allows the user to send a whole CS in pieces. Each CS will not require completion rather than the last CS. Timeout timer will be triggered upon reception of the first CS in group. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../habanalabs/common/command_submission.c| 214 -- drivers/misc/habanalabs/common/habanalabs.h | 9 + drivers/misc/habanalabs/common/hw_queue.c | 27 +++ drivers/misc/habanalabs/gaudi/gaudi.c | 1 + 4 files changed, 227 insertions(+), 24 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 57daff0e59ae..7bd4a03b3429 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -334,6 +334,133 @@ static void complete_job(struct hl_device *hdev, struct hl_cs_job *job) cs_job_put(job); } +/* + * hl_staged_cs_find_first - locate the first CS in this staged submission + * + * @hdev: pointer to device structure + * @cs_seq: staged submission sequence number + * + * @note: This function must be called under 'hdev->cs_mirror_lock' + * + * Find and return a CS pointer with the given sequence + */ +struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq) +{ + struct hl_cs *cs; + + list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node) + if (cs->staged_cs && cs->staged_first && + cs->sequence == cs_seq) + return cs; + + return NULL; +} + +/* + * is_staged_cs_last_exists - returns true if the last CS in sequence exists + * + * @hdev: pointer to device structure + * @cs: staged submission member + * + */ +bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs) +{ + struct hl_cs *last_entry; + + last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs, + staged_cs_node); + + if (last_entry->staged_last) + return true; + + return false; +} + +/* + * staged_cs_get - get CS reference if this CS is a part of a staged CS + * + * @hdev: pointer to device structure + * @cs: current CS + * @cs_seq: staged submission sequence number + * + * Increment CS reference for every CS in this staged submission except for + * the CS which get completion. + */ +static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs) +{ + /* Only the last CS in this staged submission will get a completion. +* We must increment the reference for all other CS's in this +* staged submission. +* Once we get a completion we will release the whole staged submission. +*/ + if (!cs->staged_last) + cs_get(cs); +} + +/* + * staged_cs_put - put a CS in case it is part of staged submission + * + * @hdev: pointer to device structure + * @cs: CS to put + * + * This function decrements a CS reference (for a non completion CS) + */ +static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs) +{ + /* We release all CS's in a staged submission except the last +* CS which we have never incremented its reference. +*/ + if (!cs_needs_completion(cs)) + cs_put(cs); +} + +static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) +{ + bool next_entry_found = false; + struct hl_cs *next; + + if (!cs_needs_timeout(cs)) + return; + + spin_lock(&hdev->cs_mirror_lock); + + /* We need to handle tdr only once for the complete staged submission. +* Hence, we choose the CS that reaches this function first which is +* the CS marked as 'staged_last'. +*/ + if (cs->staged_cs && cs->staged_last) + cs = hl_staged_cs_find_first(hdev, cs->staged_sequence); + + spin_unlock(&hdev->cs_mirror_lock); + + /* Don't cancel TDR in case this CS was timedout because we might be +* running from the TDR context +*/ + if (cs && (cs->timedout || + hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)) + return; + + if (cs && cs->tdr_active) + cancel_delayed_work_sync(&cs->work_tdr); + + spin_lock(&hdev->cs_mirror_lock); + + /* queue TDR for next CS */ + list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node) + if (cs_needs_timeout(next)) { + next_entry_found = true; + break; + } + + if (next_entry_found && !next->tdr_active) { + next->t
[PATCH 1/3] habanalabs: modify device_idle interface
From: Ohad Sharabi Currently this API uses single 64 bits mask for engines idle indication. Recently, it was observed that more bits are needed for some ASICs. This patch modifies the use of the idle mask and the idle_extensions mask. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/context.c | 9 +++-- drivers/misc/habanalabs/common/debugfs.c | 2 +- drivers/misc/habanalabs/common/habanalabs.h | 4 +- .../misc/habanalabs/common/habanalabs_ioctl.c | 5 ++- drivers/misc/habanalabs/gaudi/gaudi.c | 37 --- drivers/misc/habanalabs/goya/goya.c | 21 +-- include/uapi/misc/habanalabs.h| 4 +- 7 files changed, 40 insertions(+), 42 deletions(-) diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index 829fe98eed61..cda871afb8f4 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -12,7 +12,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx) { struct hl_device *hdev = ctx->hdev; - u64 idle_mask = 0; + u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; int i; /* Release all allocated pending cb's, those cb's were never @@ -55,10 +55,11 @@ static void hl_ctx_fini(struct hl_ctx *ctx) if ((!hdev->pldm) && (hdev->pdev) && (!hdev->asic_funcs->is_device_idle(hdev, - &idle_mask, NULL))) + idle_mask, + HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL))) dev_notice(hdev->dev, - "device not idle after user context is closed (0x%llx)\n", - idle_mask); + "device not idle after user context is closed (0x%llx, 0x%llx)\n", + idle_mask[0], idle_mask[1]); } else { dev_dbg(hdev->dev, "closing kernel context\n"); hdev->asic_funcs->ctx_fini(ctx); diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 9e3c1efe56ba..df847a6d19f4 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -414,7 +414,7 @@ static int engines_show(struct seq_file *s, void *data) return 0; } - hdev->asic_funcs->is_device_idle(hdev, NULL, s); + hdev->asic_funcs->is_device_idle(hdev, NULL, 0, s); return 0; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index fd2fffd20ba1..be7947d69dfa 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -933,8 +933,8 @@ struct hl_asic_funcs { void (*set_clock_gating)(struct hl_device *hdev); void (*disable_clock_gating)(struct hl_device *hdev); int (*debug_coresight)(struct hl_device *hdev, void *data); - bool (*is_device_idle)(struct hl_device *hdev, u64 *mask, - struct seq_file *s); + bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, + u8 mask_len, struct seq_file *s); int (*soft_reset_late_init)(struct hl_device *hdev); void (*hw_queues_lock)(struct hl_device *hdev); void (*hw_queues_unlock)(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 628bdc56dca3..e86f46d4b613 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -145,9 +145,10 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args) return -EINVAL; hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, - &hw_idle.busy_engines_mask_ext, NULL); + hw_idle.busy_engines_mask_ext, + HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL); hw_idle.busy_engines_mask = - lower_32_bits(hw_idle.busy_engines_mask_ext); + lower_32_bits(hw_idle.busy_engines_mask_ext[0]); return copy_to_user(out, &hw_idle, min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index e1c6072e5fb3..9a3d2fb477a8 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -4538,7 +4538,6 @@ static int gaudi_scrub_device_mem(struct hl_de
[PATCH] habanalabs: add CS completion and timeout properties
From: Ofir Bitton In order to support staged submission feature, we need to distinguish on which command submission we want to receive timeout and for which we want to receive completion. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../habanalabs/common/command_submission.c| 104 ++ drivers/misc/habanalabs/common/habanalabs.h | 15 ++- drivers/misc/habanalabs/common/hw_queue.c | 24 +++- 3 files changed, 117 insertions(+), 26 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index a5e9bb0a4855..57daff0e59ae 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -149,9 +149,10 @@ void hl_fence_get(struct hl_fence *fence) kref_get(&fence->refcount); } -static void hl_fence_init(struct hl_fence *fence) +static void hl_fence_init(struct hl_fence *fence, u64 sequence) { kref_init(&fence->refcount); + fence->cs_sequence = sequence; fence->error = 0; fence->timestamp = ktime_set(0, 0); init_completion(&fence->completion); @@ -184,6 +185,28 @@ static void cs_job_put(struct hl_cs_job *job) kref_put(&job->refcount, cs_job_do_release); } +bool cs_needs_completion(struct hl_cs *cs) +{ + /* In case this is a staged CS, only the last CS in sequence should +* get a completion, any non staged CS will always get a completion +*/ + if (cs->staged_cs && !cs->staged_last) + return false; + + return true; +} + +bool cs_needs_timeout(struct hl_cs *cs) +{ + /* In case this is a staged CS, only the first CS in sequence should +* get a timeout, any non staged CS will always get a timeout +*/ + if (cs->staged_cs && !cs->staged_first) + return false; + + return true; +} + static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job) { /* @@ -225,7 +248,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job) parser.queue_type = job->queue_type; parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb; job->patched_cb = NULL; - parser.completion = true; + parser.completion = cs_needs_completion(job->cs); rc = hdev->asic_funcs->cs_parser(hdev, &parser); @@ -291,8 +314,21 @@ static void complete_job(struct hl_device *hdev, struct hl_cs_job *job) hl_debugfs_remove_job(hdev, job); - if (job->queue_type == QUEUE_TYPE_EXT || - job->queue_type == QUEUE_TYPE_HW) + /* We decrement reference only for a CS that gets completion +* because the reference was incremented only for this kind of CS +* right before it was scheduled. +* +* In staged submission, only the last CS marked as 'staged_last' +* gets completion, hence its release function will be called from here. +* As for all the rest CS's in the staged submission which do not get +* completion, their CS reference will be decremented by the +* 'staged_last' CS during the CS release flow. +* All relevant PQ CI counters will be incremented during the CS release +* flow by calling 'hl_hw_queue_update_ci'. +*/ + if (cs_needs_completion(cs) && + (job->queue_type == QUEUE_TYPE_EXT || + job->queue_type == QUEUE_TYPE_HW)) cs_put(cs); cs_job_put(job); @@ -347,8 +383,8 @@ static void cs_do_release(struct kref *ref) hdev->asic_funcs->hw_queues_unlock(hdev); - /* Need to update CI for internal queues */ - hl_int_hw_queue_update_ci(cs); + /* Need to update CI for all queue jobs that does not get completion */ + hl_hw_queue_update_ci(cs); /* remove CS from CS mirror list */ spin_lock(&hdev->cs_mirror_lock); @@ -359,6 +395,7 @@ static void cs_do_release(struct kref *ref) * running from the TDR context */ if (!cs->timedout && hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) { + bool next_entry_found = false; struct hl_cs *next; if (cs->tdr_active) @@ -367,10 +404,13 @@ static void cs_do_release(struct kref *ref) spin_lock(&hdev->cs_mirror_lock); /* queue TDR for next CS */ - next = list_first_entry_or_null(&hdev->cs_mirror_list, - struct hl_cs, mirror_node); + list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node) + if (cs_needs_timeout(next)) { + next_
[PATCH] habanalabs: add new mem ioctl op for mapping hw blocks
From: Ofir Bitton For future ASIC support the driver allows user to map certain regions in the device's configuration space for direct access from userspace. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 3 + drivers/misc/habanalabs/common/habanalabs.h | 16 +++- drivers/misc/habanalabs/common/memory.c | 93 - drivers/misc/habanalabs/gaudi/gaudi.c | 15 +++- drivers/misc/habanalabs/goya/goya.c | 15 +++- include/uapi/misc/habanalabs.h | 18 +++- 6 files changed, 150 insertions(+), 10 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index a438279b8691..0926cfb256ef 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -142,6 +142,9 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma) switch (vm_pgoff & HL_MMAP_TYPE_MASK) { case HL_MMAP_TYPE_CB: return hl_cb_mmap(hpriv, vma); + + case HL_MMAP_TYPE_BLOCK: + return hl_hw_block_mmap(hpriv, vma); } return -EINVAL; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 4129e4e6a7b5..e105612ed577 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -28,17 +28,18 @@ #define HL_NAME"habanalabs" /* Use upper bits of mmap offset to store habana driver specific information. - * bits[63:62] - Encode mmap type + * bits[63:61] - Encode mmap type * bits[45:0] - mmap offset value * * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these * defines are w.r.t to PAGE_SIZE */ -#define HL_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT) -#define HL_MMAP_TYPE_MASK (0x3ull << HL_MMAP_TYPE_SHIFT) +#define HL_MMAP_TYPE_SHIFT (61 - PAGE_SHIFT) +#define HL_MMAP_TYPE_MASK (0x7ull << HL_MMAP_TYPE_SHIFT) +#define HL_MMAP_TYPE_BLOCK (0x4ull << HL_MMAP_TYPE_SHIFT) #define HL_MMAP_TYPE_CB(0x2ull << HL_MMAP_TYPE_SHIFT) -#define HL_MMAP_OFFSET_VALUE_MASK (0x3FFFull >> PAGE_SHIFT) +#define HL_MMAP_OFFSET_VALUE_MASK (0x1FFFull >> PAGE_SHIFT) #define HL_MMAP_OFFSET_VALUE_GET(off) (off & HL_MMAP_OFFSET_VALUE_MASK) #define HL_PENDING_RESET_PER_SEC 10 @@ -856,6 +857,8 @@ enum div_select_defs { * @descramble_addr: Routine to de-scramble the address prior of * showing it to users. * @ack_protection_bits_errors: ack and dump all security violations + * @get_hw_block_id: retrieve a HW block id to be used by the user to mmap it. + * @hw_block_mmap: mmap a HW block with a given id. */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -968,6 +971,10 @@ struct hl_asic_funcs { u64 (*scramble_addr)(struct hl_device *hdev, u64 addr); u64 (*descramble_addr)(struct hl_device *hdev, u64 addr); void (*ack_protection_bits_errors)(struct hl_device *hdev); + int (*get_hw_block_id)(struct hl_device *hdev, u64 block_addr, + u32 *block_id); + int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma, + u32 block_id, u32 block_size); }; @@ -2167,6 +2174,7 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, bool map_cb, u64 *handle); int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle); int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma); +int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma); struct hl_cb *hl_cb_get(struct hl_device *hdev,struct hl_cb_mgr *mgr, u32 handle); void hl_cb_put(struct hl_cb *cb); diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 36de0d05d3a2..7171e8820a2d 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -1289,11 +1289,88 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, return rc; } +static int map_block(struct hl_device *hdev, u64 address, u64 *handle) +{ + u32 block_id = 0; + int rc; + + rc = hdev->asic_funcs->get_hw_block_id(hdev, address, &block_id); + + *handle = block_id | HL_MMAP_TYPE_BLOCK; + *handle <<= PAGE_SHIFT; + + return rc; +} + +static void hw_block_vm_close(struct vm_area_struct *vma) +{ + struct hl_ctx *ctx = (struct hl_ctx *) vma->vm_private_data; + + hl_ctx_put(ctx); + vma->vm_private_data = NULL; +} + +static const struct vm_operations_struct hw_block_vm_ops = { + .close = hw_block_vm_close +}
[git pull] habanalabs fixes for 5.11-rc5
Hi Greg, This pull request contains three important bug fixes for 5.11-rc5. One of the fixes prevent a possible host machine crash, another one prevents random card reset and the third adds a missing backward compatibility to a uapi. More details are in the tag. Thanks, Oded The following changes since commit cb5c681ab9037e25fcca20689c82cf034566d610: intel_th: pci: Add Alder Lake-P support (2021-01-21 18:54:43 +0100) are available in the Git repository at: https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git tags/misc-habanalabs-fixes-2021-01-21 for you to fetch changes up to 2dc4a6d79168e7e426e8ddf8e7219c9ffd13b2b1: habanalabs: disable FW events on device removal (2021-01-21 20:30:22 +0200) This tag contains the following bug fixes for 5.11-rc5/6: - Clear the fence field in the PCI counters packet before sending the packet to the F/W. Not clearing it might cause the driver and F/W to get out-of-sync - Fix backward compatibility in the uapi of IDLE check that is part of the INFO IOCTL. - Tell the F/W to not access the Host (device outbound) while the driver removes the device. If that happens, the server might crash. Oded Gabbay (2): habanalabs: fix backward compatibility of idle check habanalabs: disable FW events on device removal Ofir Bitton (1): habanalabs: zero pci counters packet before submit to FW drivers/misc/habanalabs/common/device.c | 9 + drivers/misc/habanalabs/common/firmware_if.c | 5 + drivers/misc/habanalabs/common/habanalabs_ioctl.c | 2 ++ 3 files changed, 16 insertions(+)
[PATCH 3/3] habanalabs: fix MMU debugfs related nodes
From: farah kassabri In mmu debugfs node show un-scrambled physical addresses. before read/write through data nodes, need to unscramble the physical address before using it for pci transaction. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/debugfs.c| 21 -- drivers/misc/habanalabs/common/habanalabs.h | 14 +++- drivers/misc/habanalabs/common/mmu/mmu.c| 74 ++--- drivers/misc/habanalabs/gaudi/gaudi.c | 3 +- drivers/misc/habanalabs/goya/goya.c | 3 +- 5 files changed, 93 insertions(+), 22 deletions(-) diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 323d0381a60a..9e3c1efe56ba 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -310,8 +310,8 @@ static int mmu_show(struct seq_file *s, void *data) struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; struct hl_ctx *ctx; - struct hl_mmu_hop_info hops_info; - u64 virt_addr = dev_entry->mmu_addr; + struct hl_mmu_hop_info hops_info = {0}; + u64 virt_addr = dev_entry->mmu_addr, phys_addr; int i; if (!hdev->mmu_enable) @@ -333,10 +333,19 @@ static int mmu_show(struct seq_file *s, void *data) return 0; } - seq_printf(s, - "asid: %u, virt_addr: 0x%llx, scrambled virt_addr: 0x%llx\n", - dev_entry->mmu_asid, dev_entry->mmu_addr, - hops_info.scrambled_vaddr); + phys_addr = hops_info.hop_info[hops_info.used_hops - 1].hop_pte_val; + + if (hops_info.scrambled_vaddr && + (dev_entry->mmu_addr != hops_info.scrambled_vaddr)) + seq_printf(s, + "asid: %u, virt_addr: 0x%llx, scrambled virt_addr: 0x%llx,\nphys_addr: 0x%llx, scrambled_phys_addr: 0x%llx\n", + dev_entry->mmu_asid, dev_entry->mmu_addr, + hops_info.scrambled_vaddr, + hops_info.unscrambled_paddr, phys_addr); + else + seq_printf(s, + "asid: %u, virt_addr: 0x%llx, phys_addr: 0x%llx\n", + dev_entry->mmu_asid, dev_entry->mmu_addr, phys_addr); for (i = 0 ; i < hops_info.used_hops ; i++) { seq_printf(s, "hop%d_addr: 0x%llx\n", diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index ce1a1e70a6d5..4129e4e6a7b5 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -851,8 +851,10 @@ enum div_select_defs { * @collective_wait_init_cs: Generate collective master/slave packets * and place them in the relevant cs jobs * @collective_wait_create_jobs: allocate collective wait cs jobs - * @scramble_vaddr: Routine to scramble the virtual address prior of mapping it + * @scramble_addr: Routine to scramble the address prior of mapping it * in the MMU. + * @descramble_addr: Routine to de-scramble the address prior of + * showing it to users. * @ack_protection_bits_errors: ack and dump all security violations */ struct hl_asic_funcs { @@ -963,7 +965,8 @@ struct hl_asic_funcs { int (*collective_wait_create_jobs)(struct hl_device *hdev, struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id, u32 collective_engine_id); - u64 (*scramble_vaddr)(struct hl_device *hdev, u64 virt_addr); + u64 (*scramble_addr)(struct hl_device *hdev, u64 addr); + u64 (*descramble_addr)(struct hl_device *hdev, u64 addr); void (*ack_protection_bits_errors)(struct hl_device *hdev); }; @@ -1726,13 +1729,17 @@ struct hl_mmu_per_hop_info { * @scrambled_vaddr: The value of the virtual address after scrambling. This * address replaces the original virtual-address when mapped * in the MMU tables. + * @unscrambled_paddr: The un-scrambled physical address. * @hop_info: Array holding the per-hop information used for the translation. * @used_hops: The number of hops used for the translation. + * @range_type: virtual address range type. */ struct hl_mmu_hop_info { u64 scrambled_vaddr; + u64 unscrambled_paddr; struct hl_mmu_per_hop_info hop_info[MMU_ARCH_5_HOPS]; u32 used_hops; + enum hl_va_range_type range_type; }; /** @@ -,7 +2229,8 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu); int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr); int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops); -u64 hl_mmu_scramble_v
[PATCH 1/3] habanalabs: always try to use the hint address
From: farah kassabri Currently hint address is ignored in case va block page size is not power of 2. We need to support th user hint address also in this case, but only if the hint address is aligned to page size. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/memory.c | 133 ++-- 1 file changed, 33 insertions(+), 100 deletions(-) diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index ada977eb136c..585425514cad 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -520,8 +520,8 @@ static inline int add_va_block(struct hl_device *hdev, } /** - * get_va_block_pow2() - get a virtual block for the given size and alignment - * where alignment is a power of 2. + * get_va_block() - get a virtual block for the given size and alignment. + * * @hdev: pointer to the habanalabs device structure. * @va_range: pointer to the virtual addresses range. * @size: requested block size. @@ -530,11 +530,11 @@ static inline int add_va_block(struct hl_device *hdev, * * This function does the following: * - Iterate on the virtual block list to find a suitable virtual block for the - * given size and alignment. + * given size, hint address and alignment. * - Reserve the requested block and update the list. * - Return the start address of the virtual block. */ -static u64 get_va_block_pow2(struct hl_device *hdev, +static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range, u64 size, u64 hint_addr, u32 va_block_align) { @@ -542,22 +542,37 @@ static u64 get_va_block_pow2(struct hl_device *hdev, u64 valid_start, valid_size, prev_start, prev_end, align_mask, reserved_valid_start = 0, reserved_valid_size = 0; bool add_prev = false; + bool is_align_pow_2 = is_power_of_2(va_range->page_size); - align_mask = ~((u64)va_block_align - 1); + if (is_align_pow_2) + align_mask = ~((u64)va_block_align - 1); + else { + /* +* with non-power-of-2 range we work only with page granularity +* and the start address is page aligned, +* so no need for alignment checking. +*/ + size = DIV_ROUND_UP_ULL(size, va_range->page_size) * + va_range->page_size; + } - /* check if hint_addr is aligned */ - if (hint_addr & (va_block_align - 1)) + /* Check if we need to ignore hint address */ + if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) || + (!is_align_pow_2 && (hint_addr % va_range->page_size))) { + dev_info(hdev->dev, "Hint address 0x%llx will be ignored\n", + hint_addr); hint_addr = 0; + } mutex_lock(&va_range->lock); print_va_list_locked(hdev, &va_range->list); list_for_each_entry(va_block, &va_range->list, node) { - /* calc the first possible aligned addr */ + /* Calc the first possible aligned addr */ valid_start = va_block->start; - if (valid_start & (va_block_align - 1)) { + if (is_align_pow_2 && (valid_start & (va_block_align - 1))) { valid_start &= align_mask; valid_start += va_block_align; if (valid_start > va_block->end) @@ -565,9 +580,11 @@ static u64 get_va_block_pow2(struct hl_device *hdev, } valid_size = va_block->end - valid_start; + if (valid_size < size) + continue; - if (valid_size >= size && (!new_va_block || - valid_size < reserved_valid_size)) { + /* Pick the minimal length block which has the required size */ + if (!new_va_block || (valid_size < reserved_valid_size)) { new_va_block = va_block; reserved_valid_start = valid_start; reserved_valid_size = valid_size; @@ -584,10 +601,14 @@ static u64 get_va_block_pow2(struct hl_device *hdev, if (!new_va_block) { dev_err(hdev->dev, "no available va block for size %llu\n", - size); + size); goto out; } + /* +* Check if there is some leftover range due to reserving the new +* va block, then return it to the main virtual addresses list. +*/ if (reserved_val
[PATCH 2/3] habanalabs: add user available interrupt to hw_ip
From: Ofir Bitton In order to support completions that arrive directly to the user, the driver needs to supply the user with the first available msix interrupt available. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 3 +++ drivers/misc/habanalabs/common/habanalabs_ioctl.c | 2 ++ drivers/misc/habanalabs/gaudi/gaudi.c | 2 ++ drivers/misc/habanalabs/goya/goya.c | 2 ++ include/uapi/misc/habanalabs.h| 6 -- 5 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 3923b03e99aa..ce1a1e70a6d5 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -408,6 +408,8 @@ struct hl_mmu_properties { * @sync_stream_first_mon: first monitor available for sync stream use * @first_available_user_sob: first sob available for the user * @first_available_user_mon: first monitor available for the user + * @first_available_user_msix_interrupt: first available msix interrupt + * reserved for the user * @tpc_enabled_mask: which TPCs are enabled. * @completion_queues_count: number of completion queues. * @fw_security_disabled: true if security measures are disabled in firmware, @@ -469,6 +471,7 @@ struct asic_fixed_properties { u16 sync_stream_first_mon; u16 first_available_user_sob[HL_MAX_DCORES]; u16 first_available_user_mon[HL_MAX_DCORES]; + u16 first_available_user_msix_interrupt; u8 tpc_enabled_mask; u8 completion_queues_count; u8 fw_security_disabled; diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index e216865b8102..d94825be029c 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -90,6 +90,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od; hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor; + hw_ip.first_available_interrupt_id = + prop->first_available_user_msix_interrupt; return copy_to_user(out, &hw_ip, min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0; } diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 2b01c081404a..69b3867bc151 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -527,6 +527,8 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) prop->sync_stream_first_mon + (num_sync_stream_queues * HL_RSVD_MONS); + prop->first_available_user_msix_interrupt = USHRT_MAX; + /* disable fw security for now, set it in a later stage */ prop->fw_security_disabled = true; prop->fw_security_status_valid = false; diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 50dcefc02cdd..82f69274def7 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -455,6 +455,8 @@ int goya_get_fixed_properties(struct hl_device *hdev) prop->max_pending_cs = GOYA_MAX_PENDING_CS; + prop->first_available_user_msix_interrupt = USHRT_MAX; + /* disable fw security for now, set it in a later stage */ prop->fw_security_disabled = true; prop->fw_security_status_valid = false; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index b431a70e1b8b..866355a53188 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -309,7 +309,9 @@ struct hl_info_hw_ip_info { __u32 num_of_events; __u32 device_id; /* PCI Device ID */ __u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */ - __u32 reserved[2]; + __u32 reserved; + __u16 first_available_interrupt_id; + __u16 reserved2; __u32 cpld_version; __u32 psoc_pci_pll_nr; __u32 psoc_pci_pll_nf; @@ -320,7 +322,7 @@ struct hl_info_hw_ip_info { __u8 pad[2]; __u8 cpucp_version[HL_INFO_VERSION_MAX_LEN]; __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN]; - __u64 reserved2; + __u64 reserved3; __u64 dram_page_size; }; -- 2.25.1
[PATCH 1/3] habanalabs: zero pci counters packet before submit to FW
From: Ofir Bitton Driver does not zero some pci counters packets before sending to FW. This causes an out of sync PI/CI between driver and FW. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 5 + 1 file changed, 5 insertions(+) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 20f77f58edef..c9a12980218a 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -402,6 +402,10 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, } counters->rx_throughput = result; + memset(&pkt, 0, sizeof(pkt)); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + /* Fetch PCI tx counter */ pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), @@ -414,6 +418,7 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, counters->tx_throughput = result; /* Fetch PCI replay counter */ + memset(&pkt, 0, sizeof(pkt)); pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); -- 2.25.1
[PATCH 3/3] habanalabs: disable FW events on device removal
When device is removed, we need to make sure the F/W won't send us any more events because during the remove process we disable the interrupts. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 9 + 1 file changed, 9 insertions(+) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 1ea57d86caa3..69d04eca767f 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1487,6 +1487,15 @@ void hl_device_fini(struct hl_device *hdev) } } + /* Disable PCI access from device F/W so it won't send us additional +* interrupts. We disable MSI/MSI-X at the halt_engines function and we +* can't have the F/W sending us interrupts after that. We need to +* disable the access here because if the device is marked disable, the +* message won't be send. Also, in case of heartbeat, the device CPU is +* marked as disable so this message won't be sent +*/ + hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS); + /* Mark device as disabled */ hdev->disabled = true; -- 2.25.1
[PATCH 2/3] habanalabs: fix backward compatibility of idle check
Need to take the lower 32 bits of the driver's 64-bit idle mask and put it in the legacy 32-bit variable that the userspace reads to know the idle mask. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 12efbd9d2e3a..d25892d61ec9 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -133,6 +133,8 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args) hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, &hw_idle.busy_engines_mask_ext, NULL); + hw_idle.busy_engines_mask = + lower_32_bits(hw_idle.busy_engines_mask_ext); return copy_to_user(out, &hw_idle, min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0; -- 2.25.1
[git pull] habanalabs fixes for 5.11-rc4
Hi Greg, This pull request contains three important bug fixes for 5.11-rc4. Basically the driver won't work without the dma address fix, and IMO the two other fixes are also improtant enough to be included at this stage. Thanks, Oded The following changes since commit f970d1d01af8606233f47901c1cf39f3ae21fd74: Merge tag 'phy-fixes-5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy into char-misc-next (2021-01-11 15:37:40 +0100) are available in the Git repository at: https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git tags/misc-habanalabs-fixes-2021-01-13 for you to fetch changes up to 9488307a5559255f2fc9a3ab61e1c31e243ca7c6: habanalabs: prevent soft lockup during unmap (2021-01-12 15:00:10 +0200) This tag contains the following bug fixes: - Fix the dma address that is passed to dma_mmap_coherent. We passed an address that includes an offset that is needed by our device and that caused dma_mmap_coherent to do an errounous mapping. - Fix the reset process in case failures happen during the reset process. Without this fix, if the user would have asked to perform reset after the previous reset failed he would get a kernel panic - WA to prevent soft lockup BUG during unmap of host memory. In case of tens of thousands of mappings, the unmapping can take a long time that exceeds the soft lockup timeout. This WA adds a small sleep every 32K page unmappings to prevent that. -------- Oded Gabbay (3): habanalabs: fix dma_addr passed to dma_mmap_coherent habanalabs: fix reset process in case of failures habanalabs: prevent soft lockup during unmap drivers/misc/habanalabs/common/device.c | 2 +- drivers/misc/habanalabs/common/habanalabs.h | 1 + drivers/misc/habanalabs/common/memory.c | 10 -- drivers/misc/habanalabs/common/mmu.c| 6 +++--- drivers/misc/habanalabs/common/mmu_v1.c | 12 ++-- drivers/misc/habanalabs/gaudi/gaudi.c | 3 ++- drivers/misc/habanalabs/goya/goya.c | 3 ++- 7 files changed, 27 insertions(+), 10 deletions(-)
[PATCH 2/2] CREDITS: update email address and home address
Update my email address to kernel.org account and my home address to my new house. Signed-off-by: Oded Gabbay --- CREDITS | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CREDITS b/CREDITS index 090ed4b004a5..ebd00ca9515b 100644 --- a/CREDITS +++ b/CREDITS @@ -1240,10 +1240,10 @@ S: 80050-430 - Curitiba - ParanĂ¡ S: Brazil N: Oded Gabbay -E: oded.gab...@gmail.com -D: HabanaLabs and AMD KFD maintainer -S: 12 Shraga Raphaeli -S: Petah-Tikva, 4906418 +E: ogab...@kernel.org +D: HabanaLabs maintainer +S: 29 Duchifat St. +S: Ra'anana 4372029 S: Israel N: Kumar Gala -- 2.25.1
[PATCH 1/2] habanalabs: update email address in sysfs/debugfs docs
Use my kernel.org address for contact point instead of my private email address. Signed-off-by: Oded Gabbay --- .../ABI/testing/debugfs-driver-habanalabs | 44 +++--- .../ABI/testing/sysfs-driver-habanalabs | 58 +-- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs index 3979bfdaa080..d447a611c41b 100644 --- a/Documentation/ABI/testing/debugfs-driver-habanalabs +++ b/Documentation/ABI/testing/debugfs-driver-habanalabs @@ -1,7 +1,7 @@ What: /sys/kernel/debug/habanalabs/hl/addr Date: Jan 2019 KernelVersion: 5.1 -Contact:oded.gab...@gmail.com +Contact:ogab...@kernel.org Description:Sets the device address to be used for read or write through PCI bar, or the device VA of a host mapped memory to be read or written directly from the host. The latter option is allowed @@ -11,7 +11,7 @@ Description:Sets the device address to be used for read or write through What: /sys/kernel/debug/habanalabs/hl/clk_gate Date: May 2020 KernelVersion: 5.8 -Contact:oded.gab...@gmail.com +Contact:ogab...@kernel.org Description:Allow the root user to disable/enable in runtime the clock gating mechanism in Gaudi. Due to how Gaudi is built, the clock gating needs to be disabled in order to access the @@ -34,28 +34,28 @@ Description:Allow the root user to disable/enable in runtime the clock What: /sys/kernel/debug/habanalabs/hl/command_buffers Date: Jan 2019 KernelVersion: 5.1 -Contact:oded.gab...@gmail.com +Contact:ogab...@kernel.org Description:Displays a list with information about the currently allocated command buffers What: /sys/kernel/debug/habanalabs/hl/command_submission Date: Jan 2019 KernelVersion: 5.1 -Contact:oded.gab...@gmail.com +Contact:ogab...@kernel.org Description:Displays a list with information about the currently active command submissions What: /sys/kernel/debug/habanalabs/hl/command_submission_jobs Date: Jan 2019 KernelVersion: 5.1 -Contact:oded.gab...@gmail.com +Contact:ogab...@kernel.org Description:Displays a list with detailed information about each JOB (CB) of each active command submission What: /sys/kernel/debug/habanalabs/hl/data32 Date: Jan 2019 KernelVersion: 5.1 -Contact:oded.gab...@gmail.com +Contact:ogab...@kernel.org Description:Allows the root user to read or write directly through the device's PCI bar. Writing to this file generates a write transaction while reading from the file generates a read @@ -70,7 +70,7 @@ Description:Allows the root user to read or write directly through the What: /sys/kernel/debug/habanalabs/hl/data64 Date: Jan 2020 KernelVersion: 5.6 -Contact:oded.gab...@gmail.com +Contact:ogab...@kernel.org Description:Allows the root user to read or write 64 bit data directly through the device's PCI bar. Writing to this file generates a write transaction while reading from the file generates a read @@ -85,7 +85,7 @@ Description:Allows the root user to read or write 64 bit data directly What: /sys/kernel/debug/habanalabs/hl/device Date: Jan 2019 KernelVersion: 5.1 -Contact:oded.gab...@gmail.com +Contact:ogab...@kernel.org Description:Enables the root user to set the device to specific state. Valid values are "disable", "enable", "suspend", "resume". User can read this property to see the valid values @@ -93,28 +93,28 @@ Description:Enables the root user to set the device to specific state. What: /sys/kernel/debug/habanalabs/hl/engines Date: Jul 2019 KernelVersion: 5.3 -Contact:oded.gab...@gmail.com +Contact:ogab...@kernel.org Description:Displays the status registers values of the device engines and their derived idle status What: /sys/kernel/debug/habanalabs/hl/i2c_addr Date: Jan 2019 KernelVersion: 5.1 -Contact:oded.gab...@gmail.com +Contact:ogab...@kernel.org Description:Sets I2C device address for I2C transaction that is generated by the device's CPU What: /sys/kernel/debug/habanalabs/hl/i2c_bus Date: Jan 2019 KernelVersion: 5.1 -Contact:oded.gab...@gmail.com +Contact:ogab...@kernel.org Description:Sets I2C bus address for I2C transaction that is generated by the device's CPU
[PATCH 3/4] habanalabs: ignore F/W BMC errors in case no BMC present
From: Ofir Bitton In order to support operation mode in which BMC is not active, driver must not take BMC errors into consideration. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index a6ea5bbeb699..dcd6c3614044 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -308,9 +308,15 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg, if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) dev_warn(hdev->dev, "Device boot warning - Skipped DRAM initialization\n"); - if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) - dev_warn(hdev->dev, - "Device boot error - Skipped waiting for BMC\n"); + + if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) { + if (hdev->bmc_enable) + dev_warn(hdev->dev, + "Device boot error - Skipped waiting for BMC\n"); + else + err_val &= ~CPU_BOOT_ERR0_BMC_WAIT_SKIPPED; + } + if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY) dev_err(hdev->dev, "Device boot error - Serdes data from BMC not available\n"); -- 2.25.1
[PATCH 2/4] habanalabs/gaudi: print sync manager SEI interrupt info
From: Ofir Bitton Driver must print sync manager SEI information upon receiving interrupt from FW. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 41 +++ .../misc/habanalabs/include/common/cpucp_if.h | 7 .../include/gaudi/gaudi_async_events.h| 4 ++ 3 files changed, 52 insertions(+) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 4b602aa7a6a3..126650e3a9ad 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -225,6 +225,12 @@ gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = { "MSG AXI LBW returned with error" }; +enum gaudi_sm_sei_cause { + GAUDI_SM_SEI_SO_OVERFLOW, + GAUDI_SM_SEI_LBW_4B_UNALIGNED, + GAUDI_SM_SEI_AXI_RESPONSE_ERR +}; + static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = { QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */ QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */ @@ -6845,6 +6851,34 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev, } } +static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type, + struct hl_eq_sm_sei_data *sei_data) +{ + u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0; + + switch (sei_data->sei_cause) { + case GAUDI_SM_SEI_SO_OVERFLOW: + dev_err(hdev->dev, + "SM %u SEI Error: SO %u overflow/underflow", + index, le16_to_cpu(sei_data->sei_log)); + break; + case GAUDI_SM_SEI_LBW_4B_UNALIGNED: + dev_err(hdev->dev, + "SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x", + index, le16_to_cpu(sei_data->sei_log)); + break; + case GAUDI_SM_SEI_AXI_RESPONSE_ERR: + dev_err(hdev->dev, + "SM %u SEI Error: AXI ID %u response error", + index, le16_to_cpu(sei_data->sei_log)); + break; + default: + dev_err(hdev->dev, "Unknown SM SEI cause %u", + le16_to_cpu(sei_data->sei_log)); + break; + } +} + static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, struct hl_eq_ecc_data *ecc_data) { @@ -7468,6 +7502,13 @@ static void gaudi_handle_eqe(struct hl_device *hdev, hl_fw_unmask_irq(hdev, event_type); break; + case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: + gaudi_print_irq_info(hdev, event_type, false); + gaudi_print_sm_sei_info(hdev, event_type, + &eq_entry->sm_sei_data); + hl_fw_unmask_irq(hdev, event_type); + break; + case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E: gaudi_print_clk_change_info(hdev, event_type); hl_fw_unmask_irq(hdev, event_type); diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index 00bd9b392f93..d75d1077461b 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -58,11 +58,18 @@ struct hl_eq_ecc_data { __u8 pad[7]; }; +struct hl_eq_sm_sei_data { + __le16 sei_log; + __u8 sei_cause; + __u8 pad[5]; +}; + struct hl_eq_entry { struct hl_eq_header hdr; union { struct hl_eq_ecc_data ecc_data; struct hl_eq_hbm_ecc_data hbm_ecc_data; + struct hl_eq_sm_sei_data sm_sei_data; __le64 data[7]; }; }; diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h index 9ccba8437ec9..49335e8334b4 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h @@ -212,6 +212,10 @@ enum gaudi_async_event_id { GAUDI_EVENT_NIC_SEI_2 = 266, GAUDI_EVENT_NIC_SEI_3 = 267, GAUDI_EVENT_NIC_SEI_4 = 268, + GAUDI_EVENT_DMA_IF_SEI_0 = 277, + GAUDI_EVENT_DMA_IF_SEI_1 = 278, + GAUDI_EVENT_DMA_IF_SEI_2 = 279, + GAUDI_EVENT_DMA_IF_SEI_3 = 280, GAUDI_EVENT_PCIE_FLR = 290, GAUDI_EVENT_TPC0_BMON_SPMU = 300, GAUDI_EVENT_TPC0_KRN_ERR = 301, -- 2.25.1
[PATCH 4/4] habanalabs: add security violations dump to debugfs
From: Ofir Bitton In order to improve driver security debuggability, we add security violations dump to debugfs. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../ABI/testing/debugfs-driver-habanalabs | 8 +++ drivers/misc/habanalabs/common/debugfs.c | 22 +++ drivers/misc/habanalabs/common/habanalabs.h | 2 ++ drivers/misc/habanalabs/gaudi/gaudi.c | 3 ++- drivers/misc/habanalabs/gaudi/gaudiP.h| 1 + .../misc/habanalabs/gaudi/gaudi_security.c| 5 + drivers/misc/habanalabs/goya/goya.c | 3 ++- drivers/misc/habanalabs/goya/goyaP.h | 1 + drivers/misc/habanalabs/goya/goya_security.c | 5 + 9 files changed, 48 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs index c5d678d39144..3979bfdaa080 100644 --- a/Documentation/ABI/testing/debugfs-driver-habanalabs +++ b/Documentation/ABI/testing/debugfs-driver-habanalabs @@ -182,3 +182,11 @@ KernelVersion: 5.6 Contact:oded.gab...@gmail.com Description:Sets the stop-on_error option for the device engines. Value of "0" is for disable, otherwise enable. + +What: /sys/kernel/debug/habanalabs/hl/dump_security_violations +Date: Jan 2021 +KernelVersion: 5.12 +Contact:oded.gab...@gmail.com +Description:Dumps all security violations to dmesg. This will also ack +all security violations meanings those violations will not be +dumped next time user calls this API diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 50ca8eea6648..323d0381a60a 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -867,6 +867,17 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf, return count; } +static ssize_t hl_security_violations_read(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + + hdev->asic_funcs->ack_protection_bits_errors(hdev); + + return 0; +} + static const struct file_operations hl_data32b_fops = { .owner = THIS_MODULE, .read = hl_data_read32, @@ -924,6 +935,11 @@ static const struct file_operations hl_stop_on_err_fops = { .write = hl_stop_on_err_write }; +static const struct file_operations hl_security_violations_fops = { + .owner = THIS_MODULE, + .read = hl_security_violations_read +}; + static const struct hl_info_list hl_debugfs_list[] = { {"command_buffers", command_buffers_show, NULL}, {"command_submission", command_submission_show, NULL}, @@ -1073,6 +1089,12 @@ void hl_debugfs_add_device(struct hl_device *hdev) dev_entry, &hl_stop_on_err_fops); + debugfs_create_file("dump_security_violations", + 0644, + dev_entry->root, + dev_entry, + &hl_security_violations_fops); + for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) { ent = debugfs_create_file(hl_debugfs_list[i].name, diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index d3c73fc7baf7..454ef3bfe2e7 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -850,6 +850,7 @@ enum div_select_defs { * @collective_wait_create_jobs: allocate collective wait cs jobs * @scramble_vaddr: Routine to scramble the virtual address prior of mapping it * in the MMU. + * @ack_protection_bits_errors: ack and dump all security violations */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -960,6 +961,7 @@ struct hl_asic_funcs { struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id, u32 collective_engine_id); u64 (*scramble_vaddr)(struct hl_device *hdev, u64 virt_addr); + void (*ack_protection_bits_errors)(struct hl_device *hdev); }; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 126650e3a9ad..36e2cc22d108 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -8545,7 +8545,8 @@ static const struct hl_asic_funcs gaudi_funcs = { .get_device_time = gaudi_get_device_time, .collective_wait_init_cs = gaudi_collective_wait_init_cs, .collective_wait_create_jobs = gaudi_collective_wait_create_jobs, -
[PATCH 1/4] habanalabs: Use 'dma_set_mask_and_coherent()'
From: Christophe JAILLET Axe 'hl_pci_set_dma_mask()' and replace it with an equivalent 'dma_set_mask_and_coherent()' call. This makes the code a bit less verbose. It also removes an erroneous comment, because 'hl_pci_set_dma_mask()' does not try to use a fall-back value. Signed-off-by: Christophe JAILLET Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/pci/pci.c | 43 1 file changed, 7 insertions(+), 36 deletions(-) diff --git a/drivers/misc/habanalabs/common/pci/pci.c b/drivers/misc/habanalabs/common/pci/pci.c index c56ec1574127..b799f9258fb0 100644 --- a/drivers/misc/habanalabs/common/pci/pci.c +++ b/drivers/misc/habanalabs/common/pci/pci.c @@ -307,40 +307,6 @@ int hl_pci_set_outbound_region(struct hl_device *hdev, return rc; } -/** - * hl_pci_set_dma_mask() - Set DMA masks for the device. - * @hdev: Pointer to hl_device structure. - * - * This function sets the DMA masks (regular and consistent) for a specified - * value. If it doesn't succeed, it tries to set it to a fall-back value - * - * Return: 0 on success, non-zero for failure. - */ -static int hl_pci_set_dma_mask(struct hl_device *hdev) -{ - struct pci_dev *pdev = hdev->pdev; - int rc; - - /* set DMA mask */ - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask)); - if (rc) { - dev_err(hdev->dev, - "Failed to set pci dma mask to %d bits, error %d\n", - hdev->dma_mask, rc); - return rc; - } - - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask)); - if (rc) { - dev_err(hdev->dev, - "Failed to set pci consistent dma mask to %d bits, error %d\n", - hdev->dma_mask, rc); - return rc; - } - - return 0; -} - /** * hl_pci_init() - PCI initialization code. * @hdev: Pointer to hl_device structure. @@ -377,9 +343,14 @@ int hl_pci_init(struct hl_device *hdev) goto unmap_pci_bars; } - rc = hl_pci_set_dma_mask(hdev); - if (rc) + rc = dma_set_mask_and_coherent(&pdev->dev, + DMA_BIT_MASK(hdev->dma_mask)); + if (rc) { + dev_err(hdev->dev, + "Failed to set dma mask to %d bits, error %d\n", + hdev->dma_mask, rc); goto unmap_pci_bars; + } return 0; -- 2.25.1
[PATCH 3/3] habanalabs: prevent soft lockup during unmap
When using Deep learning framework such as tensorflow or pytorch, there are tens of thousands of host memory mappings. When the user frees all those mappings at the same time, the process of unmapping and unpinning them can take a long time, which may cause a soft lockup bug. To prevent this, we need to free the core to do other things during the unmapping process. For now, we chose to do it every 32K unmappings (each unmap is a single 4K page). Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 1 + drivers/misc/habanalabs/common/memory.c | 10 -- drivers/misc/habanalabs/common/mmu.c| 6 +++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index e0d7f5fbaa5c..60e16dc4bcac 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2182,6 +2182,7 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu); int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr); int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops); +bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr); int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, void __iomem *dst, u32 src_offset, u32 size); diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index cbe9da4e0211..5d4fbdcaefe3 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -886,8 +886,10 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, { struct hl_device *hdev = ctx->hdev; u64 next_vaddr, i; + bool is_host_addr; u32 page_size; + is_host_addr = !hl_is_dram_va(hdev, vaddr); page_size = phys_pg_pack->page_size; next_vaddr = vaddr; @@ -900,9 +902,13 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, /* * unmapping on Palladium can be really long, so avoid a CPU * soft lockup bug by sleeping a little between unmapping pages +* +* In addition, when unmapping host memory we pass through +* the Linux kernel to unpin the pages and that takes a long +* time. Therefore, sleep every 32K pages to avoid soft lockup */ - if (hdev->pldm) - usleep_range(500, 1000); + if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0)) + usleep_range(50, 200); } } diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu.c index 33ae953d3a36..28a4638741d8 100644 --- a/drivers/misc/habanalabs/common/mmu.c +++ b/drivers/misc/habanalabs/common/mmu.c @@ -9,7 +9,7 @@ #include "habanalabs.h" -static bool is_dram_va(struct hl_device *hdev, u64 virt_addr) +bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr) { struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -156,7 +156,7 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, if (!hdev->mmu_enable) return 0; - is_dram_addr = is_dram_va(hdev, virt_addr); + is_dram_addr = hl_is_dram_va(hdev, virt_addr); if (is_dram_addr) mmu_prop = &prop->dmmu; @@ -236,7 +236,7 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, if (!hdev->mmu_enable) return 0; - is_dram_addr = is_dram_va(hdev, virt_addr); + is_dram_addr = hl_is_dram_va(hdev, virt_addr); if (is_dram_addr) mmu_prop = &prop->dmmu; -- 2.25.1
[PATCH 2/3] habanalabs: fix reset process in case of failures
There are some points in the reset process where if the code fails for some reason, and the system admin tries to initiate the reset process again we will get a kernel panic. This is because there aren't any protections in different fini functions that are called during the reset process. The protections that are added in this patch make sure that if the fini functions are called multiple times, without calling init functions between them, there won't be double release of already released resources. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 2 +- drivers/misc/habanalabs/common/mmu_v1.c | 12 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 1456eabf9601..1ea57d86caa3 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1037,7 +1037,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, if (hard_reset) { /* Release kernel context */ - if (hl_ctx_put(hdev->kernel_ctx) == 1) + if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1) hdev->kernel_ctx = NULL; hl_vm_fini(hdev); hl_mmu_fini(hdev); diff --git a/drivers/misc/habanalabs/common/mmu_v1.c b/drivers/misc/habanalabs/common/mmu_v1.c index 2ce6ea89d4fa..06d8a44dd5d4 100644 --- a/drivers/misc/habanalabs/common/mmu_v1.c +++ b/drivers/misc/habanalabs/common/mmu_v1.c @@ -467,8 +467,16 @@ static void hl_mmu_v1_fini(struct hl_device *hdev) { /* MMU H/W fini was already done in device hw_fini() */ - kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0); - gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); + if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) { + kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0); + gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); + } + + /* Make sure that if we arrive here again without init was called we +* won't cause kernel panic. This can happen for example if we fail +* during hard reset code at certain points +*/ + hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL; } /** -- 2.25.1
[PATCH 1/3] habanalabs: fix dma_addr passed to dma_mmap_coherent
When doing dma_alloc_coherent in the driver, we add a certain hard-coded offset to the DMA address before returning to the callee function. This offset is needed when our device use this DMA address to perform outbound transactions to the host. However, if we want to map the DMA'able memory to the user via dma_mmap_coherent(), we need to pass the original dma address, without this offset. Otherwise, we will get erronouos mapping. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 3 ++- drivers/misc/habanalabs/goya/goya.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 8c09e4466af8..b328ddaa64ee 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -4002,7 +4002,8 @@ static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma, vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE; - rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size); + rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, + (dma_addr - HOST_PHYS_BASE), size); if (rc) dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index b8b4aa636b7c..63679a747d2c 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2719,7 +2719,8 @@ static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma, vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE; - rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size); + rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, + (dma_addr - HOST_PHYS_BASE), size); if (rc) dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); -- 2.25.1
Re: [PATCH] habanalabs: Use 'dma_set_mask_and_coherent()' instead of hand-writing it
On Tue, Dec 29, 2020 at 1:17 PM Christophe JAILLET wrote: > > Axe 'hl_pci_set_dma_mask()' and replace it with an equivalent > 'dma_set_mask_and_coherent()' call. > > This makes the code a bit less verbose. > > It also removes an erroneous comment, because 'hl_pci_set_dma_mask()' does > not try to use a fall-back value. > > Signed-off-by: Christophe JAILLET > --- > drivers/misc/habanalabs/common/pci.c | 42 > 1 file changed, 6 insertions(+), 36 deletions(-) > > diff --git a/drivers/misc/habanalabs/common/pci.c > b/drivers/misc/habanalabs/common/pci.c > index 923b2606e29f..eb8a784ba863 100644 > --- a/drivers/misc/habanalabs/common/pci.c > +++ b/drivers/misc/habanalabs/common/pci.c > @@ -301,40 +301,6 @@ int hl_pci_set_outbound_region(struct hl_device *hdev, > return rc; > } > > -/** > - * hl_pci_set_dma_mask() - Set DMA masks for the device. > - * @hdev: Pointer to hl_device structure. > - * > - * This function sets the DMA masks (regular and consistent) for a specified > - * value. If it doesn't succeed, it tries to set it to a fall-back value > - * > - * Return: 0 on success, non-zero for failure. > - */ > -static int hl_pci_set_dma_mask(struct hl_device *hdev) > -{ > - struct pci_dev *pdev = hdev->pdev; > - int rc; > - > - /* set DMA mask */ > - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask)); > - if (rc) { > - dev_err(hdev->dev, > - "Failed to set pci dma mask to %d bits, error %d\n", > - hdev->dma_mask, rc); > - return rc; > - } > - > - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask)); > - if (rc) { > - dev_err(hdev->dev, > - "Failed to set pci consistent dma mask to %d bits, > error %d\n", > - hdev->dma_mask, rc); > - return rc; > - } > - > - return 0; > -} > - > /** > * hl_pci_init() - PCI initialization code. > * @hdev: Pointer to hl_device structure. > @@ -371,9 +337,13 @@ int hl_pci_init(struct hl_device *hdev) > goto unmap_pci_bars; > } > > - rc = hl_pci_set_dma_mask(hdev); > - if (rc) > + rc = dma_set_mask_and_coherent(&pdev->dev, > DMA_BIT_MASK(hdev->dma_mask)); > + if (rc) { > + dev_err(hdev->dev, > + "Failed to set dma mask to %d bits, error %d\n", > + hdev->dma_mask, rc); > goto unmap_pci_bars; > + } > > return 0; > > -- > 2.27.0 > Reviewed-by: Oded Gabbay Applied to -next Oded
[PATCH 2/4] habanalabs: increment ctx ref from within a cs allocation
From: Ofir Bitton A CS must increment the relevant context reference count. We want to increment the reference inside the CS allocation function as opposed for today where we increment it outside. This is logical since we want to avoid explicitly incrementing the context every time we call the CS allocate function. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../misc/habanalabs/common/command_submission.c | 15 +-- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index f7fac82ac41d..3affb350070c 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -479,6 +479,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, return -ENOMEM; } + /* increment refcnt for context */ + hl_ctx_get(hdev, ctx); + cs->ctx = ctx; cs->submitted = false; cs->completed = false; @@ -550,6 +553,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, kfree(cs_cmpl); free_cs: kfree(cs); + hl_ctx_put(ctx); return rc; } @@ -827,14 +831,9 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, if (rc) goto out; - /* increment refcnt for context */ - hl_ctx_get(hdev, hpriv->ctx); - rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs); - if (rc) { - hl_ctx_put(hpriv->ctx); + if (rc) goto free_cs_chunk_array; - } cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP); *cs_seq = cs->sequence; @@ -1276,15 +1275,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, } } - /* increment refcnt for context */ - hl_ctx_get(hdev, ctx); - rc = allocate_cs(hdev, ctx, cs_type, &cs); if (rc) { if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) hl_fence_put(sig_fence); - hl_ctx_put(ctx); goto free_cs_chunk_array; } -- 2.25.1
[PATCH 3/4] habanalabs: add driver support for internal cb scheduling
From: Ofir Bitton In order to support scnenarios in which driver needs access to HW components but it cannot access them directly, we add support for scheduling command buffers internally. These command buffers will be transmitted upon next user command submission context. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../habanalabs/common/command_submission.c| 141 ++ drivers/misc/habanalabs/common/context.c | 3 + drivers/misc/habanalabs/common/device.c | 2 + drivers/misc/habanalabs/common/habanalabs.h | 26 4 files changed, 172 insertions(+) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 3affb350070c..2dd69d6d4782 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -585,6 +585,18 @@ void hl_cs_rollback_all(struct hl_device *hdev) } } +void hl_pending_cb_rollback_all(struct hl_device *hdev) +{ + struct hl_pending_cb *pending_cb, *tmp; + + list_for_each_entry_safe(pending_cb, tmp, + &hdev->kernel_ctx->pending_cb_list, cb_node) { + list_del(&pending_cb->cb_node); + hl_cb_put(pending_cb->cb); + kfree(pending_cb); + } +} + static void job_wq_completion(struct work_struct *work) { struct hl_cs_job *job = container_of(work, struct hl_cs_job, @@ -954,6 +966,131 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, return rc; } +static int pending_cb_create_job(struct hl_device *hdev, struct hl_ctx *ctx, + struct hl_cs *cs, struct hl_cb *cb, u32 size, u32 hw_queue_id) +{ + struct hw_queue_properties *hw_queue_prop; + struct hl_cs_counters_atomic *cntr; + struct hl_cs_job *job; + + hw_queue_prop = &hdev->asic_prop.hw_queues_props[hw_queue_id]; + cntr = &hdev->aggregated_cs_counters; + + job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true); + if (!job) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&cntr->out_of_mem_drop_cnt); + dev_err(hdev->dev, "Failed to allocate a new job\n"); + return -ENOMEM; + } + + job->id = 0; + job->cs = cs; + job->user_cb = cb; + atomic_inc(&job->user_cb->cs_cnt); + job->user_cb_size = size; + job->hw_queue_id = hw_queue_id; + job->patched_cb = job->user_cb; + job->job_cb_size = job->user_cb_size; + + /* increment refcount as for external queues we get completion */ + cs_get(cs); + + cs->jobs_in_queue_cnt[job->hw_queue_id]++; + + list_add_tail(&job->cs_node, &cs->job_list); + + hl_debugfs_add_job(hdev, job); + + return 0; +} + +static int hl_submit_pending_cb(struct hl_fpriv *hpriv) +{ + struct hw_queue_properties *hw_queue_prop; + struct hl_device *hdev = hpriv->hdev; + struct hl_ctx *ctx = hpriv->ctx; + struct hl_pending_cb *pending_cb, *tmp; + struct list_head local_cb_list; + struct hl_cs *cs; + struct hl_cb *cb; + u32 hw_queue_id; + u32 cb_size; + int process_list, rc = 0; + + if (list_empty(&ctx->pending_cb_list)) + return 0; + + process_list = atomic_cmpxchg(&ctx->thread_pending_cb_token, 1, 0); + + /* Only a single thread is allowed to process the list */ + if (!process_list) + return 0; + + if (list_empty(&ctx->pending_cb_list)) + goto free_pending_cb_token; + + /* move all list elements to a local list */ + INIT_LIST_HEAD(&local_cb_list); + spin_lock(&ctx->pending_cb_lock); + list_for_each_entry_safe(pending_cb, tmp, &ctx->pending_cb_list, + cb_node) + list_move_tail(&pending_cb->cb_node, &local_cb_list); + spin_unlock(&ctx->pending_cb_lock); + + rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, &cs); + if (rc) + goto add_list_elements; + + hl_debugfs_add_cs(cs); + + /* Iterate through pending cb list, create jobs and add to CS */ + list_for_each_entry(pending_cb, &local_cb_list, cb_node) { + cb = pending_cb->cb; + cb_size = pending_cb->cb_size; + hw_queue_id = pending_cb->hw_queue_id; + hw_queue_prop = &hdev->asic_prop.hw_queues_props[hw_queue_id]; + + rc = pending_cb_create_job(hdev, ctx, cs, cb, cb_size, + hw_queue_id); +