[PATCH 3/3] habanalabs: ignore f/w status error

2021-04-17 Thread Oded Gabbay
In case firmware has a bug and erroneously reports a status error
(e.g. device unusable) during boot, allow the user to tell the driver
to continue the boot regardless of the error status.

This will be done via a kernel module parameter which exposes a mask. The
user that loads the driver can decide exactly which status errors to
ignore and which to take into account. The bitmask follows the
defines in hl_boot_if.h.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c| 3 ++-
 drivers/misc/habanalabs/common/habanalabs.h | 7 +++
 drivers/misc/habanalabs/common/habanalabs_drv.c | 7 +++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index 377a7ca886fe..0713b2c12d54 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -400,7 +400,8 @@ static int fw_read_errors(struct hl_device *hdev, u32 
boot_err0_reg,
err_exists = true;
}
 
-   if (err_exists)
+   if (err_exists && ((err_val & ~CPU_BOOT_ERR0_ENABLED) &
+   lower_32_bits(hdev->boot_error_status_mask)))
return -EIO;
 
return 0;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 91291a8e201e..6579f8767abd 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1962,6 +1962,12 @@ struct hl_mmu_funcs {
  * @clock_gating_mask: is clock gating enabled. bitmask that represents the
  * different engines. See debugfs-driver-habanalabs for
  * details.
+ * @boot_error_status_mask: contains a mask of the device boot error status.
+ *  Each bit represents a different error, according to
+ *  the defines in hl_boot_if.h. If the bit is cleared,
+ *  the error will be ignored by the driver during
+ *  device initialization. Mainly used to debug and
+ *  workaround firmware bugs
  * @in_reset: is device in reset flow.
  * @curr_pll_profile: current PLL profile.
  * @card_type: Various ASICs have several card types. This indicates the card
@@ -2077,6 +2083,7 @@ struct hl_device {
u64 timeout_jiffies;
u64 max_power;
u64 clock_gating_mask;
+   u64 boot_error_status_mask;
atomic_tin_reset;
enum hl_pll_frequency   curr_pll_profile;
enum cpucp_card_types   card_type;
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c 
b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 7135f1e03864..64d1530db985 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -30,6 +30,7 @@ static DEFINE_MUTEX(hl_devs_idr_lock);
 static int timeout_locked = 30;
 static int reset_on_lockup = 1;
 static int memory_scrub = 1;
+static ulong boot_error_status_mask = ULONG_MAX;
 
 module_param(timeout_locked, int, 0444);
 MODULE_PARM_DESC(timeout_locked,
@@ -43,6 +44,10 @@ module_param(memory_scrub, int, 0444);
 MODULE_PARM_DESC(memory_scrub,
"Scrub device memory in various states (0 = no, 1 = yes, default yes)");
 
+module_param(boot_error_status_mask, ulong, 0444);
+MODULE_PARM_DESC(boot_error_status_mask,
+   "Mask of the error status during device CPU boot (If bitX is cleared 
then error X is masked. Default all 1's)");
+
 #define PCI_VENDOR_ID_HABANALABS   0x1da3
 
 #define PCI_IDS_GOYA   0x0001
@@ -319,6 +324,8 @@ int create_hdev(struct hl_device **dev, struct pci_dev 
*pdev,
hdev->major = hl_major;
hdev->reset_on_lockup = reset_on_lockup;
hdev->memory_scrub = memory_scrub;
+   hdev->boot_error_status_mask = boot_error_status_mask;
+
hdev->pldm = 0;
 
set_driver_behavior_per_device(hdev);
-- 
2.25.1



[PATCH 2/3] habanalabs: change error level of security not ready

2021-04-17 Thread Oded Gabbay
This error indicates a problem in the security initialization inside
the f/w so we need to stop the device loading because it won't be
usable.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index fff29f057b6d..377a7ca886fe 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -362,12 +362,9 @@ static int fw_read_errors(struct hl_device *hdev, u32 
boot_err0_reg,
}
 
if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) {
-   dev_warn(hdev->dev,
+   dev_err(hdev->dev,
"Device boot warning - security not ready\n");
-   /* This is a warning so we don't want it to disable the
-* device
-*/
-   err_val &= ~CPU_BOOT_ERR0_SECURITY_NOT_RDY;
+   err_exists = true;
}
 
if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) {
-- 
2.25.1



[PATCH 1/3] habanalabs: skip reading f/w errors on bad status

2021-04-17 Thread Oded Gabbay
If we read all FF from the boot status register, then something is
totally wrong and there is no point in reading specific errors.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index 7cf82da67dab..fff29f057b6d 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -850,8 +850,13 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 
cpu_boot_status_reg,
if (rc) {
dev_err(hdev->dev, "Failed to read preboot version\n");
detect_cpu_boot_status(hdev, status);
-   fw_read_errors(hdev, boot_err0_reg,
-   cpu_security_boot_status_reg);
+
+   /* If we read all FF, then something is totally wrong, no point
+* of reading specific errors
+*/
+   if (status != -1)
+   fw_read_errors(hdev, boot_err0_reg,
+   cpu_security_boot_status_reg);
return -EIO;
}
 
-- 
2.25.1



Re: [PATCH 2/3] habanalabs: support legacy and new pll indexes

2021-04-17 Thread Oded Gabbay
Thanks Nathan,
I already have a pending patch that fixes this issue:
https://lkml.org/lkml/2021/4/17/73

On Thu, Apr 15, 2021 at 5:17 PM Nathan Chancellor  wrote:
>
> On Sun, Mar 21, 2021 at 10:11:29PM +0200, Oded Gabbay wrote:
> > From: Ohad Sharabi 
> >
> > In order to use minimum of hard coded values common to LKD and F/W
> > a dynamic method to work with PLLs is introduced in this patch.
> > Formerly asic specific PLL numbering is now common for all asics.
> > To be backward compatible a bit in dev status is defined, if the bit is
> > not set LKD will keep working with old PLL numbering.
> >
> > Signed-off-by: Ohad Sharabi 
> > Reviewed-by: Oded Gabbay 
> > Signed-off-by: Oded Gabbay 
> > ---
> >  drivers/misc/habanalabs/common/firmware_if.c  | 49 ++-
> >  drivers/misc/habanalabs/common/habanalabs.h   | 14 --
> >  drivers/misc/habanalabs/common/sysfs.c| 24 ++---
> >  drivers/misc/habanalabs/gaudi/gaudi.c | 33 +
> >  drivers/misc/habanalabs/goya/goya.c   | 26 ++
> >  .../misc/habanalabs/include/common/cpucp_if.h | 41 
> >  .../habanalabs/include/common/hl_boot_if.h|  6 +++
> >  .../habanalabs/include/gaudi/gaudi_fw_if.h| 14 --
> >  .../misc/habanalabs/include/goya/goya_fw_if.h | 11 -
> >  9 files changed, 182 insertions(+), 36 deletions(-)
> >
> > diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
> > b/drivers/misc/habanalabs/common/firmware_if.c
> > index 2a58edaf984a..092691a8917d 100644
> > --- a/drivers/misc/habanalabs/common/firmware_if.c
> > +++ b/drivers/misc/habanalabs/common/firmware_if.c
> > @@ -539,18 +539,63 @@ int hl_fw_cpucp_total_energy_get(struct hl_device 
> > *hdev, u64 *total_energy)
> >   return rc;
> >  }
> >
> > -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
> > +int get_used_pll_index(struct hl_device *hdev, enum pll_index 
> > input_pll_index,
> > + enum pll_index *pll_index)
> > +{
> > + struct asic_fixed_properties *prop = &hdev->asic_prop;
> > + u8 pll_byte, pll_bit_off;
> > + bool dynamic_pll;
> > +
> > + if (input_pll_index >= PLL_MAX) {
> > + dev_err(hdev->dev, "PLL index %d is out of range\n",
> > + input_pll_index);
> > + return -EINVAL;
> > + }
> > +
> > + dynamic_pll = prop->fw_security_status_valid &&
> > + (prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN);
> > +
> > + if (!dynamic_pll) {
> > + /*
> > +  * in case we are working with legacy FW (each asic has unique
> > +  * PLL numbering) extract the legacy numbering
> > +  */
> > + *pll_index = hdev->legacy_pll_map[input_pll_index];
> > + return 0;
> > + }
> > +
> > + /* PLL map is a u8 array */
> > + pll_byte = prop->cpucp_info.pll_map[input_pll_index >> 3];
> > + pll_bit_off = input_pll_index & 0x7;
> > +
> > + if (!(pll_byte & BIT(pll_bit_off))) {
> > + dev_err(hdev->dev, "PLL index %d is not supported\n",
> > + input_pll_index);
> > + return -EINVAL;
> > + }
> > +
> > + *pll_index = input_pll_index;
> > +
> > + return 0;
> > +}
> > +
> > +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index 
> > pll_index,
> >   u16 *pll_freq_arr)
> >  {
> >   struct cpucp_packet pkt;
> > + enum pll_index used_pll_idx;
> >   u64 result;
> >   int rc;
> >
> > + rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
> > + if (rc)
> > + return rc;
> > +
> >   memset(&pkt, 0, sizeof(pkt));
> >
> >   pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET <<
> >   CPUCP_PKT_CTL_OPCODE_SHIFT);
> > - pkt.pll_type = __cpu_to_le16(pll_index);
> > + pkt.pll_type = __cpu_to_le16((u16)used_pll_idx);
> >
> >   rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, 
> > sizeof(pkt),
> >   HL_CPUCP_INFO_TIMEOUT_USEC, &result);
> > diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
> > b/drivers/misc/habanalabs/common/habanalabs.h
> &

[PATCH 3/7] habanalabs: update firmware files to latest

2021-04-17 Thread Oded Gabbay
Update the firmware files to the latest from the firmware team.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/include/common/cpucp_if.h   | 4 +++-
 drivers/misc/habanalabs/include/common/hl_boot_if.h | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h 
b/drivers/misc/habanalabs/include/common/cpucp_if.h
index 27cd0ba99aa3..bf10ca8d2457 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -107,7 +107,9 @@ enum pq_init_status {
PQ_INIT_STATUS_NA = 0,
PQ_INIT_STATUS_READY_FOR_CP,
PQ_INIT_STATUS_READY_FOR_HOST,
-   PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI
+   PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI,
+   PQ_INIT_STATUS_LEN_NOT_POWER_OF_TWO_ERR,
+   PQ_INIT_STATUS_ILLEGAL_Q_ADDR_ERR
 };
 
 /*
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h 
b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index e0a259e0495c..84c14688d69a 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -8,7 +8,7 @@
 #ifndef HL_BOOT_IF_H
 #define HL_BOOT_IF_H
 
-#define LKD_HARD_RESET_MAGIC   0xED7BD694
+#define LKD_HARD_RESET_MAGIC   0xED7BD694 /* deprecated - do not use */
 #define HL_POWER9_HOST_MAGIC   0x1DA30009
 
 #define BOOT_FIT_SRAM_OFFSET   0x20
-- 
2.25.1



[PATCH 2/7] habanalabs: increase ELBI reset timeout for PLDM

2021-04-17 Thread Oded Gabbay
From: Moti Haimovski 

On PLDM, in case of NIC hangs, the ELBI reset takes much longer than
expected. As a result, an increase in the ELBI reset timeout is required.

Signed-off-by: Moti Haimovski 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/pci/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/pci/pci.c 
b/drivers/misc/habanalabs/common/pci/pci.c
index e941b7eef346..5d07ca53d9ce 100644
--- a/drivers/misc/habanalabs/common/pci/pci.c
+++ b/drivers/misc/habanalabs/common/pci/pci.c
@@ -10,7 +10,7 @@
 
 #include 
 
-#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC  (HL_PCI_ELBI_TIMEOUT_MSEC * 10)
+#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC  (HL_PCI_ELBI_TIMEOUT_MSEC * 100)
 
 #define IATU_REGION_CTRL_REGION_EN_MASKBIT(31)
 #define IATU_REGION_CTRL_MATCH_MODE_MASK   BIT(30)
-- 
2.25.1



[PATCH 4/7] habanalabs: prepare preboot stage to dynamic f/w load

2021-04-17 Thread Oded Gabbay
From: Ohad Sharabi 

Start the skeleton for the dynamic F/W load by marking current preboot
code path as legacy.

Signed-off-by: Ohad Sharabi 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c | 79 
 drivers/misc/habanalabs/common/habanalabs.h  |  8 +-
 2 files changed, 71 insertions(+), 16 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index 7cf82da67dab..337c76fb5e3c 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -811,21 +811,15 @@ static void detect_cpu_boot_status(struct hl_device 
*hdev, u32 status)
}
 }
 
-int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
-   u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
-   u32 timeout)
+static int hl_fw_read_preboot_caps(struct hl_device *hdev,
+   u32 cpu_boot_status_reg,
+   u32 cpu_boot_caps_reg,
+   u32 boot_err0_reg, u32 timeout)
 {
struct asic_fixed_properties *prop = &hdev->asic_prop;
-   u32 status, security_status;
+   u32 status;
int rc;
 
-   /* pldm was added for cases in which we use preboot on pldm and want
-* to load boot fit, but we can't wait for preboot because it runs
-* very slowly
-*/
-   if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) || hdev->pldm)
-   return 0;
-
/* Need to check two possible scenarios:
 *
 * CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT - for newer firmwares where
@@ -848,18 +842,41 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 
cpu_boot_status_reg,
timeout);
 
if (rc) {
-   dev_err(hdev->dev, "Failed to read preboot version\n");
+   dev_err(hdev->dev, "CPU boot ready status timeout\n");
detect_cpu_boot_status(hdev, status);
-   fw_read_errors(hdev, boot_err0_reg,
-   cpu_security_boot_status_reg);
+   fw_read_errors(hdev, boot_err0_reg, cpu_boot_status_reg);
return -EIO;
}
 
+   prop->fw_preboot_caps_map = RREG32(cpu_boot_caps_reg);
+
+   /*
+* For now- force dynamic_fw_load to false as LKD does not yet
+* implements all necessary parts of it.
+* TODO: once dynamic load is ready set to:
+* prop->dynamic_fw_load = !!(prop->fw_preboot_caps_map &
+*CPU_BOOT_DEV_STS0_FW_LD_COM_EN)
+*/
+   prop->dynamic_fw_load = 0;
+
+   dev_dbg(hdev->dev, "Attempting %s FW load\n",
+   prop->dynamic_fw_load ? "dynamic" : "legacy");
+   return 0;
+}
+
+static int hl_fw_read_preboot_status_legacy(struct hl_device *hdev,
+   u32 cpu_boot_status_reg, u32 cpu_security_boot_status_reg,
+   u32 boot_err0_reg, u32 timeout)
+{
+   struct asic_fixed_properties *prop = &hdev->asic_prop;
+   u32 security_status;
+   int rc;
+
rc = hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
if (rc)
return rc;
 
-   security_status = RREG32(cpu_security_boot_status_reg);
+   security_status = prop->fw_preboot_caps_map;
 
/* We read security status multiple times during boot:
 * 1. preboot - a. Check whether the security status bits are valid
@@ -901,6 +918,38 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 
cpu_boot_status_reg,
return 0;
 }
 
+int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
+   u32 cpu_boot_caps_reg, u32 boot_err0_reg,
+   u32 timeout)
+{
+   int rc;
+
+   /* pldm was added for cases in which we use preboot on pldm and want
+* to load boot fit, but we can't wait for preboot because it runs
+* very slowly
+*/
+   if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) || hdev->pldm)
+   return 0;
+
+   /*
+* In order to determine boot method (static VS dymanic) we need to
+* read the boot caps register
+*/
+   rc = hl_fw_read_preboot_caps(hdev, cpu_boot_status_reg,
+   cpu_boot_caps_reg, boot_err0_reg,
+   timeout);
+   if (rc)
+   return rc;
+
+   if (!hdev->asic_prop.dynamic_fw_load)
+   return hl_fw_read_preboot_status_legacy(hdev, 
cpu_boot_status_reg,
+   cpu_boot_caps_reg, boot_err0_reg,
+   timeout);
+
+   dev_err(hdev->dev, "Dynamic FW load is not supported\n")

[PATCH 7/7] habanalabs: use mmu cache range invalidation

2021-04-17 Thread Oded Gabbay
From: Alon Mizrahi 

Use mmu cache range invalidation instead of entire cache invalidation
because it yields better performance.

In GOYA and GAUDI, always use entire cache invalidation because these
ASICs don't support range invalidation.

Signed-off-by: Alon Mizrahi 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/habanalabs.h |  2 +-
 drivers/misc/habanalabs/common/memory.c |  8 ++--
 drivers/misc/habanalabs/gaudi/gaudi.c   | 49 ++--
 drivers/misc/habanalabs/goya/goya.c | 51 ++---
 4 files changed, 16 insertions(+), 94 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 9ee7a583b614..07c2713ba372 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1035,7 +1035,7 @@ struct hl_asic_funcs {
int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard,
u32 flags);
int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
-   u32 asid, u64 va, u64 size);
+   u32 flags, u32 asid, u64 va, u64 size);
int (*send_heartbeat)(struct hl_device *hdev);
void (*set_clock_gating)(struct hl_device *hdev);
void (*disable_clock_gating)(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/common/memory.c 
b/drivers/misc/habanalabs/common/memory.c
index 2938cbbafbbc..b92878d76f23 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -1117,7 +1117,8 @@ static int map_device_va(struct hl_ctx *ctx, struct 
hl_mem_in *args,
goto map_err;
}
 
-   rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type);
+   rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, false,
+   *vm_type, ctx->asid, ret_vaddr, phys_pg_pack->total_size);
 
mutex_unlock(&ctx->mmu_lock);
 
@@ -1261,8 +1262,9 @@ static int unmap_device_va(struct hl_ctx *ctx, struct 
hl_mem_in *args,
 * at the loop end rather than for each iteration
 */
if (!ctx_free)
-   rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
-   *vm_type);
+   rc = hdev->asic_funcs->mmu_invalidate_cache_range(hdev, true,
+   *vm_type, ctx->asid, vaddr,
+   phys_pg_pack->total_size);
 
mutex_unlock(&ctx->mmu_lock);
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 45c40549ded4..1d5930578e07 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7863,52 +7863,13 @@ static int gaudi_mmu_invalidate_cache(struct hl_device 
*hdev, bool is_hard,
 }
 
 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
-   bool is_hard, u32 asid, u64 va, u64 size)
+   bool is_hard, u32 flags,
+   u32 asid, u64 va, u64 size)
 {
-   struct gaudi_device *gaudi = hdev->asic_specific;
-   u32 status, timeout_usec;
-   u32 inv_data;
-   u32 pi;
-   int rc;
-
-   if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
-   hdev->hard_reset_pending)
-   return 0;
-
-   if (hdev->pldm)
-   timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
-   else
-   timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
-
-   /*
-* TODO: currently invalidate entire L0 & L1 as in regular hard
-* invalidation. Need to apply invalidation of specific cache
-* lines with mask of ASID & VA & size.
-* Note that L1 with be flushed entirely in any case.
+   /* Treat as invalidate all because there is no range invalidation
+* in Gaudi
 */
-
-   /* L0 & L1 invalidation */
-   inv_data = RREG32(mmSTLB_CACHE_INV);
-   /* PI is 8 bit */
-   pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
-   WREG32(mmSTLB_CACHE_INV,
-   (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
-
-   rc = hl_poll_timeout(
-   hdev,
-   mmSTLB_INV_CONSUMER_INDEX,
-   status,
-   status == pi,
-   1000,
-   timeout_usec);
-
-   if (rc) {
-   dev_err_ratelimited(hdev->dev,
-   "MMU cache invalidation timeout\n");
-   hl_device_reset(hdev, HL_RESET_HARD);
-   }
-
-   return rc;
+   return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
 }
 
 static int gaudi_mmu_update_a

[PATCH 5/7] habanalabs: request f/w in separate function

2021-04-17 Thread Oded Gabbay
From: Ohad Sharabi 

This refactor is needed due to the dynamic FW load in which requesting
the FW file (and getting its attributes) is not immediately followed by
copying FW file content.

Signed-off-by: Ohad Sharabi 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c | 65 +---
 1 file changed, 44 insertions(+), 21 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index 337c76fb5e3c..0a9e3cb86552 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -12,6 +12,47 @@
 #include 
 
 #define FW_FILE_MAX_SIZE   0x140 /* maximum size of 20MB */
+
+static int hl_request_fw(struct hl_device *hdev,
+   const struct firmware **firmware_p,
+   const char *fw_name)
+{
+   size_t fw_size;
+   int rc;
+
+   rc = request_firmware(firmware_p, fw_name, hdev->dev);
+   if (rc) {
+   dev_err(hdev->dev, "Firmware file %s is not found! (error 
%d)\n",
+   fw_name, rc);
+   goto out;
+   }
+
+   fw_size = (*firmware_p)->size;
+   if ((fw_size % 4) != 0) {
+   dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
+   fw_name, fw_size);
+   rc = -EINVAL;
+   goto release_fw;
+   }
+
+   dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
+
+   if (fw_size > FW_FILE_MAX_SIZE) {
+   dev_err(hdev->dev,
+   "FW file size %zu exceeds maximum of %u bytes\n",
+   fw_size, FW_FILE_MAX_SIZE);
+   rc = -EINVAL;
+   goto release_fw;
+   }
+
+   return 0;
+
+release_fw:
+   release_firmware(*firmware_p);
+out:
+   return rc;
+}
+
 /**
  * hl_fw_load_fw_to_device() - Load F/W code to device's memory.
  *
@@ -33,29 +74,11 @@ int hl_fw_load_fw_to_device(struct hl_device *hdev, const 
char *fw_name,
size_t fw_size;
int rc;
 
-   rc = request_firmware(&fw, fw_name, hdev->dev);
-   if (rc) {
-   dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name);
-   goto out;
-   }
+   rc = hl_request_fw(hdev, &fw, fw_name);
+   if (rc)
+   return rc;
 
fw_size = fw->size;
-   if ((fw_size % 4) != 0) {
-   dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
-   fw_name, fw_size);
-   rc = -EINVAL;
-   goto out;
-   }
-
-   dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
-
-   if (fw_size > FW_FILE_MAX_SIZE) {
-   dev_err(hdev->dev,
-   "FW file size %zu exceeds maximum of %u bytes\n",
-   fw_size, FW_FILE_MAX_SIZE);
-   rc = -EINVAL;
-   goto out;
-   }
 
if (size - src_offset > fw_size) {
dev_err(hdev->dev,
-- 
2.25.1



[PATCH 6/7] habanalabs: refactor init device cpu code

2021-04-17 Thread Oded Gabbay
From: Ohad Sharabi 

Replace multiple arguments to init device CPU function by passing
firmware loader managing structure that is initialized per ASIC with
the loader parameters.

In addition, the FW loader management structure is now part of the
habanalabs device, so that the loader parameters can be communicated
across the various boot stages.
Signed-off-by: Ohad Sharabi 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c | 30 --
 drivers/misc/habanalabs/common/habanalabs.h  | 32 +---
 drivers/misc/habanalabs/gaudi/gaudi.c| 24 ++-
 drivers/misc/habanalabs/goya/goya.c  | 24 ++-
 4 files changed, 82 insertions(+), 28 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index 0a9e3cb86552..a97d9c264adc 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -973,18 +973,26 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 
cpu_boot_status_reg,
return -EINVAL;
 }
 
-int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
-   u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
-   u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
-   bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout)
+int hl_fw_init_cpu(struct hl_device *hdev)
 {
+   u32 cpu_msg_status_reg, cpu_timeout, msg_to_cpu_reg, status;
+   u32 cpu_boot_status_reg, cpu_security_boot_status_reg;
struct asic_fixed_properties *prop = &hdev->asic_prop;
-   u32 status;
+   struct fw_load_mgr *fw_loader;
int rc;
 
if (!(hdev->fw_components & FW_TYPE_BOOT_CPU))
return 0;
 
+   /* init loader parameters */
+   hdev->asic_funcs->init_firmware_loader(hdev);
+   fw_loader = &hdev->fw_loader;
+   cpu_security_boot_status_reg = fw_loader->cpu_boot_status_reg;
+   cpu_msg_status_reg = fw_loader->cpu_cmd_status_to_host_reg;
+   cpu_boot_status_reg = fw_loader->cpu_boot_status_reg;
+   msg_to_cpu_reg = fw_loader->kmd_msg_to_cpu_reg;
+   cpu_timeout = fw_loader->cpu_timeout;
+
dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
cpu_timeout / USEC_PER_SEC);
 
@@ -995,7 +1003,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 
cpu_boot_status_reg,
status,
status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
1,
-   boot_fit_timeout);
+   fw_loader->boot_fit_timeout);
 
if (rc) {
dev_dbg(hdev->dev,
@@ -1018,7 +1026,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 
cpu_boot_status_reg,
status,
status == CPU_MSG_OK,
1,
-   boot_fit_timeout);
+   fw_loader->boot_fit_timeout);
 
if (rc) {
dev_err(hdev->dev,
@@ -1088,7 +1096,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 
cpu_boot_status_reg,
if (rc)
goto out;
 
-   if (skip_bmc) {
+   if (fw_loader->skip_bmc) {
WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC);
 
rc = hl_poll_timeout(
@@ -1135,7 +1143,8 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 
cpu_boot_status_reg,
goto out;
}
 
-   rc = fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
+   rc = fw_read_errors(hdev, fw_loader->boot_err0_reg,
+   cpu_security_boot_status_reg);
if (rc)
return rc;
 
@@ -1164,7 +1173,8 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 
cpu_boot_status_reg,
return 0;
 
 out:
-   fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
+   fw_read_errors(hdev, fw_loader->boot_err0_reg,
+   cpu_security_boot_status_reg);
 
return rc;
 }
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 6441a270e10a..9ee7a583b614 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -818,6 +818,28 @@ enum div_select_defs {
DIV_SEL_DIVIDED_PLL = 3,
 };
 
+/**
+ * struct fw_load_mgr - manager FW loading process
+ * @kmd_msg_to_cpu_reg: register address for KMD->CPU messages
+ * @cpu_cmd_status_to_host_reg: register address for CPU command status 
response
+ * @cpu_boot_status_reg: boot status register
+ * @cpu_boot_dev_status_reg: boot device status register
+ * @boot_err0_reg: boot error register
+ * @cpu_timeout: CPU response timeout in usec
+ * @boot_fit_timeout: Boot fit load timeout in usec
+ * @skip_bmc: 

[PATCH 1/7] habanalabs: expose ASIC specific PLL index

2021-04-17 Thread Oded Gabbay
From: Bharat Jauhari 

Currently the user cannot interpret the PLL information based on index
as it is exposed as an integer.

This commit exposes ASIC specific PLL indexes and maps them to generic
FW compatible indexes.

Signed-off-by: Bharat Jauhari 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c | 34 +++-
 drivers/misc/habanalabs/common/habanalabs.h  | 16 +++---
 drivers/misc/habanalabs/common/sysfs.c   |  4 +-
 drivers/misc/habanalabs/gaudi/gaudi.c| 55 +++-
 drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c  | 12 ++---
 drivers/misc/habanalabs/goya/goya.c  | 47 +++--
 drivers/misc/habanalabs/goya/goya_hwmgr.c| 40 +++---
 include/uapi/misc/habanalabs.h   | 33 
 8 files changed, 127 insertions(+), 114 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index 832dd5c5bb06..7cf82da67dab 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -661,18 +661,13 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, 
u64 *total_energy)
return rc;
 }
 
-int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index,
+int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index,
enum pll_index *pll_index)
 {
struct asic_fixed_properties *prop = &hdev->asic_prop;
u8 pll_byte, pll_bit_off;
bool dynamic_pll;
-
-   if (input_pll_index >= PLL_MAX) {
-   dev_err(hdev->dev, "PLL index %d is out of range\n",
-   input_pll_index);
-   return -EINVAL;
-   }
+   int fw_pll_idx;
 
dynamic_pll = prop->fw_security_status_valid &&
(prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN);
@@ -680,28 +675,39 @@ int get_used_pll_index(struct hl_device *hdev, enum 
pll_index input_pll_index,
if (!dynamic_pll) {
/*
 * in case we are working with legacy FW (each asic has unique
-* PLL numbering) extract the legacy numbering
+* PLL numbering) use the driver based index as they are
+* aligned with fw legacy numbering
 */
-   *pll_index = hdev->legacy_pll_map[input_pll_index];
+   *pll_index = input_pll_index;
return 0;
}
 
+   /* retrieve a FW compatible PLL index based on
+* ASIC specific user request
+*/
+   fw_pll_idx = hdev->asic_funcs->map_pll_idx_to_fw_idx(input_pll_index);
+   if (fw_pll_idx < 0) {
+   dev_err(hdev->dev, "Invalid PLL index (%u) error %d\n",
+   input_pll_index, fw_pll_idx);
+   return -EINVAL;
+   }
+
/* PLL map is a u8 array */
-   pll_byte = prop->cpucp_info.pll_map[input_pll_index >> 3];
-   pll_bit_off = input_pll_index & 0x7;
+   pll_byte = prop->cpucp_info.pll_map[fw_pll_idx >> 3];
+   pll_bit_off = fw_pll_idx & 0x7;
 
if (!(pll_byte & BIT(pll_bit_off))) {
dev_err(hdev->dev, "PLL index %d is not supported\n",
-   input_pll_index);
+   fw_pll_idx);
return -EINVAL;
}
 
-   *pll_index = input_pll_index;
+   *pll_index = fw_pll_idx;
 
return 0;
 }
 
-int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index,
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,
u16 *pll_freq_arr)
 {
struct cpucp_packet pkt;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 44e89da30b4a..91291a8e201e 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -930,6 +930,9 @@ enum div_select_defs {
  * driver is ready to receive asynchronous events. This
  * function should be called during the first init and
  * after every hard-reset of the device
+ * @get_msi_info: Retrieve asic-specific MSI ID of the f/w async event
+ * @map_pll_idx_to_fw_idx: convert driver specific per asic PLL index to
+ * generic f/w compatible PLL Indexes
  */
 struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
@@ -1054,6 +1057,7 @@ struct hl_asic_funcs {
u32 block_id, u32 block_size);
void (*enable_events_from_fw)(struct hl_device *hdev);
void (*get_msi_info)(u32 *table);
+   int (*map_pll_idx_to_fw_idx)(u32 pll_idx);
 };
 
 
@@ -1950,8 +1954,6 @@ struct hl_mmu_

Re: [PATCH][next] habanalabs/gaudi: Fix uninitialized return code rc when read size is zero

2021-04-15 Thread Oded Gabbay
On Mon, Apr 12, 2021 at 9:41 PM Arnd Bergmann  wrote:
>
> On Mon, Apr 12, 2021 at 6:11 PM Colin King  wrote:
> >
> > From: Colin Ian King 
> >
> > In the case where size is zero the while loop never assigns rc and the
> > return value is uninitialized. Fix this by initializing rc to zero.
> >
> > Addresses-Coverity: ("Uninitialized scalar variable")
> > Fixes: 639781dcab82 ("habanalabs/gaudi: add debugfs to DMA from the device")
> > Signed-off-by: Colin Ian King 
> > ---
> >  drivers/misc/habanalabs/gaudi/gaudi.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
> > b/drivers/misc/habanalabs/gaudi/gaudi.c
> > index 8730b691ec61..b751652f80a8 100644
> > --- a/drivers/misc/habanalabs/gaudi/gaudi.c
> > +++ b/drivers/misc/habanalabs/gaudi/gaudi.c
> > @@ -6252,7 +6252,7 @@ static int gaudi_debugfs_read_dma(struct hl_device 
> > *hdev, u64 addr, u32 size,
> > dma_addr_t dma_addr;
> > void *kernel_addr;
> > bool is_eng_idle;
> > -   int rc, dma_id;
> > +   int rc = 0, dma_id;
> >
> > kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
> > hdev, SZ_2M,
>
>
> In general, I don't like adding initializations during the declaration as that
> tends to hide warnings for the cases where a later initialization is
> missing. In this case it looks correct though.
>
> Acked-by: Arnd Bergmann 

I don't mind taking this patch for eliminating the warning but fyi,
the caller function (hl_dma_size_write) checks that the size is not
zero. If the size is zero, we never reach this function.

Greg, do you mind applying it directly to your -next branch ? I don't
have anything pending and I'm too lazy to send a pull request for a
single patch ;)

Reviewed-by: Oded Gabbay 

Thanks,
Oded


[git pull] habanalabs pull request for kernel 5.13

2021-04-10 Thread Oded Gabbay
Hi Greg,

This is habanalabs pull request for the merge window of kernel 5.13.
It contains changes and new features, including support for new firmware.
Details are in the tag.

Thanks,
Oded

The following changes since commit b195b20b7145bcae22ad261abc52d68336f5e913:

  Merge tag 'extcon-next-for-5.13' of 
git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/extcon into 
char-misc-next (2021-04-08 08:45:30 +0200)

are available in the Git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git 
tags/misc-habanalabs-next-2021-04-10

for you to fetch changes up to b575a7673e3d0396992fc72fce850723d39264e3:

  habanalabs: print f/w boot unknown error (2021-04-09 14:10:32 +0300)


This tag contains habanalabs driver changes for v5.13:

- Add support to reset device after the user closes the file descriptor.
  Because we support a single user, we can reset the device (if needed)
  after a user closes its file descriptor to make sure the device is in
  idle and clean state for the next user.

- Add a new feature to allow the user to wait on interrupt. This is needed
  for future ASICs

- Replace GFP_ATOMIC with GFP_KERNEL wherever possible and add code to
  support failure of allocating with GFP_ATOMIC.

- Update code to support the latest firmware image:
  - More security features are done in the firmware
  - Remove hard-coded assumptions and replace them with values that are
sent to the firmware on loading.
  - Print device unusable error
  - Reset device in case the communication between driver and firmware
gets out of sync.
  - Support new PCI device ids for secured GAUDI.

- Expose current power draw through the INFO IOCTL.

- Support resetting the device upon a request from the BMC (through F/W).

- Always use only a single MSI in GAUDI, due to H/W limitation.

- Improve data-path code by taking out code from spinlock protection.

- Allow user to specify custom timeout per Command Submission.

- Some enhancements to debugfs.

- Various minor changes and improvements.


Alon Mizrahi (1):
  habanalabs: add custom timeout flag per cs

Bharat Jauhari (1):
  habanalabs: move dram scrub to free sequence

Koby Elbaz (2):
  habanalabs: improve utilization calculation
  habanalabs: support DEVICE_UNUSABLE error indication from FW

Oded Gabbay (11):
  habanalabs: reset after device is actually released
  habanalabs: fail reset if device is not idle
  habanalabs: reset_upon_device_release is for bring-up
  habanalabs: print if device is used on FD close
  habanalabs: change default CS timeout to 30 seconds
  habanalabs: use correct define for 32-bit max value
  habanalabs/gaudi: always use single-msi mode
  habanalabs/gaudi: add debugfs to DMA from the device
  habanalabs: remove the store jobs array from CS IOCTL
  habanalabs: use strscpy instead of sprintf and strlcpy
  habanalabs: print f/w boot unknown error

Ofir Bitton (13):
  habanalabs: add reset support when user closes FD
  habanalabs: enable all IRQs for user interrupt support
  habanalabs: wait for interrupt support
  habanalabs: use a single FW loading bringup flag
  habanalabs/gaudi: update extended async event header
  habanalabs: replace GFP_ATOMIC with GFP_KERNEL
  habanalabs: debugfs access to user mapped host addresses
  habanalabs/gaudi: reset device upon BMC request
  habanalabs/gaudi: unsecure TPC cfg status registers
  habanalabs/gaudi: Update async events header
  habanalabs: move relevant datapath work outside cs lock
  habanalabs/gaudi: derive security status from pci id
  habanalabs/gaudi: skip iATU if F/W security is enabled

Ohad Sharabi (6):
  habanalabs: reset device in case of sync error
  habanalabs: skip DISABLE PCI packet to FW on heartbeat
  habanalabs: update hl_boot_if.h
  habanalabs: support legacy and new pll indexes
  habanalabs: send dynamic msi-x indexes to f/w
  habanalabs: update to latest F/W communication header

Sagiv Ozeri (2):
  habanalabs: support HW blocks vm show
  habanalabs: return current power via INFO IOCTL

Tomer Tayar (1):
  habanalabs/gaudi: clear QM errors only if not in stop_on_err mode

Yang Li (1):
  habanalabs: Switch to using the new API kobj_to_dev()

farah kassabri (3):
  habanalabs: set max asid to 2
  habanalabs: avoid soft lockup bug upon mapping error
  habanalabs/gaudi: sync stream add protection to SOB reset flow

 .../ABI/testing/debugfs-driver-habanalabs  |  70 +++-
 drivers/misc/habanalabs/common/command_buffer.c|  12 +-
 .../misc/habanalabs/common/command_submission.c| 368 +
 drivers/misc/habanalabs/common/context.c   |  14 +-
 drivers/misc/habanalabs/common/debugfs.c   | 224 +++--
 drivers/misc/habanal

[PATCH 7/7] habanalabs: update to latest F/W communication header

2021-04-08 Thread Oded Gabbay
From: Ohad Sharabi 

update files to latest version from F/W team.

Signed-off-by: Ohad Sharabi 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 .../misc/habanalabs/include/common/cpucp_if.h |   3 +-
 .../habanalabs/include/common/hl_boot_if.h| 198 ++
 2 files changed, 200 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h 
b/drivers/misc/habanalabs/include/common/cpucp_if.h
index 20a710f7a369..27cd0ba99aa3 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -11,6 +11,8 @@
 #include 
 #include 
 
+#include "hl_boot_if.h"
+
 #define NUM_HBM_PSEUDO_CH  2
 #define NUM_HBM_CH_PER_DEV 8
 #define CPUCP_PKT_HBM_ECC_INFO_WR_PAR_SHIFT0
@@ -564,7 +566,6 @@ struct eq_generic_event {
  */
 
 #define CARD_NAME_MAX_LEN  16
-#define VERSION_MAX_LEN128
 #define CPUCP_MAX_SENSORS  128
 #define CPUCP_MAX_NICS 128
 #define CPUCP_LANES_PER_NIC4
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h 
b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index 980b432fd76e..e0a259e0495c 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -13,6 +13,8 @@
 
 #define BOOT_FIT_SRAM_OFFSET   0x20
 
+#define VERSION_MAX_LEN128
+
 /*
  * CPU error bits in BOOT_ERROR registers
  *
@@ -251,6 +253,7 @@ enum kmd_msg {
KMD_MSG_SKIP_BMC,
RESERVED,
KMD_MSG_RST_DEV,
+   KMD_MSG_LAST
 };
 
 enum cpu_msg_status {
@@ -259,4 +262,199 @@ enum cpu_msg_status {
CPU_MSG_ERR,
 };
 
+/* communication registers mapping - consider ABI when changing */
+struct cpu_dyn_regs {
+   uint32_t cpu_pq_base_addr_low;
+   uint32_t cpu_pq_base_addr_high;
+   uint32_t cpu_pq_length;
+   uint32_t cpu_pq_init_status;
+   uint32_t cpu_eq_base_addr_low;
+   uint32_t cpu_eq_base_addr_high;
+   uint32_t cpu_eq_length;
+   uint32_t cpu_eq_ci;
+   uint32_t cpu_cq_base_addr_low;
+   uint32_t cpu_cq_base_addr_high;
+   uint32_t cpu_cq_length;
+   uint32_t cpu_pf_pq_pi;
+   uint32_t cpu_boot_dev_sts0;
+   uint32_t cpu_boot_dev_sts1;
+   uint32_t cpu_boot_err0;
+   uint32_t cpu_boot_err1;
+   uint32_t cpu_boot_status;
+   uint32_t fw_upd_sts;
+   uint32_t fw_upd_cmd;
+   uint32_t fw_upd_pending_sts;
+   uint32_t fuse_ver_offset;
+   uint32_t preboot_ver_offset;
+   uint32_t uboot_ver_offset;
+   uint32_t hw_state;
+   uint32_t kmd_msg_to_cpu;
+   uint32_t cpu_cmd_status_to_host;
+   uint32_t reserved1[32]; /* reserve for future use */
+};
+
+/* HCDM - Habana Communications Descriptor Magic */
+#define HL_COMMS_DESC_MAGIC0x4843444D
+#define HL_COMMS_DESC_VER  1
+
+/* this is the comms descriptor header - meta data */
+struct comms_desc_header {
+   uint32_t magic; /* magic for validation */
+   uint32_t crc32; /* CRC32 of the descriptor w/o header */
+   uint16_t size;  /* size of the descriptor w/o header */
+   uint8_t version;/* descriptor version */
+   uint8_t reserved[5];/* pad to 64 bit */
+};
+
+/* this is the main FW descriptor - consider ABI when changing */
+struct lkd_fw_comms_desc {
+   struct comms_desc_header header;
+   struct cpu_dyn_regs cpu_dyn_regs;
+   char fuse_ver[VERSION_MAX_LEN];
+   char cur_fw_ver[VERSION_MAX_LEN];
+   /* can be used for 1 more version w/o ABI change */
+   char reserved0[VERSION_MAX_LEN];
+   uint64_t img_addr;  /* address for next FW component load */
+};
+
+/*
+ * LKD commands:
+ *
+ * COMMS_NOOP  Used to clear the command register and no actual
+ * command is send.
+ *
+ * COMMS_CLR_STS   Clear status command - FW should clear the
+ * status register. Used for synchronization
+ * between the commands as part of the race free
+ * protocol.
+ *
+ * COMMS_RST_STATE Reset the current communication state which is
+ * kept by FW for proper responses.
+ * Should be used in the beginning of the
+ * communication cycle to clean any leftovers from
+ * previous communication attempts.
+ *
+ * COMMS_PREP_DESC Prepare descriptor for setting up the
+ * communication and other dynamic data:
+ * struct lkd_fw_comms_desc.
+ * This command has a parameter stating the next FW
+ * component size, so the FW can actually

[PATCH 6/7] habanalabs: print on f/w boot unknown error

2021-04-08 Thread Oded Gabbay
We need to print a message to the kernel log in case we encounter
an unknown error in the f/w boot to help the user understand what
happened.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index 652571d3b8e6..4c096b6132b5 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -351,8 +351,12 @@ static int fw_read_errors(struct hl_device *hdev, u32 
boot_err0_reg,
dev_dbg(hdev->dev, "Device security status %#x\n",
security_val);
 
-   if (err_val & ~CPU_BOOT_ERR0_ENABLED)
+   if (err_val & ~CPU_BOOT_ERR0_ENABLED) {
+   dev_err(hdev->dev,
+   "Device boot error - unknown error 0x%08x\n",
+   err_val);
return -EIO;
+   }
 
return 0;
 }
-- 
2.25.1



[PATCH 5/7] habanalabs/gaudi: skip iATU if F/W security is enabled

2021-04-08 Thread Oded Gabbay
From: Ofir Bitton 

As part of securing GAUDI, the F/W will configure the PCI iATU
regions. If the driver identifies a secured PCI ID, it will know to
skip iATU configuration in a very early stage.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/habanalabs.h |  3 ++
 drivers/misc/habanalabs/common/pci/pci.c| 52 +
 drivers/misc/habanalabs/gaudi/gaudi.c   | 23 +
 drivers/misc/habanalabs/goya/goya.c | 24 +-
 4 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index c1b46126c522..44e89da30b4a 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -445,6 +445,7 @@ struct hl_mmu_properties {
  * @dram_supports_virtual_memory: is there an MMU towards the DRAM
  * @hard_reset_done_by_fw: true if firmware is handling hard reset flow
  * @num_functional_hbms: number of functional HBMs in each DCORE.
+ * @iatu_done_by_fw: true if iATU configuration is being done by FW.
  */
 struct asic_fixed_properties {
struct hw_queue_properties  *hw_queues_props;
@@ -508,6 +509,7 @@ struct asic_fixed_properties {
u8  dram_supports_virtual_memory;
u8  hard_reset_done_by_fw;
u8  num_functional_hbms;
+   u8  iatu_done_by_fw;
 };
 
 /**
@@ -2400,6 +2402,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 
cpu_boot_status_reg,
 
 int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
bool is_wc[3]);
+int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data);
 int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
 int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
struct hl_inbound_pci_region *pci_region);
diff --git a/drivers/misc/habanalabs/common/pci/pci.c 
b/drivers/misc/habanalabs/common/pci/pci.c
index b799f9258fb0..e941b7eef346 100644
--- a/drivers/misc/habanalabs/common/pci/pci.c
+++ b/drivers/misc/habanalabs/common/pci/pci.c
@@ -85,6 +85,58 @@ static void hl_pci_bars_unmap(struct hl_device *hdev)
pci_release_regions(pdev);
 }
 
+int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data)
+{
+   struct pci_dev *pdev = hdev->pdev;
+   ktime_t timeout;
+   u64 msec;
+   u32 val;
+
+   if (hdev->pldm)
+   msec = HL_PLDM_PCI_ELBI_TIMEOUT_MSEC;
+   else
+   msec = HL_PCI_ELBI_TIMEOUT_MSEC;
+
+   /* Clear previous status */
+   pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0);
+
+   pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr);
+   pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL, 0);
+
+   timeout = ktime_add_ms(ktime_get(), msec);
+   for (;;) {
+   pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val);
+   if (val & PCI_CONFIG_ELBI_STS_MASK)
+   break;
+   if (ktime_compare(ktime_get(), timeout) > 0) {
+   pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS,
+   &val);
+   break;
+   }
+
+   usleep_range(300, 500);
+   }
+
+   if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) {
+   pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data);
+
+   return 0;
+   }
+
+   if (val & PCI_CONFIG_ELBI_STS_ERR) {
+   dev_err(hdev->dev, "Error reading from ELBI\n");
+   return -EIO;
+   }
+
+   if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
+   dev_err(hdev->dev, "ELBI read didn't finish in time\n");
+   return -EIO;
+   }
+
+   dev_err(hdev->dev, "ELBI read has undefined bits in status\n");
+   return -EIO;
+}
+
 /**
  * hl_pci_elbi_write() - Write through the ELBI interface.
  * @hdev: Pointer to hl_device structure.
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 841748392e49..8730b691ec61 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -629,6 +629,11 @@ static int gaudi_init_iatu(struct hl_device *hdev)
struct hl_outbound_pci_region outbound_region;
int rc;
 
+   if (hdev->asic_prop.iatu_done_by_fw) {
+   hdev->asic_funcs->set_dma_mask_from_fw(hdev);
+   return 0;
+   }
+
/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
inbound_region.mode = PCI_BAR_MATCH_MODE;
inbound_region.bar = SRAM_BAR_ID;
@@ -673,6 +678,7 @@ static int gaudi_early_init(struct hl_device *

[PATCH 4/7] habanalabs/gaudi: derive security status from pci id

2021-04-08 Thread Oded Gabbay
From: Ofir Bitton 

As F/W security indication must be available before driver approaches
PCI bus, F/W security should be derived from PCI id rather than be
fetched during boot handshake with F/W.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/device.c   |  4 
 drivers/misc/habanalabs/common/firmware_if.c  |  6 +++---
 drivers/misc/habanalabs/common/habanalabs.h   |  4 +++-
 .../misc/habanalabs/common/habanalabs_drv.c   | 21 +++
 drivers/misc/habanalabs/common/mmu/mmu.c  |  1 +
 drivers/misc/habanalabs/common/sysfs.c|  3 +++
 drivers/misc/habanalabs/gaudi/gaudi.c |  2 --
 drivers/misc/habanalabs/goya/goya.c   |  2 --
 8 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c 
b/drivers/misc/habanalabs/common/device.c
index 2ed4f2bedc08..00e92b678828 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -327,6 +327,10 @@ static int device_early_init(struct hl_device *hdev)
gaudi_set_asic_funcs(hdev);
strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
break;
+   case ASIC_GAUDI_SEC:
+   gaudi_set_asic_funcs(hdev);
+   strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name));
+   break;
default:
dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
hdev->asic_type);
diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index 532a2fd7bfb4..652571d3b8e6 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -819,16 +819,16 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 
cpu_boot_status_reg,
if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
prop->fw_security_status_valid = 1;
 
+   /* FW security should be derived from PCI ID, we keep this
+* check for backward compatibility
+*/
if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)
prop->fw_security_disabled = false;
-   else
-   prop->fw_security_disabled = true;
 
if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
prop->hard_reset_done_by_fw = true;
} else {
prop->fw_security_status_valid = 0;
-   prop->fw_security_disabled = true;
}
 
dev_dbg(hdev->dev, "Firmware preboot security status %#x\n",
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 867986ef4588..c1b46126c522 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -766,11 +766,13 @@ struct hl_eq {
  * @ASIC_INVALID: Invalid ASIC type.
  * @ASIC_GOYA: Goya device.
  * @ASIC_GAUDI: Gaudi device.
+ * @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000).
  */
 enum hl_asic_type {
ASIC_INVALID,
ASIC_GOYA,
-   ASIC_GAUDI
+   ASIC_GAUDI,
+   ASIC_GAUDI_SEC
 };
 
 struct hl_cs_parser;
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c 
b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 59896566dca1..7135f1e03864 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -47,10 +47,12 @@ MODULE_PARM_DESC(memory_scrub,
 
 #define PCI_IDS_GOYA   0x0001
 #define PCI_IDS_GAUDI  0x1000
+#define PCI_IDS_GAUDI_SEC  0x1010
 
 static const struct pci_device_id ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
+   { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), },
{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, ids);
@@ -74,6 +76,9 @@ static enum hl_asic_type get_asic_type(u16 device)
case PCI_IDS_GAUDI:
asic_type = ASIC_GAUDI;
break;
+   case PCI_IDS_GAUDI_SEC:
+   asic_type = ASIC_GAUDI_SEC;
+   break;
default:
asic_type = ASIC_INVALID;
break;
@@ -82,6 +87,16 @@ static enum hl_asic_type get_asic_type(u16 device)
return asic_type;
 }
 
+static bool is_asic_secured(enum hl_asic_type asic_type)
+{
+   switch (asic_type) {
+   case ASIC_GAUDI_SEC:
+   return true;
+   default:
+   return false;
+   }
+}
+
 /*
  * hl_device_open - open function for habanalabs device
  *
@@ -287,6 +302,12 @@ int create_hdev(struct hl_device **dev, struct pci_dev 
*pdev,
hdev->asic_type =

[PATCH 3/7] habanalabs: move dram scrub to free sequence

2021-04-08 Thread Oded Gabbay
From: Bharat Jauhari 

DRAM scrubbing can take time hence it adds to latency during allocation.
To minimize latency during initialization, scrubbing is moved to release
call.
In case scrubbing fails it means the device is in a bad state,
hence HARD reset is initiated.

Signed-off-by: Bharat Jauhari 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/memory.c | 87 ++---
 1 file changed, 48 insertions(+), 39 deletions(-)

diff --git a/drivers/misc/habanalabs/common/memory.c 
b/drivers/misc/habanalabs/common/memory.c
index 6530fddbbc21..2938cbbafbbc 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -81,16 +81,6 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct 
hl_mem_in *args,
num_pgs, total_size);
return -ENOMEM;
}
-
-   if (hdev->memory_scrub) {
-   rc = hdev->asic_funcs->scrub_device_mem(hdev, paddr,
-   total_size);
-   if (rc) {
-   dev_err(hdev->dev,
-   "Failed to scrub contiguous device 
memory\n");
-   goto pages_pack_err;
-   }
-   }
}
 
phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
@@ -128,17 +118,6 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct 
hl_mem_in *args,
goto page_err;
}
 
-   if (hdev->memory_scrub) {
-   rc = hdev->asic_funcs->scrub_device_mem(hdev,
-   phys_pg_pack->pages[i],
-   page_size);
-   if (rc) {
-   dev_err(hdev->dev,
-   "Failed to scrub device 
memory\n");
-   goto page_err;
-   }
-   }
-
num_curr_pgs++;
}
}
@@ -280,37 +259,67 @@ static void dram_pg_pool_do_release(struct kref *ref)
  * @phys_pg_pack: physical page pack to free.
  *
  * This function does the following:
- * - For DRAM memory only, iterate over the pack and free each physical block
- *   structure by returning it to the general pool.
+ * - For DRAM memory only
+ *   - iterate over the pack, scrub and free each physical block structure by
+ * returning it to the general pool.
+ * In case of error during scrubbing, initiate hard reset.
+ * Once hard reset is triggered, scrubbing is bypassed while freeing the
+ * memory continues.
  * - Free the hl_vm_phys_pg_pack structure.
  */
-static void free_phys_pg_pack(struct hl_device *hdev,
+static int free_phys_pg_pack(struct hl_device *hdev,
struct hl_vm_phys_pg_pack *phys_pg_pack)
 {
struct hl_vm *vm = &hdev->vm;
u64 i;
+   int rc = 0;
+
+   if (phys_pg_pack->created_from_userptr)
+   goto end;
 
-   if (!phys_pg_pack->created_from_userptr) {
-   if (phys_pg_pack->contiguous) {
-   gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
+   if (phys_pg_pack->contiguous) {
+   if (hdev->memory_scrub && !hdev->disabled) {
+   rc = hdev->asic_funcs->scrub_device_mem(hdev,
+   phys_pg_pack->pages[0],
phys_pg_pack->total_size);
+   if (rc)
+   dev_err(hdev->dev,
+   "Failed to scrub contiguous device 
memory\n");
+   }
 
-   for (i = 0; i < phys_pg_pack->npages ; i++)
-   kref_put(&vm->dram_pg_pool_refcount,
-   dram_pg_pool_do_release);
-   } else {
-   for (i = 0 ; i < phys_pg_pack->npages ; i++) {
-   gen_pool_free(vm->dram_pg_pool,
+   gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
+   phys_pg_pack->total_size);
+
+   for (i = 0; i < phys_pg_pack->npages ; i++)
+   kref_put(&vm->dram_pg_pool_refcount,
+   dram_pg_pool_do_release);
+   } else {
+   for (i = 0 ; i < phys_pg_pack->npages ; i++) {
+   if (hdev->memory_scrub && !hdev->disabled && rc == 0) {
+

[PATCH 2/7] habanalabs: send dynamic msi-x indexes to f/w

2021-04-08 Thread Oded Gabbay
From: Ohad Sharabi 

In order to minimize hard coded values between F/W and the driver, we
send msi-x indexes dynamically to the F/W.

Signed-off-by: Ohad Sharabi 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c  | 67 +
 drivers/misc/habanalabs/common/habanalabs.h   |  4 +
 drivers/misc/habanalabs/gaudi/gaudi.c |  2 +-
 drivers/misc/habanalabs/goya/goya.c   |  2 +-
 .../misc/habanalabs/include/common/cpucp_if.h | 75 ++-
 5 files changed, 131 insertions(+), 19 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index d81a8d537373..532a2fd7bfb4 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -422,6 +422,73 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev,
return rc;
 }
 
+static int hl_fw_send_msi_info_msg(struct hl_device *hdev)
+{
+   struct cpucp_array_data_packet *pkt;
+   size_t total_pkt_size, data_size;
+   u64 result;
+   int rc;
+
+   /* skip sending this info for unsupported ASICs */
+   if (!hdev->asic_funcs->get_msi_info)
+   return 0;
+
+   data_size = CPUCP_NUM_OF_MSI_TYPES * sizeof(u32);
+   total_pkt_size = sizeof(struct cpucp_array_data_packet) + data_size;
+
+   /* data should be aligned to 8 bytes in order to CPU-CP to copy it */
+   total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
+
+   /* total_pkt_size is casted to u16 later on */
+   if (total_pkt_size > USHRT_MAX) {
+   dev_err(hdev->dev, "CPUCP array data is too big\n");
+   return -EINVAL;
+   }
+
+   pkt = kzalloc(total_pkt_size, GFP_KERNEL);
+   if (!pkt)
+   return -ENOMEM;
+
+   pkt->length = cpu_to_le32(CPUCP_NUM_OF_MSI_TYPES);
+
+   hdev->asic_funcs->get_msi_info((u32 *)&pkt->data);
+
+   pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_MSI_INFO_SET <<
+   CPUCP_PKT_CTL_OPCODE_SHIFT);
+
+   rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)pkt,
+   total_pkt_size, 0, &result);
+
+   /*
+* in case packet result is invalid it means that FW does not support
+* this feature and will use default/hard coded MSI values. no reason
+* to stop the boot
+*/
+   if (rc && result == cpucp_packet_invalid)
+   rc = 0;
+
+   if (rc)
+   dev_err(hdev->dev, "failed to send CPUCP array data\n");
+
+   kfree(pkt);
+
+   return rc;
+}
+
+int hl_fw_cpucp_handshake(struct hl_device *hdev,
+   u32 cpu_security_boot_status_reg,
+   u32 boot_err0_reg)
+{
+   int rc;
+
+   rc = hl_fw_cpucp_info_get(hdev, cpu_security_boot_status_reg,
+   boot_err0_reg);
+   if (rc)
+   return rc;
+
+   return hl_fw_send_msi_info_msg(hdev);
+}
+
 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
 {
struct cpucp_packet pkt = {};
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index d89ae4c3d634..867986ef4588 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1049,6 +1049,7 @@ struct hl_asic_funcs {
int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
u32 block_id, u32 block_size);
void (*enable_events_from_fw)(struct hl_device *hdev);
+   void (*get_msi_info)(u32 *table);
 };
 
 
@@ -2374,6 +2375,9 @@ int hl_fw_send_heartbeat(struct hl_device *hdev);
 int hl_fw_cpucp_info_get(struct hl_device *hdev,
u32 cpu_security_boot_status_reg,
u32 boot_err0_reg);
+int hl_fw_cpucp_handshake(struct hl_device *hdev,
+   u32 cpu_security_boot_status_reg,
+   u32 boot_err0_reg);
 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
 int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
struct hl_info_pci_counters *counters);
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 791434278904..62e3c63bec20 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7966,7 +7966,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
return 0;
 
-   rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
+   rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
if (rc)
return rc;
 
diff --git a/driv

[PATCH 1/7] habanalabs/gaudi: clear QM errors only if not in stop_on_err mode

2021-04-08 Thread Oded Gabbay
From: Tomer Tayar 

Clearing QM errors by the driver will prevent these H/W blocks from
stopping in case they are configured to stop on errors, so perform this
clearing only if this mode is not in use.

Signed-off-by: Tomer Tayar 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 03d3fb643e79..791434278904 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7086,7 +7086,8 @@ static void gaudi_handle_qman_err_generic(struct 
hl_device *hdev,
}
 
/* Write 1 clear errors */
-   WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
+   if (!hdev->stop_on_err)
+   WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
}
 
arb_err_val = RREG32(arb_err_addr);
-- 
2.25.1



[PATCH] habanalabs: support DEVICE_UNUSABLE error indication from FW

2021-04-04 Thread Oded Gabbay
From: Koby Elbaz 

In case of multiple ECC errors, FW will set the DEVICE_UNUSABLE bit.
On boot-up, the driver will therefore fail inserting the device.

Signed-off-by: Koby Elbaz 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c| 3 +++
 drivers/misc/habanalabs/include/common/hl_boot_if.h | 4 
 2 files changed, 7 insertions(+)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index 092691a8917d..d81a8d537373 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -342,6 +342,9 @@ static int fw_read_errors(struct hl_device *hdev, u32 
boot_err0_reg,
dev_err(hdev->dev, "Device boot error - eFuse failure\n");
if (err_val & CPU_BOOT_ERR0_PLL_FAIL)
dev_err(hdev->dev, "Device boot error - PLL failure\n");
+   if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL)
+   dev_err(hdev->dev,
+   "Device boot error - device unusable failure\n");
 
security_val = RREG32(cpu_security_boot_status_reg);
if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h 
b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index 1717874ff306..980b432fd76e 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -73,6 +73,9 @@
  * CPU_BOOT_ERR0_PLL_FAIL  PLL settings failed, meaning that one
  * of the PLLs remains in REF_CLK
  *
+ * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL  Device is unusable and customer support
+ * should be contacted.
+ *
  * CPU_BOOT_ERR0_ENABLED   Error registers enabled.
  * This is a main indication that the
  * running FW populates the error
@@ -92,6 +95,7 @@
 #define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL (1 << 10)
 #define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL (1 << 11)
 #define CPU_BOOT_ERR0_PLL_FAIL (1 << 12)
+#define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL (1 << 13)
 #define CPU_BOOT_ERR0_ENABLED  (1 << 31)
 
 /*
-- 
2.25.1



[PATCH] habanalabs: use strscpy instead of sprintf and strlcpy

2021-04-03 Thread Oded Gabbay
Prefer the use of strscpy when copying the ASIC name into a char array,
to prevent accidentally exceeding the array's length.
In addition, strlcpy is frowned upon so replace it.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c 
b/drivers/misc/habanalabs/common/device.c
index e64c60d48d42..2ed4f2bedc08 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -321,11 +321,11 @@ static int device_early_init(struct hl_device *hdev)
switch (hdev->asic_type) {
case ASIC_GOYA:
goya_set_asic_funcs(hdev);
-   strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
+   strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
break;
case ASIC_GAUDI:
gaudi_set_asic_funcs(hdev);
-   sprintf(hdev->asic_name, "GAUDI");
+   strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
break;
default:
dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
-- 
2.25.1



[PATCH] habanalabs: remove the store jobs array from CS IOCTL

2021-04-02 Thread Oded Gabbay
The store part was never implemented in the code and has never been used
by userspace applications.

We currently use the related parameters for a different purpose with
a defined union. However, there is no point in that and it is better
to just remove the union and the store parameters.

Signed-off-by: Oded Gabbay 
---
 include/uapi/misc/habanalabs.h | 35 ++
 1 file changed, 10 insertions(+), 25 deletions(-)

diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 90798eaac728..d3e017b5f0db 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -644,17 +644,10 @@ struct hl_cs_in {
/* holds address of array of hl_cs_chunk for execution phase */
__u64 chunks_execute;
 
-   union {
-   /* this holds address of array of hl_cs_chunk for store phase -
-* Currently not in use
-*/
-   __u64 chunks_store;
-
-   /* Sequence number of a staged submission CS
-* valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set
-*/
-   __u64 seq;
-   };
+   /* Sequence number of a staged submission CS
+* valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set
+*/
+   __u64 seq;
 
/* Number of chunks in restore phase array. Maximum number is
 * HL_MAX_JOBS_PER_CS
@@ -666,18 +659,10 @@ struct hl_cs_in {
 */
__u32 num_chunks_execute;
 
-   union {
-   /* Number of chunks in restore phase array -
-* Currently not in use
-*/
-   __u32 num_chunks_store;
-
-   /* timeout in seconds - valid only if HL_CS_FLAGS_CUSTOM_TIMEOUT
-* is set. this parameter is ignored in case of future multiple
-* users support.
-*/
-   __u32 timeout;
-   };
+   /* timeout in seconds - valid only if HL_CS_FLAGS_CUSTOM_TIMEOUT
+* is set
+*/
+   __u32 timeout;
 
/* HL_CS_FLAGS_* */
__u32 cs_flags;
@@ -1051,8 +1036,8 @@ struct hl_debug_args {
  * Each JOB will be enqueued on a specific queue, according to the user's 
input.
  * There can be more then one JOB per queue.
  *
- * The CS IOCTL will receive three sets of JOBS. One set is for "restore" 
phase,
- * a second set is for "execution" phase and a third set is for "store" phase.
+ * The CS IOCTL will receive two sets of JOBS. One set is for "restore" phase
+ * and a second set is for "execution" phase.
  * The JOBS on the "restore" phase are enqueued only after context-switch
  * (or if its the first CS for this context). The user can also order the
  * driver to run the "restore" phase explicitly
-- 
2.25.1



[PATCH 3/3] habanalabs/gaudi: add debugfs to DMA from the device

2021-04-02 Thread Oded Gabbay
When trying to debug a program, the user often needs to
dump large parts of the device's DRAM, which can reach tens of GBs.
Because reading from the device's internal memory through the PCI BAR
is extremely slow, the debugging can take hours.

Instead, we can allow the user to copy data through one of the DMA
engines. This will make the operation much faster.

Currently, only GAUDI is supported.

In GAUDI, we need to find a PCI DMA engine that is IDLE and set the
DMA as secured to be able to bypass our MMU as we currently don't
map the temporary buffer to the MMU.

Example bash one-line to dump entire HBM to file (~2 minutes):

for (( i=0x0; i < 0x800000000; i+=0x8000000 )); do \
printf '0x%x\n' $i | sudo tee /sys/kernel/debug/habanalabs/hl0/addr ; \
echo 0x8000000 | sudo tee /sys/kernel/debug/habanalabs/hl0/dma_size ; \
sudo cat /sys/kernel/debug/habanalabs/hl0/data_dma >> hbm.txt ; done

Signed-off-by: Oded Gabbay 
---
 .../ABI/testing/debugfs-driver-habanalabs |  68 ++--
 drivers/misc/habanalabs/common/debugfs.c  | 118 -
 drivers/misc/habanalabs/common/habanalabs.h   |  17 +-
 drivers/misc/habanalabs/gaudi/gaudi.c | 159 ++
 drivers/misc/habanalabs/gaudi/gaudiP.h|   2 +
 drivers/misc/habanalabs/goya/goya.c   |   8 +
 6 files changed, 352 insertions(+), 20 deletions(-)

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs 
b/Documentation/ABI/testing/debugfs-driver-habanalabs
index f9e233cbdc37..c78fc9282876 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -82,6 +82,24 @@ Description:Allows the root user to read or write 64 bit 
data directly
 If the IOMMU is disabled, it also allows the root user to read
 or write from the host a device VA of a host mapped memory
 
+What:   /sys/kernel/debug/habanalabs/hl/data_dma
+Date:   Apr 2021
+KernelVersion:  5.13
+Contact:ogab...@kernel.org
+Description:Allows the root user to read from the device's internal
+memory (DRAM/SRAM) through a DMA engine.
+This property is a binary blob that contains the result of the
+DMA transfer.
+This custom interface is needed (instead of using the generic
+Linux user-space PCI mapping) because the amount of internal
+memory is huge (>32GB) and reading it via the PCI bar will take
+a very long time.
+This interface doesn't support concurrency in the same device.
+In GAUDI and GOYA, this action can cause undefined behavior
+in case it is done while the device is executing user
+workloads.
+Only supported on GAUDI at this stage.
+
 What:   /sys/kernel/debug/habanalabs/hl/device
 Date:   Jan 2019
 KernelVersion:  5.1
@@ -90,6 +108,24 @@ Description:Enables the root user to set the device to 
specific state.
 Valid values are "disable", "enable", "suspend", "resume".
 User can read this property to see the valid values
 
+What:   /sys/kernel/debug/habanalabs/hl/dma_size
+Date:   Apr 2021
+KernelVersion:  5.13
+Contact:ogab...@kernel.org
+Description:Specify the size of the DMA transaction when using DMA to read
+from the device's internal memory. The value can not be larger
+than 128MB. Writing to this value initiates the DMA transfer.
+When the write is finished, the user can read the "data_dma"
+blob
+
+What:   /sys/kernel/debug/habanalabs/hl/dump_security_violations
+Date:   Jan 2021
+KernelVersion:  5.12
+Contact:ogab...@kernel.org
+Description:Dumps all security violations to dmesg. This will also ack
+all security violations, meaning those violations will not be
+dumped next time user calls this API
+
 What:   /sys/kernel/debug/habanalabs/hl/engines
 Date:   Jul 2019
 KernelVersion:  5.3
@@ -154,6 +190,16 @@ Description:Displays the hop values and physical 
address for a given ASID
 e.g. to display info about VA 0x1000 for ASID 1 you need to do:
 echo "1 0x1000" > /sys/kernel/debug/habanalabs/hl0/mmu
 
+What:   /sys/kernel/debug/habanalabs/hl/mmu_error
+Date:   Mar 2021
+KernelVersion:  5.12
+Contact:fkassa...@habana.ai
+Description:Check and display page fault or access violation mmu errors for
+all MMUs specified in mmu_cap_mask.
+e.g. to display error info for MMU hw cap bit 9, you need to 
do:
+echo "0x200" > /sys/kernel/debug/habanalabs/hl0/mmu_error
+   

[PATCH 2/3] habanalabs/gaudi: sync stream add protection to SOB reset flow

2021-04-02 Thread Oded Gabbay
From: farah kassabri 

Since we moved the SOB reset flow to a workqueue, and it is no longer
part of the fence release flow, we might reach a
scenario where a new context is created while we are in the middle
of resetting the SOB.
In such cases the reset may fail due to the idle check.
This will mess up the stream sync since the SOB value is invalid,
so we protect this area with a mutex to delay context creation.

Signed-off-by: farah kassabri 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index f273b792bc5d..bd711c8d3874 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -5729,18 +5729,26 @@ static int gaudi_memset_registers(struct hl_device 
*hdev, u64 reg_base,
 static int gaudi_schedule_register_memset(struct hl_device *hdev,
u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
 {
-   struct hl_ctx *ctx = hdev->compute_ctx;
+   struct hl_ctx *ctx;
struct hl_pending_cb *pending_cb;
struct packet_msg_long *pkt;
u32 cb_size, ctl;
struct hl_cb *cb;
-   int i;
+   int i, rc;
+
+   mutex_lock(&hdev->fpriv_list_lock);
+   ctx = hdev->compute_ctx;
 
/* If no compute context available or context is going down
 * memset registers directly
 */
-   if (!ctx || kref_read(&ctx->refcount) == 0)
-   return gaudi_memset_registers(hdev, reg_base, num_regs, val);
+   if (!ctx || kref_read(&ctx->refcount) == 0) {
+   rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
+   mutex_unlock(&hdev->fpriv_list_lock);
+   return rc;
+   }
+
+   mutex_unlock(&hdev->fpriv_list_lock);
 
cb_size = (sizeof(*pkt) * num_regs) +
sizeof(struct packet_msg_prot) * 2;
-- 
2.25.1



[PATCH 1/3] habanalabs: add custom timeout flag per cs

2021-04-02 Thread Oded Gabbay
From: Alon Mizrahi 

There is a need to allow the user to send command submissions with
a custom timeout, as some CS take longer than the max timeout that is
used by default.

Signed-off-by: Alon Mizrahi 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 .../habanalabs/common/command_submission.c| 35 +++
 drivers/misc/habanalabs/common/habanalabs.h   |  2 ++
 drivers/misc/habanalabs/common/hw_queue.c |  2 +-
 include/uapi/misc/habanalabs.h| 15 ++--
 4 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c 
b/drivers/misc/habanalabs/common/command_submission.c
index 21a60b7c2091..ff8791a651fd 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -467,8 +467,7 @@ static void cs_handle_tdr(struct hl_device *hdev, struct 
hl_cs *cs)
 
if (next_entry_found && !next->tdr_active) {
next->tdr_active = true;
-   schedule_delayed_work(&next->work_tdr,
-   hdev->timeout_jiffies);
+   schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
}
 
spin_unlock(&hdev->cs_mirror_lock);
@@ -622,7 +621,7 @@ static void cs_timedout(struct work_struct *work)
 
 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
enum hl_cs_type cs_type, u64 user_sequence,
-   struct hl_cs **cs_new)
+   struct hl_cs **cs_new, u32 flags, u32 timeout)
 {
struct hl_cs_counters_atomic *cntr;
struct hl_fence *other = NULL;
@@ -649,6 +648,8 @@ static int allocate_cs(struct hl_device *hdev, struct 
hl_ctx *ctx,
cs->submitted = false;
cs->completed = false;
cs->type = cs_type;
+   cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
+   cs->timeout_jiffies = timeout;
INIT_LIST_HEAD(&cs->job_list);
INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
kref_init(&cs->refcount);
@@ -1092,7 +1093,8 @@ static int cs_staged_submission(struct hl_device *hdev, 
struct hl_cs *cs,
 }
 
 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
-   u32 num_chunks, u64 *cs_seq, u32 flags)
+   u32 num_chunks, u64 *cs_seq, u32 flags,
+   u32 timeout)
 {
bool staged_mid, int_queues_only = true;
struct hl_device *hdev = hpriv->hdev;
@@ -1121,11 +1123,11 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, 
void __user *chunks,
staged_mid = false;
 
rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
-   staged_mid ? user_sequence : ULLONG_MAX, &cs);
+   staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
+   timeout);
if (rc)
goto free_cs_chunk_array;
 
-   cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
*cs_seq = cs->sequence;
 
hl_debugfs_add_cs(cs);
@@ -1323,7 +1325,8 @@ static int hl_submit_pending_cb(struct hl_fpriv *hpriv)
list_move_tail(&pending_cb->cb_node, &local_cb_list);
spin_unlock(&ctx->pending_cb_lock);
 
-   rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs);
+   rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs, 0,
+   hdev->timeout_jiffies);
if (rc)
goto add_list_elements;
 
@@ -1424,7 +1427,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union 
hl_cs_args *args,
rc = 0;
} else {
rc = cs_ioctl_default(hpriv, chunks, num_chunks,
-   cs_seq, 0);
+   cs_seq, 0, hdev->timeout_jiffies);
}
 
mutex_unlock(&hpriv->restore_phase_mutex);
@@ -1594,7 +1597,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct 
hl_device *hdev,
 
 static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type 
cs_type,
void __user *chunks, u32 num_chunks,
-   u64 *cs_seq, bool timestamp)
+   u64 *cs_seq, u32 flags, u32 timeout)
 {
struct hl_cs_chunk *cs_chunk_array, *chunk;
struct hw_queue_properties *hw_queue_prop;
@@ -1700,7 +1703,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, 
enum hl_cs_type cs_type,
}
}
 
-   rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs);
+   rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
if (rc) {
if (cs_type == CS_TYPE_WAIT ||
  

[PATCH 3/3] habanalabs: improve utilization calculation

2021-03-21 Thread Oded Gabbay
From: Koby Elbaz 

The new approach is based on the notion that the relative
current power consumption is proportional to the
device's true utilization.
Utilization info is in the range [0,100]%.
Currently, dc_power values are hard-coded.

Signed-off-by: Koby Elbaz 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 .../habanalabs/common/command_submission.c|  18 ---
 drivers/misc/habanalabs/common/device.c   | 121 ++
 drivers/misc/habanalabs/common/habanalabs.h   |  25 +---
 .../misc/habanalabs/common/habanalabs_ioctl.c |  11 +-
 drivers/misc/habanalabs/common/hw_queue.c |   8 --
 drivers/misc/habanalabs/gaudi/gaudi.c |  20 ++-
 drivers/misc/habanalabs/gaudi/gaudiP.h|   3 +
 drivers/misc/habanalabs/goya/goya.c   |   1 +
 drivers/misc/habanalabs/goya/goyaP.h  |   2 +
 9 files changed, 40 insertions(+), 169 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c 
b/drivers/misc/habanalabs/common/command_submission.c
index ba6d3e317255..21a60b7c2091 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -505,24 +505,6 @@ static void cs_do_release(struct kref *ref)
goto out;
}
 
-   hdev->asic_funcs->hw_queues_lock(hdev);
-
-   hdev->cs_active_cnt--;
-   if (!hdev->cs_active_cnt) {
-   struct hl_device_idle_busy_ts *ts;
-
-   ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
-   ts->busy_to_idle_ts = ktime_get();
-
-   if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
-   hdev->idle_busy_ts_idx = 0;
-   } else if (hdev->cs_active_cnt < 0) {
-   dev_crit(hdev->dev, "CS active cnt %d is negative\n",
-   hdev->cs_active_cnt);
-   }
-
-   hdev->asic_funcs->hw_queues_unlock(hdev);
-
/* Need to update CI for all queue jobs that does not get completion */
hl_hw_queue_update_ci(cs);
 
diff --git a/drivers/misc/habanalabs/common/device.c 
b/drivers/misc/habanalabs/common/device.c
index 53bc5ccb612f..49f0ceac4b81 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -383,17 +383,9 @@ static int device_early_init(struct hl_device *hdev)
goto free_sob_reset_wq;
}
 
-   hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
-   sizeof(struct hl_device_idle_busy_ts),
-   (GFP_KERNEL | __GFP_ZERO));
-   if (!hdev->idle_busy_ts_arr) {
-   rc = -ENOMEM;
-   goto free_chip_info;
-   }
-
rc = hl_mmu_if_set_funcs(hdev);
if (rc)
-   goto free_idle_busy_ts_arr;
+   goto free_chip_info;
 
hl_cb_mgr_init(&hdev->kernel_cb_mgr);
 
@@ -422,8 +414,6 @@ static int device_early_init(struct hl_device *hdev)
 
 free_cb_mgr:
hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
-free_idle_busy_ts_arr:
-   kfree(hdev->idle_busy_ts_arr);
 free_chip_info:
kfree(hdev->hl_chip_info);
 free_sob_reset_wq:
@@ -461,7 +451,6 @@ static void device_early_fini(struct hl_device *hdev)
 
hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
 
-   kfree(hdev->idle_busy_ts_arr);
kfree(hdev->hl_chip_info);
 
destroy_workqueue(hdev->sob_reset_wq);
@@ -582,100 +571,24 @@ static void device_late_fini(struct hl_device *hdev)
hdev->late_init_done = false;
 }
 
-uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms)
+int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
 {
-   struct hl_device_idle_busy_ts *ts;
-   ktime_t zero_ktime, curr = ktime_get();
-   u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx;
-   s64 period_us, last_start_us, last_end_us, last_busy_time_us,
-   total_busy_time_us = 0, total_busy_time_ms;
-
-   zero_ktime = ktime_set(0, 0);
-   period_us = period_ms * USEC_PER_MSEC;
-   ts = &hdev->idle_busy_ts_arr[last_index];
-
-   /* check case that device is currently in idle */
-   if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) &&
-   !ktime_compare(ts->idle_to_busy_ts, zero_ktime)) {
-
-   last_index--;
-   /* Handle case idle_busy_ts_idx was 0 */
-   if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
-   last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
-
-   ts = &hdev->idle_busy_ts_arr[last_index];
-   }
-
-   while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) {
-   /* Check if we are in last sample case. i.e. if the sample
-* begun before the sampling period

[PATCH 2/3] habanalabs: support legacy and new pll indexes

2021-03-21 Thread Oded Gabbay
From: Ohad Sharabi 

In order to minimize the hard-coded values common to LKD and F/W,
a dynamic method to work with PLLs is introduced in this patch.
The formerly ASIC-specific PLL numbering is now common to all ASICs.
To be backward compatible, a bit in dev status is defined; if the bit is
not set, LKD will keep working with the old PLL numbering.

Signed-off-by: Ohad Sharabi 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c  | 49 ++-
 drivers/misc/habanalabs/common/habanalabs.h   | 14 --
 drivers/misc/habanalabs/common/sysfs.c| 24 ++---
 drivers/misc/habanalabs/gaudi/gaudi.c | 33 +
 drivers/misc/habanalabs/goya/goya.c   | 26 ++
 .../misc/habanalabs/include/common/cpucp_if.h | 41 
 .../habanalabs/include/common/hl_boot_if.h|  6 +++
 .../habanalabs/include/gaudi/gaudi_fw_if.h| 14 --
 .../misc/habanalabs/include/goya/goya_fw_if.h | 11 -
 9 files changed, 182 insertions(+), 36 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index 2a58edaf984a..092691a8917d 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -539,18 +539,63 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, 
u64 *total_energy)
return rc;
 }
 
-int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
+int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index,
+   enum pll_index *pll_index)
+{
+   struct asic_fixed_properties *prop = &hdev->asic_prop;
+   u8 pll_byte, pll_bit_off;
+   bool dynamic_pll;
+
+   if (input_pll_index >= PLL_MAX) {
+   dev_err(hdev->dev, "PLL index %d is out of range\n",
+   input_pll_index);
+   return -EINVAL;
+   }
+
+   dynamic_pll = prop->fw_security_status_valid &&
+   (prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN);
+
+   if (!dynamic_pll) {
+   /*
+* in case we are working with legacy FW (each asic has unique
+* PLL numbering) extract the legacy numbering
+*/
+   *pll_index = hdev->legacy_pll_map[input_pll_index];
+   return 0;
+   }
+
+   /* PLL map is a u8 array */
+   pll_byte = prop->cpucp_info.pll_map[input_pll_index >> 3];
+   pll_bit_off = input_pll_index & 0x7;
+
+   if (!(pll_byte & BIT(pll_bit_off))) {
+   dev_err(hdev->dev, "PLL index %d is not supported\n",
+   input_pll_index);
+   return -EINVAL;
+   }
+
+   *pll_index = input_pll_index;
+
+   return 0;
+}
+
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index,
u16 *pll_freq_arr)
 {
struct cpucp_packet pkt;
+   enum pll_index used_pll_idx;
u64 result;
int rc;
 
+   rc = get_used_pll_index(hdev, pll_index, &used_pll_idx);
+   if (rc)
+   return rc;
+
memset(&pkt, 0, sizeof(pkt));
 
pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
-   pkt.pll_type = __cpu_to_le16(pll_index);
+   pkt.pll_type = __cpu_to_le16((u16)used_pll_idx);
 
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 65f34918faed..dc8126b270d1 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1946,6 +1946,8 @@ struct hl_mmu_funcs {
  * @aggregated_cs_counters: aggregated cs counters among all contexts
  * @mmu_priv: device-specific MMU data.
  * @mmu_func: device-related MMU functions.
+ * @legacy_pll_map: map holding map between dynamic (common) PLL indexes and
+ *  static (asic specific) PLL indexes.
  * @dram_used_mem: current DRAM memory consumption.
  * @timeout_jiffies: device CS timeout value.
  * @max_power: the max power of the device, as configured by the sysadmin. This
@@ -2070,6 +2072,8 @@ struct hl_device {
struct hl_mmu_priv  mmu_priv;
struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS];
 
+   enum pll_index  *legacy_pll_map;
+
atomic64_t  dram_used_mem;
u64 timeout_jiffies;
u64 max_power;
@@ -2383,7 +2387,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
  

[PATCH 1/3] habanalabs: move relevant datapath work outside cs lock

2021-03-21 Thread Oded Gabbay
From: Ofir Bitton 

In order to shorten the time cs lock is being held, we move any
possible work outside of the cs lock.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 .../habanalabs/common/command_submission.c| 86 +++
 drivers/misc/habanalabs/common/device.c   | 13 ++-
 drivers/misc/habanalabs/common/habanalabs.h   |  4 +
 3 files changed, 68 insertions(+), 35 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c 
b/drivers/misc/habanalabs/common/command_submission.c
index 720588aed28b..ba6d3e317255 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -84,6 +84,38 @@ int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
return 0;
 }
 
+static void sob_reset_work(struct work_struct *work)
+{
+   struct hl_cs_compl *hl_cs_cmpl =
+   container_of(work, struct hl_cs_compl, sob_reset_work);
+   struct hl_device *hdev = hl_cs_cmpl->hdev;
+
+   /*
+* A signal CS can get completion while the corresponding wait
+* for signal CS is on its way to the PQ. The wait for signal CS
+* will get stuck if the signal CS incremented the SOB to its
+* max value and there are no pending (submitted) waits on this
+* SOB.
+* We do the following to void this situation:
+* 1. The wait for signal CS must get a ref for the signal CS as
+*soon as possible in cs_ioctl_signal_wait() and put it
+*before being submitted to the PQ but after it incremented
+*the SOB refcnt in init_signal_wait_cs().
+* 2. Signal/Wait for signal CS will decrement the SOB refcnt
+*here.
+* These two measures guarantee that the wait for signal CS will
+* reset the SOB upon completion rather than the signal CS and
+* hence the above scenario is avoided.
+*/
+   kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
+
+   if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
+   hdev->asic_funcs->reset_sob_group(hdev,
+   hl_cs_cmpl->sob_group);
+
+   kfree(hl_cs_cmpl);
+}
+
 static void hl_fence_release(struct kref *kref)
 {
struct hl_fence *fence =
@@ -109,28 +141,9 @@ static void hl_fence_release(struct kref *kref)
hl_cs_cmpl->hw_sob->sob_id,
hl_cs_cmpl->sob_val);
 
-   /*
-* A signal CS can get completion while the corresponding wait
-* for signal CS is on its way to the PQ. The wait for signal CS
-* will get stuck if the signal CS incremented the SOB to its
-* max value and there are no pending (submitted) waits on this
-* SOB.
-* We do the following to void this situation:
-* 1. The wait for signal CS must get a ref for the signal CS as
-*soon as possible in cs_ioctl_signal_wait() and put it
-*before being submitted to the PQ but after it incremented
-*the SOB refcnt in init_signal_wait_cs().
-* 2. Signal/Wait for signal CS will decrement the SOB refcnt
-*here.
-* These two measures guarantee that the wait for signal CS will
-* reset the SOB upon completion rather than the signal CS and
-* hence the above scenario is avoided.
-*/
-   kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
+   queue_work(hdev->sob_reset_wq, &hl_cs_cmpl->sob_reset_work);
 
-   if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
-   hdev->asic_funcs->reset_sob_group(hdev,
-   hl_cs_cmpl->sob_group);
+   return;
}
 
 free:
@@ -670,9 +683,23 @@ static int allocate_cs(struct hl_device *hdev, struct 
hl_ctx *ctx,
goto free_cs;
}
 
+   cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+   sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
+   if (!cs->jobs_in_queue_cnt)
+   cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+   sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
+
+   if (!cs->jobs_in_queue_cnt) {
+   atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+   atomic64_inc(&cntr->out_of_mem_drop_cnt);
+   rc = -ENOMEM;
+   goto free_cs_cmpl;
+   }
+
cs_cmpl->hdev = hdev;
cs_cmpl->type = cs->type;
spin_lock_init(&cs_cmpl->lock);
+   INIT_WORK(&cs_cmpl->sob_reset_work, sob_reset_work);
cs->fence = &cs_cmpl->base_fence;
 
  

[PATCH 4/4] habanalabs: support dynamic PLL numbering

2021-03-14 Thread Oded Gabbay
From: Ohad Sharabi 

As part of the effort to remove hard-coded assumptions from the F/W-driver
communication, introduce support for dynamic PLL numbering.

Signed-off-by: Ohad Sharabi 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c  | 22 +-
 drivers/misc/habanalabs/common/habanalabs.h   |  2 +-
 .../misc/habanalabs/include/common/cpucp_if.h | 41 +++
 .../habanalabs/include/gaudi/gaudi_fw_if.h| 14 ---
 .../misc/habanalabs/include/goya/goya_fw_if.h | 11 -
 5 files changed, 62 insertions(+), 28 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index 2a58edaf984a..8843e040c660 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -539,18 +539,36 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, 
u64 *total_energy)
return rc;
 }
 
-int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index,
u16 *pll_freq_arr)
 {
+   struct asic_fixed_properties *prop = &hdev->asic_prop;
+   u8 pll_byte, pll_bit_off;
struct cpucp_packet pkt;
u64 result;
int rc;
 
+   if (pll_index >= PLL_MAX) {
+   dev_err(hdev->dev, "PLL index %d is out of range\n",
+   pll_index);
+   return -EINVAL;
+   }
+
+   /* PLL map is a u8 array */
+   pll_byte = prop->cpucp_info.pll_map[pll_index >> 3];
+   pll_bit_off = pll_index & 0x7;
+
+   if (!(pll_byte & BIT(pll_bit_off))) {
+   dev_err(hdev->dev, "PLL index %d is not supported\n",
+   pll_index);
+   return -EINVAL;
+   }
+
memset(&pkt, 0, sizeof(pkt));
 
pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
-   pkt.pll_type = __cpu_to_le16(pll_index);
+   pkt.pll_type = __cpu_to_le16((u16)pll_index);
 
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_CPUCP_INFO_TIMEOUT_USEC, &result);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 2dcefd6485e5..5f930cb5e33d 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2379,7 +2379,7 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
struct hl_info_pci_counters *counters);
 int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
u64 *total_energy);
-int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index,
u16 *pll_freq_arr);
 int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power);
 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h 
b/drivers/misc/habanalabs/include/common/cpucp_if.h
index 6ba480a316ce..e745c78dd8fd 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -28,6 +28,9 @@
 #define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_SHIFT6
 #define CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK 0x07C0
 
+#define PLL_MAP_MAX_BITS   128
+#define PLL_MAP_LEN(PLL_MAP_MAX_BITS / 8)
+
 /*
  * info of the pkt queue pointers in the first async occurrence
  */
@@ -473,6 +476,42 @@ enum cpucp_pll_type_attributes {
cpucp_pll_pci,
 };
 
+/*
+ * PLL enumeration table used for all ASICs and future SW versions.
+ * For future ASIC-LKD compatibility, we can only add new enumerations.
+ * at the end of the table.
+ * Changing the order of entries or removing entries is not allowed.
+ */
+enum pll_index {
+   CPU_PLL = 0,
+   PCI_PLL = 1,
+   NIC_PLL = 2,
+   DMA_PLL = 3,
+   MESH_PLL = 4,
+   MME_PLL = 5,
+   TPC_PLL = 6,
+   IF_PLL = 7,
+   SRAM_PLL = 8,
+   NS_DCORE_PLL = 9,
+   MESH_DCORE_PLL = 10,
+   HBM_PLL = 11,
+   TPC_DCORE_PLL = 12,
+   VIDEO_DCORE_PLL = 13,
+   SRAM_DCORE_PLL = 14,
+   NIC_PHY_DCORE_PLL = 15,
+   MSS_DCORE_PLL = 16,
+   DMA_DCORE_PLL = 17,
+   SIF_PLL = 18,
+   DDR_PLL = 19,
+   VID_PLL = 20,
+   BANK_PLL = 21,
+   MMU_PLL = 22,
+   IC_PLL = 23,
+   MC_PLL = 24,
+   EMMC_PLL = 25,
+   PLL_MAX
+};
+
 /* Event Queue Packets */
 
 struct eq_generic_event {
@@ -547,6 +586,7 @@ struct cpucp_security_info {
  * @dram_size: available DRAM size.
  * @card_name: card name that will be displayed in HWMON subsystem on th

[PATCH 3/4] habanalabs: avoid soft lockup bug upon mapping error

2021-03-14 Thread Oded Gabbay
From: farah kassabri 

Add a little sleep between page unmappings in case the mapping of
a large number of host pages failed, in order to
avoid a soft lockup bug during the rollback.

Signed-off-by: farah kassabri 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/memory.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/common/memory.c 
b/drivers/misc/habanalabs/common/memory.c
index 1b69573369cf..6530fddbbc21 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -857,6 +857,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
u32 page_size = phys_pg_pack->page_size;
int rc = 0;
+   bool is_host_addr;
 
for (i = 0 ; i < phys_pg_pack->npages ; i++) {
paddr = phys_pg_pack->pages[i];
@@ -878,6 +879,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
return 0;
 
 err:
+   is_host_addr = !hl_is_dram_va(hdev, vaddr);
+
next_vaddr = vaddr;
for (i = 0 ; i < mapped_pg_cnt ; i++) {
if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
@@ -888,6 +891,17 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
phys_pg_pack->pages[i], page_size);
 
next_vaddr += page_size;
+
+   /*
+* unmapping on Palladium can be really long, so avoid a CPU
+* soft lockup bug by sleeping a little between unmapping pages
+*
+* In addition, on host num of pages could be huge,
+* because page size could be 4KB, so when unmapping host
+* pages sleep every 32K pages to avoid soft lockup
+*/
+   if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
+   usleep_range(50, 200);
}
 
return rc;
@@ -921,9 +935,9 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 
vaddr,
 * unmapping on Palladium can be really long, so avoid a CPU
 * soft lockup bug by sleeping a little between unmapping pages
 *
-* In addition, when unmapping host memory we pass through
-* the Linux kernel to unpin the pages and that takes a long
-* time. Therefore, sleep every 32K pages to avoid soft lockup
+* In addition, on host num of pages could be huge,
+* because page size could be 4KB, so when unmapping host
+* pages sleep every 32K pages to avoid soft lockup
 */
if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
usleep_range(50, 200);
-- 
2.25.1



[PATCH 1/4] habanalabs/gaudi: unsecure TPC cfg status registers

2021-03-14 Thread Oded Gabbay
From: Ofir Bitton 

Unsecure the relevant registers, as the TPC engine needs access to
the TPC status.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/gaudi/gaudi_security.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi_security.c 
b/drivers/misc/habanalabs/gaudi/gaudi_security.c
index 7085f45814ae..9a706c5980ef 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_security.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_security.c
@@ -9556,7 +9556,6 @@ static void gaudi_init_tpc_protection_bits(struct 
hl_device *hdev)
mask = 1U << ((mmTPC0_CFG_PROT & 0x7F) >> 2);
mask |= 1U << ((mmTPC0_CFG_VFLAGS & 0x7F) >> 2);
mask |= 1U << ((mmTPC0_CFG_SFLAGS & 0x7F) >> 2);
-   mask |= 1U << ((mmTPC0_CFG_STATUS & 0x7F) >> 2);
mask |= 1U << ((mmTPC0_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC0_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC0_CFG_TPC_STALL & 0x7F) >> 2);
@@ -10011,7 +10010,6 @@ static void gaudi_init_tpc_protection_bits(struct 
hl_device *hdev)
mask = 1U << ((mmTPC1_CFG_PROT & 0x7F) >> 2);
mask |= 1U << ((mmTPC1_CFG_VFLAGS & 0x7F) >> 2);
mask |= 1U << ((mmTPC1_CFG_SFLAGS & 0x7F) >> 2);
-   mask |= 1U << ((mmTPC1_CFG_STATUS & 0x7F) >> 2);
mask |= 1U << ((mmTPC1_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC1_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC1_CFG_TPC_STALL & 0x7F) >> 2);
@@ -10465,7 +10463,6 @@ static void gaudi_init_tpc_protection_bits(struct 
hl_device *hdev)
mask = 1U << ((mmTPC2_CFG_PROT & 0x7F) >> 2);
mask |= 1U << ((mmTPC2_CFG_VFLAGS & 0x7F) >> 2);
mask |= 1U << ((mmTPC2_CFG_SFLAGS & 0x7F) >> 2);
-   mask |= 1U << ((mmTPC2_CFG_STATUS & 0x7F) >> 2);
mask |= 1U << ((mmTPC2_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC2_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC2_CFG_TPC_STALL & 0x7F) >> 2);
@@ -10919,7 +10916,6 @@ static void gaudi_init_tpc_protection_bits(struct 
hl_device *hdev)
mask = 1U << ((mmTPC3_CFG_PROT & 0x7F) >> 2);
mask |= 1U << ((mmTPC3_CFG_VFLAGS & 0x7F) >> 2);
mask |= 1U << ((mmTPC3_CFG_SFLAGS & 0x7F) >> 2);
-   mask |= 1U << ((mmTPC3_CFG_STATUS & 0x7F) >> 2);
mask |= 1U << ((mmTPC3_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC3_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC3_CFG_TPC_STALL & 0x7F) >> 2);
@@ -11373,7 +11369,6 @@ static void gaudi_init_tpc_protection_bits(struct 
hl_device *hdev)
mask = 1U << ((mmTPC4_CFG_PROT & 0x7F) >> 2);
mask |= 1U << ((mmTPC4_CFG_VFLAGS & 0x7F) >> 2);
mask |= 1U << ((mmTPC4_CFG_SFLAGS & 0x7F) >> 2);
-   mask |= 1U << ((mmTPC4_CFG_STATUS & 0x7F) >> 2);
mask |= 1U << ((mmTPC4_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC4_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC4_CFG_TPC_STALL & 0x7F) >> 2);
@@ -11827,7 +11822,6 @@ static void gaudi_init_tpc_protection_bits(struct 
hl_device *hdev)
mask = 1U << ((mmTPC5_CFG_PROT & 0x7F) >> 2);
mask |= 1U << ((mmTPC5_CFG_VFLAGS & 0x7F) >> 2);
mask |= 1U << ((mmTPC5_CFG_SFLAGS & 0x7F) >> 2);
-   mask |= 1U << ((mmTPC5_CFG_STATUS & 0x7F) >> 2);
mask |= 1U << ((mmTPC5_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC5_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC5_CFG_TPC_STALL & 0x7F) >> 2);
@@ -12283,7 +12277,6 @@ static void gaudi_init_tpc_protection_bits(struct 
hl_device *hdev)
mask = 1U << ((mmTPC6_CFG_PROT & 0x7F) >> 2);
mask |= 1U << ((mmTPC6_CFG_VFLAGS & 0x7F) >> 2);
mask |= 1U << ((mmTPC6_CFG_SFLAGS & 0x7F) >> 2);
-   mask |= 1U << ((mmTPC6_CFG_STATUS & 0x7F) >> 2);
mask |= 1U << ((mmTPC6_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC6_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC6_CFG_TPC_STALL & 0x7F) >> 2);
@@ -12739,7 +12732,6 @@ static void gaudi_init_tpc_protection_bits(struct 
hl_device *hdev)
mask = 1U << ((mmTPC7_CFG_PROT & 0x7F) >> 2);
mask |= 1U << ((mmTPC7_CFG_VFLAGS & 0x7F) >> 2);
mask |= 1U << ((mmTPC7_CFG_SFLAGS & 0x7F) >> 2);
-   mask |= 1U << ((mmTPC7_CFG_STATUS & 0x7F) >> 2);
mask |= 1U << ((mmTPC7_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
mask |= 1U << ((mmTPC7_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
mask |= 1U << ((mmTPC7_CFG_TPC_STALL & 0x7F) >> 2);
-- 
2.25.1



[PATCH 2/4] habanalabs/gaudi: Update async events header

2021-03-14 Thread Oded Gabbay
From: Ofir Bitton 

Update with latest version from the Firmware team.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/gaudi/gaudi.c |  2 +-
 .../include/gaudi/gaudi_async_events.h|  2 +-
 .../gaudi/gaudi_async_ids_map_extended.h  | 35 ---
 3 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 0bcee675e1db..a65ae0dbdb92 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7574,7 +7574,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
event_type, cause);
break;
 
-   case GAUDI_EVENT_BMC_RESET_CMD:
+   case GAUDI_EVENT_DEV_RESET_REQ:
gaudi_print_irq_info(hdev, event_type, false);
goto reset_device;
 
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h 
b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
index 25f835ba2cd6..e8651abf84f2 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
@@ -303,7 +303,7 @@ enum gaudi_async_event_id {
GAUDI_EVENT_NIC3_QP1 = 619,
GAUDI_EVENT_NIC4_QP0 = 620,
GAUDI_EVENT_NIC4_QP1 = 621,
-   GAUDI_EVENT_BMC_RESET_CMD = 646,
+   GAUDI_EVENT_DEV_RESET_REQ = 646,
GAUDI_EVENT_PKT_QUEUE_OUT_SYNC = 647,
GAUDI_EVENT_FIX_POWER_ENV_S = 658,
GAUDI_EVENT_FIX_POWER_ENV_E = 659,
diff --git 
a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h 
b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
index 079be1fb8f70..3dc79c131805 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
@@ -670,18 +670,29 @@ static struct gaudi_async_events_ids_map 
gaudi_irq_map_table[] = {
{ .fc_id = 643, .cpu_id = 492, .valid = 0, .name = "" },
{ .fc_id = 644, .cpu_id = 493, .valid = 0, .name = "" },
{ .fc_id = 645, .cpu_id = 494, .valid = 0, .name = "" },
-   { .fc_id = 646, .cpu_id = 495, .valid = 1, .name = "BMC_RST_CMD" },
-   { .fc_id = 647, .cpu_id = 496, .valid = 0, .name = "" },
-   { .fc_id = 648, .cpu_id = 497, .valid = 0, .name = "" },
-   { .fc_id = 649, .cpu_id = 498, .valid = 0, .name = "" },
-   { .fc_id = 650, .cpu_id = 499, .valid = 0, .name = "" },
-   { .fc_id = 651, .cpu_id = 500, .valid = 0, .name = "" },
-   { .fc_id = 652, .cpu_id = 501, .valid = 0, .name = "" },
-   { .fc_id = 653, .cpu_id = 502, .valid = 0, .name = "" },
-   { .fc_id = 654, .cpu_id = 503, .valid = 0, .name = "" },
-   { .fc_id = 655, .cpu_id = 504, .valid = 0, .name = "" },
-   { .fc_id = 656, .cpu_id = 505, .valid = 0, .name = "" },
-   { .fc_id = 657, .cpu_id = 506, .valid = 1, .name = "PKT_QUEUE_ASYNC" },
+   { .fc_id = 646, .cpu_id = 495, .valid = 1, .name = "DEV_RESET_REQ" },
+   { .fc_id = 647, .cpu_id = 496, .valid = 1,
+   .name = "PKT_QUEUE_OUT_SYNC" },
+   { .fc_id = 648, .cpu_id = 497, .valid = 1,
+   .name = "STATUS_NIC0_ENG0" },
+   { .fc_id = 649, .cpu_id = 498, .valid = 1,
+   .name = "STATUS_NIC0_ENG1" },
+   { .fc_id = 650, .cpu_id = 499, .valid = 1,
+   .name = "STATUS_NIC1_ENG0" },
+   { .fc_id = 651, .cpu_id = 500, .valid = 1,
+   .name = "STATUS_NIC1_ENG1" },
+   { .fc_id = 652, .cpu_id = 501, .valid = 1,
+   .name = "STATUS_NIC2_ENG0" },
+   { .fc_id = 653, .cpu_id = 502, .valid = 1,
+   .name = "STATUS_NIC2_ENG1" },
+   { .fc_id = 654, .cpu_id = 503, .valid = 1,
+   .name = "STATUS_NIC3_ENG0" },
+   { .fc_id = 655, .cpu_id = 504, .valid = 1,
+   .name = "STATUS_NIC3_ENG1" },
+   { .fc_id = 656, .cpu_id = 505, .valid = 1,
+   .name = "STATUS_NIC4_ENG0" },
+   { .fc_id = 657, .cpu_id = 506, .valid = 1,
+   .name = "STATUS_NIC4_ENG1" },
{ .fc_id = 658, .cpu_id = 507, .valid = 1, .name = "FIX_POWER_ENV_S" },
{ .fc_id = 659, .cpu_id = 508, .valid = 1, .name = "FIX_POWER_ENV_E" },
{ .fc_id = 660, .cpu_id = 509, .valid = 1,
-- 
2.25.1



[PATCH 1/2] habanalabs/gaudi: reset device upon BMC request

2021-03-09 Thread Oded Gabbay
From: Ofir Bitton 

In case the BMC of the devices' box wants to initiate a reset of
a specific device, it must go through the driver.
Once the driver receives the request, it will initiate a hard reset
flow.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 4 
 drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h| 1 +
 .../habanalabs/include/gaudi/gaudi_async_ids_map_extended.h   | 2 +-
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 8be75f7c97f8..099c51350be6 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7575,6 +7575,10 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
event_type, cause);
break;
 
+   case GAUDI_EVENT_BMC_RESET_CMD:
+   gaudi_print_irq_info(hdev, event_type, false);
+   goto reset_device;
+
case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
gaudi_print_irq_info(hdev, event_type, false);
gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h 
b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
index 0a0fa57024f8..25f835ba2cd6 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
@@ -303,6 +303,7 @@ enum gaudi_async_event_id {
GAUDI_EVENT_NIC3_QP1 = 619,
GAUDI_EVENT_NIC4_QP0 = 620,
GAUDI_EVENT_NIC4_QP1 = 621,
+   GAUDI_EVENT_BMC_RESET_CMD = 646,
GAUDI_EVENT_PKT_QUEUE_OUT_SYNC = 647,
GAUDI_EVENT_FIX_POWER_ENV_S = 658,
GAUDI_EVENT_FIX_POWER_ENV_E = 659,
diff --git 
a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h 
b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
index 9106395eb920..079be1fb8f70 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
@@ -670,7 +670,7 @@ static struct gaudi_async_events_ids_map 
gaudi_irq_map_table[] = {
{ .fc_id = 643, .cpu_id = 492, .valid = 0, .name = "" },
{ .fc_id = 644, .cpu_id = 493, .valid = 0, .name = "" },
{ .fc_id = 645, .cpu_id = 494, .valid = 0, .name = "" },
-   { .fc_id = 646, .cpu_id = 495, .valid = 0, .name = "" },
+   { .fc_id = 646, .cpu_id = 495, .valid = 1, .name = "BMC_RST_CMD" },
{ .fc_id = 647, .cpu_id = 496, .valid = 0, .name = "" },
{ .fc_id = 648, .cpu_id = 497, .valid = 0, .name = "" },
{ .fc_id = 649, .cpu_id = 498, .valid = 0, .name = "" },
-- 
2.25.1



[PATCH 2/2] habanalabs/gaudi: always use single-msi mode

2021-03-09 Thread Oded Gabbay
The device can get into a deadlock in case it uses indirect mode for MSI
interrupts (multi-msi) and has a hard-reset during an interrupt storm.

To prevent that, always use direct mode which means single-msi mode.

The F/W will prevent the host from writing to the indirect MSI
registers to prevent any malicious user from causing this scenario.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 099c51350be6..0bcee675e1db 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1766,8 +1766,7 @@ static int gaudi_enable_msi(struct hl_device *hdev)
if (gaudi->hw_cap_initialized & HW_CAP_MSI)
return 0;
 
-   rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
-   PCI_IRQ_MSI);
+   rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
if (rc < 0) {
dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
return rc;
-- 
2.25.1



[PATCH] habanalabs: debugfs access to user mapped host addresses

2021-03-03 Thread Oded Gabbay
From: Ofir Bitton 

In order to have better debuggability, we allow debugfs access
to user MMU-mapped host memory. Non-user host memory access will be
rejected.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/debugfs.c| 88 -
 drivers/misc/habanalabs/common/habanalabs.h | 12 ++-
 drivers/misc/habanalabs/common/mmu/mmu.c|  2 +
 drivers/misc/habanalabs/gaudi/gaudi.c   | 40 --
 drivers/misc/habanalabs/goya/goya.c | 44 +--
 5 files changed, 145 insertions(+), 41 deletions(-)

diff --git a/drivers/misc/habanalabs/common/debugfs.c 
b/drivers/misc/habanalabs/common/debugfs.c
index fd3135c422b6..fff79b31af32 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -457,21 +457,58 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 
addr)
return false;
 }
 
-static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
-   u64 *phys_addr)
+static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size,
+   u64 *phys_addr)
 {
+   struct hl_vm_phys_pg_pack *phys_pg_pack;
struct hl_ctx *ctx = hdev->compute_ctx;
-   int rc = 0;
+   struct hl_vm_hash_node *hnode;
+   struct hl_userptr *userptr;
+   enum vm_type_t *vm_type;
+   bool valid = false;
+   u64 end_address;
+   u32 range_size;
+   int i, rc = 0;
 
if (!ctx) {
dev_err(hdev->dev, "no ctx available\n");
return -EINVAL;
}
 
+   /* Verify address is mapped */
+   mutex_lock(&ctx->mem_hash_lock);
+   hash_for_each(ctx->mem_hash, i, hnode, node) {
+   vm_type = hnode->ptr;
+
+   if (*vm_type == VM_TYPE_USERPTR) {
+   userptr = hnode->ptr;
+   range_size = userptr->size;
+   } else {
+   phys_pg_pack = hnode->ptr;
+   range_size = phys_pg_pack->total_size;
+   }
+
+   end_address = virt_addr + size;
+   if ((virt_addr >= hnode->vaddr) &&
+   (end_address <= hnode->vaddr + range_size)) {
+   valid = true;
+   break;
+   }
+   }
+   mutex_unlock(&ctx->mem_hash_lock);
+
+   if (!valid) {
+   dev_err(hdev->dev,
+   "virt addr 0x%llx is not mapped\n",
+   virt_addr);
+   return -EINVAL;
+   }
+
rc = hl_mmu_va_to_pa(ctx, virt_addr, phys_addr);
if (rc) {
-   dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys 
addr\n",
-   virt_addr);
+   dev_err(hdev->dev,
+   "virt addr 0x%llx is not mapped to phys addr\n",
+   virt_addr);
rc = -EINVAL;
}
 
@@ -483,10 +520,11 @@ static ssize_t hl_data_read32(struct file *f, char __user 
*buf,
 {
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
+   u64 va, addr = entry->addr;
char tmp_buf[32];
-   u64 addr = entry->addr;
-   u32 val;
+   bool user_address;
ssize_t rc;
+   u32 val;
 
if (atomic_read(&hdev->in_reset)) {
dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
@@ -496,13 +534,15 @@ static ssize_t hl_data_read32(struct file *f, char __user 
*buf,
if (*ppos)
return 0;
 
-   if (hl_is_device_va(hdev, addr)) {
-   rc = device_va_to_pa(hdev, addr, &addr);
+   user_address = hl_is_device_va(hdev, addr);
+   if (user_address) {
+   va = addr;
+   rc = device_va_to_pa(hdev, addr, sizeof(val), &addr);
if (rc)
return rc;
}
 
-   rc = hdev->asic_funcs->debugfs_read32(hdev, addr, &val);
+   rc = hdev->asic_funcs->debugfs_read32(hdev, addr, user_address, &val);
if (rc) {
dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
return rc;
@@ -519,6 +559,7 @@ static ssize_t hl_data_write32(struct file *f, const char 
__user *buf,
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
u64 addr = entry->addr;
+   bool user_address;
u32 value;
ssize_t rc;
 
@@ -531,13 +572,14 @@ static ssize_t hl_data_write32(struct file *f, const char 
__user *buf,
if (rc)
return rc;
 
-   if (hl_is_device_va(hdev, addr)) {
-   rc = device_va_to_pa(

[git pull resend] habanalabs fixes for 5.12-rc2

2021-03-03 Thread Oded Gabbay
Hi Greg,

This pull request contains some fixes of the habanalabs driver for
5.12-rc2.
Nothing too scary, more details are in the tag.

Thanks,
Oded

The following changes since commit fe07bfda2fb9cdef8a4d4008a409bb02f35f1bd8:

  Linux 5.12-rc1 (2021-02-28 16:05:19 -0800)

are available in the Git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git 
tags/misc-habanalabs-fixes-2021-03-03

for you to fetch changes up to 3612e9f5df4c0c605ab2e4b569214786cc701cb9:

  habanalabs: fix debugfs address translation (2021-03-03 09:56:58 +0200)


This tag contains the following fixes for 5.12-rc2:

- Call put_pid() when the user releases the control device.
- Disable file operations after a PCI device is removed.
- Fix address translation when displaying the memory mappings
  through our debugFS.
- Remove unused dentry pointer for debugFS files.


Greg Kroah-Hartman (1):
  drivers: habanalabs: remove unused dentry pointer for debugfs files

Oded Gabbay (1):
  habanalabs: mark hl_eq_inc_ptr() as static

Tomer Tayar (2):
  habanalabs: Call put_pid() when releasing control device
  habanalabs: Disable file operations after device is removed

farah kassabri (1):
  habanalabs: fix debugfs address translation

 drivers/misc/habanalabs/common/debugfs.c  |  5 +--
 drivers/misc/habanalabs/common/device.c   | 40 ---
 drivers/misc/habanalabs/common/habanalabs.h   |  2 --
 drivers/misc/habanalabs/common/habanalabs_ioctl.c | 12 +++
 drivers/misc/habanalabs/common/irq.c  |  2 +-
 drivers/misc/habanalabs/common/mmu/mmu.c  | 38 ++---
 6 files changed, 75 insertions(+), 24 deletions(-)


[git pull resend] habanalabs fixes for 5.12-rc2

2021-03-03 Thread Oded Gabbay
Hi Greg,

This pull request contains some fixes of the habanalabs driver for
5.12-rc2.
Nothing too scary, more details are in the tag.

Thanks,
Oded

The following changes since commit fe07bfda2fb9cdef8a4d4008a409bb02f35f1bd8:

  Linux 5.12-rc1 (2021-02-28 16:05:19 -0800)

are available in the Git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git 
tags/misc-habanalabs-fixes-2021-03-03

for you to fetch changes up to 3612e9f5df4c0c605ab2e4b569214786cc701cb9:

  habanalabs: fix debugfs address translation (2021-03-03 09:56:58 +0200)


This tag contains the following fixes for 5.12-rc2:

- Call put_pid() when the user releases the control device.
- Disable file operations after a PCI device is removed.
- Fix address translation when displaying the memory mappings
  through our debugFS.
- Remove unused dentry pointer for debugFS files.


Greg Kroah-Hartman (1):
  drivers: habanalabs: remove unused dentry pointer for debugfs files

Oded Gabbay (1):
  habanalabs: mark hl_eq_inc_ptr() as static

Tomer Tayar (2):
  habanalabs: Call put_pid() when releasing control device
  habanalabs: Disable file operations after device is removed

farah kassabri (1):
  habanalabs: fix debugfs address translation

 drivers/misc/habanalabs/common/debugfs.c  |  5 +--
 drivers/misc/habanalabs/common/device.c   | 40 ---
 drivers/misc/habanalabs/common/habanalabs.h   |  2 --
 drivers/misc/habanalabs/common/habanalabs_ioctl.c | 12 +++
 drivers/misc/habanalabs/common/irq.c  |  2 +-
 drivers/misc/habanalabs/common/mmu/mmu.c  | 38 ++---
 6 files changed, 75 insertions(+), 24 deletions(-)


[git pull] habanalabs fixes for 5.12-rc2

2021-03-02 Thread Oded Gabbay
Hi Greg,

This pull request contains some fixes of the habanalabs driver for
5.12-rc2.
Nothing too scary, more details are in the tag.

Thanks,
Oded

The following changes since commit fe07bfda2fb9cdef8a4d4008a409bb02f35f1bd8:

  Linux 5.12-rc1 (2021-02-28 16:05:19 -0800)

are available in the Git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git 
tags/misc-habanalabs-fixes-2021-03-02

for you to fetch changes up to bbcbfc85556ed9b2a15955eb6f9b5b732e6b1795:

  habanalabs: fix debugfs address translation (2021-03-02 15:07:09 +0200)


This tag contains the following fixes for 5.12-rc2:

- Call put_pid() when the user releases the control device.
- Disable file operations after a PCI device is removed.
- Fix address translation when displaying the memory mappings
  through our debugFS.
- Remove unused dentry pointer for debugFS files.
- Mark function as static.


Greg Kroah-Hartman (1):
  drivers: habanalabs: remove unused dentry pointer for debugfs files

Oded Gabbay (1):
  habanalabs: mark hl_eq_inc_ptr() as static

Tomer Tayar (2):
  habanalabs: Call put_pid() when releasing control device
  habanalabs: Disable file operations after device is removed

farah kassabri (1):
  habanalabs: fix debugfs address translation

 drivers/misc/habanalabs/common/debugfs.c  |  5 +--
 drivers/misc/habanalabs/common/device.c   | 40 ---
 drivers/misc/habanalabs/common/habanalabs.h   |  2 --
 drivers/misc/habanalabs/common/habanalabs_ioctl.c | 12 +++
 drivers/misc/habanalabs/common/irq.c  |  2 +-
 drivers/misc/habanalabs/common/mmu/mmu.c  | 38 ++---
 6 files changed, 75 insertions(+), 24 deletions(-)


[PATCH] habanalabs: fix debugfs address translation

2021-03-02 Thread Oded Gabbay
From: farah kassabri 

When a user uses virtual addresses to access DRAM through debugfs,
the driver translates the address to a physical address and uses it
for the access through the PCIe BAR.
In case the DRAM page size is different from the DMMU
page size, we need special treatment
for adding the page offset to the actual address: use
the DRAM page size mask to fetch the page offset
from the virtual address, instead of the DMMU last hop shift.

Signed-off-by: farah kassabri 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/mmu/mmu.c | 38 
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c 
b/drivers/misc/habanalabs/common/mmu/mmu.c
index 71703a32350f..5251b336215c 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu.c
@@ -499,18 +499,32 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx 
*ctx, u64 virt_addr,
else /* HL_VA_RANGE_TYPE_DRAM */
p = &prop->dmmu;
 
-   /*
-* find the correct hop shift field in hl_mmu_properties structure
-* in order to determine the right maks for the page offset.
-*/
-   hop0_shift_off = offsetof(struct hl_mmu_properties, hop0_shift);
-   p = (char *)p + hop0_shift_off;
-   p = (char *)p + ((hops->used_hops - 1) * sizeof(u64));
-   hop_shift = *(u64 *)p;
-   offset_mask = (1ull << hop_shift) - 1;
-   addr_mask = ~(offset_mask);
-   *phys_addr = (tmp_phys_addr & addr_mask) |
-   (virt_addr & offset_mask);
+   if ((hops->range_type == HL_VA_RANGE_TYPE_DRAM) &&
+   !is_power_of_2(prop->dram_page_size)) {
+   u32 bit;
+   u64 page_offset_mask;
+   u64 phys_addr_mask;
+
+   bit = __ffs64((u64)prop->dram_page_size);
+   page_offset_mask = ((1 << bit) - 1);
+   phys_addr_mask = ~page_offset_mask;
+   *phys_addr = (tmp_phys_addr & phys_addr_mask) |
+   (virt_addr & page_offset_mask);
+   } else {
+   /*
+* find the correct hop shift field in hl_mmu_properties
+* structure in order to determine the right masks
+* for the page offset.
+*/
+   hop0_shift_off = offsetof(struct hl_mmu_properties, hop0_shift);
+   p = (char *)p + hop0_shift_off;
+   p = (char *)p + ((hops->used_hops - 1) * sizeof(u64));
+   hop_shift = *(u64 *)p;
+   offset_mask = (1ull << hop_shift) - 1;
+   addr_mask = ~(offset_mask);
+   *phys_addr = (tmp_phys_addr & addr_mask) |
+   (virt_addr & offset_mask);
+   }
 }
 
 int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr)
-- 
2.25.1



Re: [PATCH] banalabs: Switch to using the new API kobj_to_dev()

2021-02-28 Thread Oded Gabbay
On Tue, Feb 23, 2021 at 11:12 AM Yang Li  wrote:
>
> fixed the following coccicheck:
> ./drivers/misc/habanalabs/common/sysfs.c:347:60-61: WARNING opportunity
> for kobj_to_dev()
>
> Reported-by: Abaci Robot 
> Signed-off-by: Yang Li 
> ---
>  drivers/misc/habanalabs/common/sysfs.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/misc/habanalabs/common/sysfs.c 
> b/drivers/misc/habanalabs/common/sysfs.c
> index 4366d8f..79c1ddf 100644
> --- a/drivers/misc/habanalabs/common/sysfs.c
> +++ b/drivers/misc/habanalabs/common/sysfs.c
> @@ -344,7 +344,7 @@ static ssize_t eeprom_read_handler(struct file *filp, 
> struct kobject *kobj,
> struct bin_attribute *attr, char *buf, loff_t offset,
> size_t max_size)
>  {
> -   struct device *dev = container_of(kobj, struct device, kobj);
> +   struct device *dev = kobj_to_dev(kobj);
> struct hl_device *hdev = dev_get_drvdata(dev);
>     char *data;
> int rc;
> --
> 1.8.3.1
>

Reviewed-by: Oded Gabbay 
Applied to -next.

Thanks,
Oded


[PATCH 2/2] habanalabs: update hl_boot_if.h

2021-02-28 Thread Oded Gabbay
From: Ohad Sharabi 

Update to the latest version of the file as supplied by the F/W.

Signed-off-by: Ohad Sharabi 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/include/common/hl_boot_if.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h 
b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index e87f5a98e193..d17185b6aea9 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -170,6 +170,15 @@
  * is set to the PI counter.
  * Initialized in: linux
  *
+ * CPU_BOOT_DEV_STS0_FW_LD_COM_EN  Flexible FW loading communication
+ * protocol is enabled.
+ * Initialized in: preboot
+ *
+ * CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN   FW iATU configuration is enabled.
+ * This bit if set, means the iATU has been
+ * configured and is ready for use.
+ * Initialized in: ppboot
+ *
  * CPU_BOOT_DEV_STS0_ENABLED   Device status register enabled.
  * This is a main indication that the
  * running FW populates the device status
@@ -195,6 +204,8 @@
 #define CPU_BOOT_DEV_STS0_CLK_GATE_EN  (1 << 13)
 #define CPU_BOOT_DEV_STS0_HBM_ECC_EN   (1 << 14)
 #define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN(1 << 15)
+#define CPU_BOOT_DEV_STS0_FW_LD_COM_EN (1 << 16)
+#define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN  (1 << 17)
 #define CPU_BOOT_DEV_STS0_ENABLED  (1 << 31)
 
 enum cpu_boot_status {
-- 
2.25.1



[PATCH 1/2] habanalabs: skip DISABLE PCI packet to FW on heartbeat

2021-02-28 Thread Oded Gabbay
From: Ohad Sharabi 

If the reset is due to a heartbeat failure, the device CPU is not
responsive, in which case there is no point in sending the PCI disable
message to it.

Signed-off-by: Ohad Sharabi 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 .../habanalabs/common/command_submission.c|  4 +-
 drivers/misc/habanalabs/common/debugfs.c  |  2 +-
 drivers/misc/habanalabs/common/device.c   | 25 +++-
 drivers/misc/habanalabs/common/habanalabs.h   | 20 +-
 drivers/misc/habanalabs/common/sysfs.c|  4 +-
 drivers/misc/habanalabs/gaudi/gaudi.c | 40 ---
 drivers/misc/habanalabs/goya/goya.c   |  8 ++--
 7 files changed, 59 insertions(+), 44 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c 
b/drivers/misc/habanalabs/common/command_submission.c
index 7d0d3e61a6e7..34a98e3c3bf8 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -620,7 +620,7 @@ static void cs_timedout(struct work_struct *work)
cs_put(cs);
 
if (hdev->reset_on_lockup)
-   hl_device_reset(hdev, false, false);
+   hl_device_reset(hdev, 0);
else
hdev->needs_reset = true;
 }
@@ -1473,7 +1473,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union 
hl_cs_args *args,
 
 out:
if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
-   hl_device_reset(hdev, false, false);
+   hl_device_reset(hdev, 0);
 
return rc;
 }
diff --git a/drivers/misc/habanalabs/common/debugfs.c 
b/drivers/misc/habanalabs/common/debugfs.c
index d4a49d6d1753..94d574953dd0 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -887,7 +887,7 @@ static ssize_t hl_stop_on_err_write(struct file *f, const 
char __user *buf,
 
hdev->stop_on_err = value ? 1 : 0;
 
-   hl_device_reset(hdev, false, false);
+   hl_device_reset(hdev, 0);
 
return count;
 }
diff --git a/drivers/misc/habanalabs/common/device.c 
b/drivers/misc/habanalabs/common/device.c
index 2cbe9d04c229..756d7f2a7a2d 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -72,7 +72,7 @@ static void hpriv_release(struct kref *ref)
kfree(hpriv);
 
if (hdev->reset_upon_device_release)
-   hl_device_reset(hdev, false, false);
+   hl_device_reset(hdev, 0);
 }
 
 void hl_hpriv_get(struct hl_fpriv *hpriv)
@@ -276,7 +276,7 @@ static void device_hard_reset_pending(struct work_struct 
*work)
struct hl_device *hdev = device_reset_work->hdev;
int rc;
 
-   rc = hl_device_reset(hdev, true, true);
+   rc = hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD);
if ((rc == -EBUSY) && !hdev->device_fini_pending) {
dev_info(hdev->dev,
"Could not reset device. will try again in %u seconds",
@@ -478,7 +478,7 @@ static void hl_device_heartbeat(struct work_struct *work)
goto reschedule;
 
dev_err(hdev->dev, "Device heartbeat failed!\n");
-   hl_device_reset(hdev, true, false);
+   hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_HEARTBEAT);
 
return;
 
@@ -802,7 +802,7 @@ int hl_device_resume(struct hl_device *hdev)
hdev->disabled = false;
atomic_set(&hdev->in_reset, 0);
 
-   rc = hl_device_reset(hdev, true, false);
+   rc = hl_device_reset(hdev, HL_RESET_HARD);
if (rc) {
dev_err(hdev->dev, "Failed to reset device during resume\n");
goto disable_device;
@@ -898,9 +898,7 @@ static int device_kill_open_processes(struct hl_device 
*hdev, u32 timeout)
  * hl_device_reset - reset the device
  *
  * @hdev: pointer to habanalabs device structure
- * @hard_reset: should we do hard reset to all engines or just reset the
- *  compute/dma engines
- * @from_hard_reset_thread: is the caller the hard-reset thread
+ * @flags: reset flags.
  *
  * Block future CS and wait for pending CS to be enqueued
  * Call ASIC H/W fini
@@ -912,10 +910,10 @@ static int device_kill_open_processes(struct hl_device 
*hdev, u32 timeout)
  *
  * Returns 0 for success or an error on failure.
  */
-int hl_device_reset(struct hl_device *hdev, bool hard_reset,
-   bool from_hard_reset_thread)
+int hl_device_reset(struct hl_device *hdev, u32 flags)
 {
u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
+   bool hard_reset, from_hard_reset_thread;
int i, rc;
 
if (!hdev->init_done) {
@@ -924,6 +922,9 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
return 0;
}
 
+   hard_reset = (flags & HL_RESET_HARD) != 0;
+   from_hard_reset_thread = (flags & HL_RESET_FROM_RESET_THREAD) != 0;
+
if ((

[PATCH 4/4] habanalabs: replace GFP_ATOMIC with GFP_KERNEL

2021-02-25 Thread Oded Gabbay
From: Ofir Bitton 

Due to incorrect assumptions that some of the
initialization and data path flows cannot sleep, most allocations
are being done using GFP_ATOMIC.
Modify the code to use GFP_ATOMIC only when really needed, as
sleepable flows should use GFP_KERNEL.
In addition, add a fallback to allocate memory using GFP_KERNEL
once the GFP_ATOMIC allocation fails.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 .../misc/habanalabs/common/command_buffer.c   | 12 +++---
 .../habanalabs/common/command_submission.c| 22 ++-
 drivers/misc/habanalabs/common/device.c   |  2 +-
 drivers/misc/habanalabs/common/memory.c   | 10 +++--
 drivers/misc/habanalabs/gaudi/gaudi.c |  2 +-
 drivers/misc/habanalabs/goya/goya.c   |  2 +-
 6 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_buffer.c 
b/drivers/misc/habanalabs/common/command_buffer.c
index d9adb9a5e4d8..719168c980a4 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -181,7 +181,7 @@ static void cb_release(struct kref *ref)
 static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
int ctx_id, bool internal_cb)
 {
-   struct hl_cb *cb;
+   struct hl_cb *cb = NULL;
u32 cb_offset;
void *p;
 
@@ -193,9 +193,10 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, 
u32 cb_size,
 * the kernel's copy. Hence, we must never sleep in this code section
 * and must use GFP_ATOMIC for all memory allocations.
 */
-   if (ctx_id == HL_KERNEL_ASID_ID)
+   if (ctx_id == HL_KERNEL_ASID_ID && !hdev->disabled)
cb = kzalloc(sizeof(*cb), GFP_ATOMIC);
-   else
+
+   if (!cb)
cb = kzalloc(sizeof(*cb), GFP_KERNEL);
 
if (!cb)
@@ -214,6 +215,9 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, 
u32 cb_size,
} else if (ctx_id == HL_KERNEL_ASID_ID) {
p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
&cb->bus_address, GFP_ATOMIC);
+   if (!p)
+   p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
+   cb_size, &cb->bus_address, GFP_KERNEL);
} else {
p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
&cb->bus_address,
@@ -310,6 +314,8 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr 
*mgr,
 
spin_lock(&mgr->cb_lock);
rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
+   if (rc < 0)
+   rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_KERNEL);
spin_unlock(&mgr->cb_lock);
 
if (rc < 0) {
diff --git a/drivers/misc/habanalabs/common/command_submission.c 
b/drivers/misc/habanalabs/common/command_submission.c
index 98fdb98ff4e4..7d0d3e61a6e7 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -638,6 +638,9 @@ static int allocate_cs(struct hl_device *hdev, struct 
hl_ctx *ctx,
cntr = &hdev->aggregated_cs_counters;
 
cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
+   if (!cs)
+   cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+
if (!cs) {
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
atomic64_inc(&cntr->out_of_mem_drop_cnt);
@@ -657,6 +660,9 @@ static int allocate_cs(struct hl_device *hdev, struct 
hl_ctx *ctx,
spin_lock_init(&cs->job_lock);
 
cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
+   if (!cs_cmpl)
+   cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_KERNEL);
+
if (!cs_cmpl) {
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
atomic64_inc(&cntr->out_of_mem_drop_cnt);
@@ -698,6 +704,10 @@ static int allocate_cs(struct hl_device *hdev, struct 
hl_ctx *ctx,
 
cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
+   if (!cs->jobs_in_queue_cnt)
+   cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+   sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
+
if (!cs->jobs_in_queue_cnt) {
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
atomic64_inc(&cntr->out_of_mem_drop_cnt);
@@ -927,6 +937,9 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
struct hl_cs_job *job;
 
job = kzalloc(sizeof(*job), GFP_ATOMIC);
+   if (!job)
+   j

[PATCH 3/4] habanalabs/gaudi: update extended async event header

2021-02-25 Thread Oded Gabbay
From: Ofir Bitton 

Update to the latest definition of the firmware

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 .../include/gaudi/gaudi_async_ids_map_extended.h   | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git 
a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h 
b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
index 737176ba06fb..9106395eb920 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_ids_map_extended.h
@@ -301,10 +301,10 @@ static struct gaudi_async_events_ids_map 
gaudi_irq_map_table[] = {
{ .fc_id = 274, .cpu_id = 128, .valid = 0, .name = "" },
{ .fc_id = 275, .cpu_id = 128, .valid = 0, .name = "" },
{ .fc_id = 276, .cpu_id = 128, .valid = 0, .name = "" },
-   { .fc_id = 277, .cpu_id = 129, .valid = 0, .name = "" },
-   { .fc_id = 278, .cpu_id = 129, .valid = 0, .name = "" },
-   { .fc_id = 279, .cpu_id = 129, .valid = 0, .name = "" },
-   { .fc_id = 280, .cpu_id = 129, .valid = 0, .name = "" },
+   { .fc_id = 277, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_0" },
+   { .fc_id = 278, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_1" },
+   { .fc_id = 279, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_2" },
+   { .fc_id = 280, .cpu_id = 129, .valid = 1, .name = "DMA_IF_SEI_3" },
{ .fc_id = 281, .cpu_id = 130, .valid = 0, .name = "" },
{ .fc_id = 282, .cpu_id = 131, .valid = 0, .name = "" },
{ .fc_id = 283, .cpu_id = 132, .valid = 0, .name = "" },
@@ -681,7 +681,7 @@ static struct gaudi_async_events_ids_map 
gaudi_irq_map_table[] = {
{ .fc_id = 654, .cpu_id = 503, .valid = 0, .name = "" },
{ .fc_id = 655, .cpu_id = 504, .valid = 0, .name = "" },
{ .fc_id = 656, .cpu_id = 505, .valid = 0, .name = "" },
-   { .fc_id = 657, .cpu_id = 506, .valid = 0, .name = "" },
+   { .fc_id = 657, .cpu_id = 506, .valid = 1, .name = "PKT_QUEUE_ASYNC" },
{ .fc_id = 658, .cpu_id = 507, .valid = 1, .name = "FIX_POWER_ENV_S" },
{ .fc_id = 659, .cpu_id = 508, .valid = 1, .name = "FIX_POWER_ENV_E" },
{ .fc_id = 660, .cpu_id = 509, .valid = 1,
-- 
2.25.1



[PATCH 2/4] habanalabs: return current power via INFO IOCTL

2021-02-25 Thread Oded Gabbay
From: Sagiv Ozeri 

Add driver implementation for reading the current power from the device
CPU F/W.

Signed-off-by: Sagiv Ozeri 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c  | 23 +++
 drivers/misc/habanalabs/common/habanalabs.h   |  1 +
 .../misc/habanalabs/common/habanalabs_ioctl.c | 22 ++
 .../misc/habanalabs/include/common/cpucp_if.h |  5 
 include/uapi/misc/habanalabs.h|  9 
 5 files changed, 60 insertions(+)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index 6f3692bf5eff..2a58edaf984a 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -565,6 +565,29 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 
pll_index,
return rc;
 }
 
+int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power)
+{
+   struct cpucp_packet pkt;
+   u64 result;
+   int rc;
+
+   memset(&pkt, 0, sizeof(pkt));
+
+   pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
+   CPUCP_PKT_CTL_OPCODE_SHIFT);
+
+   rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+   HL_CPUCP_INFO_TIMEOUT_USEC, &result);
+   if (rc) {
+   dev_err(hdev->dev, "Failed to read power, error %d\n", rc);
+   return rc;
+   }
+
+   *power = result;
+
+   return rc;
+}
+
 static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
 {
/* Some of the status codes below are deprecated in newer f/w
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 9129582e9339..33ada9425a62 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2363,6 +2363,7 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
u64 *total_energy);
 int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
u16 *pll_freq_arr);
+int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power);
 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c 
b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 95a0b144604d..3f762e021428 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -444,6 +444,25 @@ static int pll_frequency_info(struct hl_fpriv *hpriv, 
struct hl_info_args *args)
min((size_t) max_size, sizeof(freq_info))) ? -EFAULT : 0;
 }
 
+static int power_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+   struct hl_device *hdev = hpriv->hdev;
+   u32 max_size = args->return_size;
+   struct hl_power_info power_info = {0};
+   void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+   int rc;
+
+   if ((!max_size) || (!out))
+   return -EINVAL;
+
+   rc = hl_fw_cpucp_power_get(hdev, &power_info.power);
+   if (rc)
+   return rc;
+
+   return copy_to_user(out, &power_info,
+   min((size_t) max_size, sizeof(power_info))) ? -EFAULT : 0;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev)
 {
@@ -524,6 +543,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void 
*data,
case HL_INFO_PLL_FREQUENCY:
return pll_frequency_info(hpriv, args);
 
+   case HL_INFO_POWER:
+   return power_info(hpriv, args);
+
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -ENOTTY;
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h 
b/drivers/misc/habanalabs/include/common/cpucp_if.h
index bf4e7900d8c8..6ba480a316ce 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -296,6 +296,9 @@ enum pq_init_status {
  *   The result is composed of 4 outputs, each is 16-bit
  *   frequency in MHz.
  *
+ * CPUCP_PACKET_POWER_GET
+ *   Fetch the present power consumption of the device (Current * Voltage).
+ *
  */
 
 enum cpucp_packet_id {
@@ -329,6 +332,8 @@ enum cpucp_packet_id {
CPUCP_PACKET_PCIE_REPLAY_CNT_GET,   /* internal */
CPUCP_PACKET_TOTAL_ENERGY_GET,  /* internal */
CPUCP_PACKET_PLL_INFO_GET,  /* internal */
+   CPUCP_PACKET_NIC_STATUS,/* internal */
+   CPUCP_PACKET_POWER_GET, /* internal */
 };
 
 #define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5
diff --git a/in

[PATCH 1/4] habanalabs: support HW blocks vm show

2021-02-25 Thread Oded Gabbay
From: Sagiv Ozeri 

Improve "vm" debugfs node to print also the virtual addresses which are
currently mapped to HW blocks in the device.

Signed-off-by: Sagiv Ozeri 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 .../ABI/testing/debugfs-driver-habanalabs |  2 +-
 drivers/misc/habanalabs/common/context.c  | 14 +++-
 drivers/misc/habanalabs/common/debugfs.c  | 16 +
 drivers/misc/habanalabs/common/habanalabs.h   | 24 +++
 drivers/misc/habanalabs/common/memory.c   | 65 +--
 5 files changed, 113 insertions(+), 8 deletions(-)

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs 
b/Documentation/ABI/testing/debugfs-driver-habanalabs
index d447a611c41b..f9e233cbdc37 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -174,7 +174,7 @@ Date:   Jan 2019
 KernelVersion:  5.1
 Contact:ogab...@kernel.org
 Description:Displays a list with information about all the active virtual
-address mappings per ASID
+address mappings per ASID and all user mappings of HW blocks
 
 What:   /sys/kernel/debug/habanalabs/hl/stop_on_err
 Date:   Mar 2020
diff --git a/drivers/misc/habanalabs/common/context.c 
b/drivers/misc/habanalabs/common/context.c
index cda871afb8f4..62d705889ca8 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -20,6 +20,11 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 */
hl_pending_cb_list_flush(ctx);
 
+   /* Release all allocated HW block mapped list entries and destroy
+* the mutex.
+*/
+   hl_hw_block_mem_fini(ctx);
+
/*
 * If we arrived here, there are no jobs waiting for this context
 * on its queues so we can safely remove it.
@@ -160,13 +165,15 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx 
*ctx, bool is_kernel_ctx)
if (!ctx->cs_pending)
return -ENOMEM;
 
+   hl_hw_block_mem_init(ctx);
+
if (is_kernel_ctx) {
ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
rc = hl_vm_ctx_init(ctx);
if (rc) {
dev_err(hdev->dev, "Failed to init mem ctx module\n");
rc = -ENOMEM;
-   goto err_free_cs_pending;
+   goto err_hw_block_mem_fini;
}
 
rc = hdev->asic_funcs->ctx_init(ctx);
@@ -179,7 +186,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, 
bool is_kernel_ctx)
if (!ctx->asid) {
dev_err(hdev->dev, "No free ASID, failed to create 
context\n");
rc = -ENOMEM;
-   goto err_free_cs_pending;
+   goto err_hw_block_mem_fini;
}
 
rc = hl_vm_ctx_init(ctx);
@@ -214,7 +221,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, 
bool is_kernel_ctx)
 err_asid_free:
if (ctx->asid != HL_KERNEL_ASID_ID)
hl_asid_free(hdev, ctx->asid);
-err_free_cs_pending:
+err_hw_block_mem_fini:
+   hl_hw_block_mem_fini(ctx);
kfree(ctx->cs_pending);
 
return rc;
diff --git a/drivers/misc/habanalabs/common/debugfs.c 
b/drivers/misc/habanalabs/common/debugfs.c
index df847a6d19f4..d4a49d6d1753 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -229,6 +229,7 @@ static int vm_show(struct seq_file *s, void *data)
 {
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+   struct hl_vm_hw_block_list_node *lnode;
struct hl_ctx *ctx;
struct hl_vm *vm;
struct hl_vm_hash_node *hnode;
@@ -272,6 +273,21 @@ static int vm_show(struct seq_file *s, void *data)
}
mutex_unlock(&ctx->mem_hash_lock);
 
+   if (ctx->asid != HL_KERNEL_ASID_ID &&
+   !list_empty(&ctx->hw_block_mem_list)) {
+   seq_puts(s, "\nhw_block mappings:\n\n");
+   seq_puts(s, "virtual addresssizeHW block 
id\n");
+   seq_puts(s, 
"---\n");
+   mutex_lock(&ctx->hw_block_list_lock);
+   list_for_each_entry(lnode, &ctx->hw_block_mem_list,
+   node) {
+   seq_printf(s,
+   "0x%-14lx   %-6u  %-9u\n",
+   lnode->vaddr, lnode->size, lnode->id);
+   }
+   mutex_unlock(&ctx-&

[PATCH] habanalabs: use a single FW loading bringup flag

2021-02-24 Thread Oded Gabbay
From: Ofir Bitton 

For simplicity, use a single bringup flag indicating which FW
binaries should be loaded to the device.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c| 10 +++---
 drivers/misc/habanalabs/common/habanalabs.h | 11 +++
 drivers/misc/habanalabs/common/habanalabs_drv.c |  3 +--
 drivers/misc/habanalabs/gaudi/gaudi.c   |  2 +-
 drivers/misc/habanalabs/goya/goya.c |  2 +-
 5 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index 09706c571e95..6f3692bf5eff 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -623,7 +623,11 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 
cpu_boot_status_reg,
u32 status, security_status;
int rc;
 
-   if (!hdev->cpu_enable)
+   /* pldm was added for cases in which we use preboot on pldm and want
+* to load boot fit, but we can't wait for preboot because it runs
+* very slowly
+*/
+   if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) || hdev->pldm)
return 0;
 
/* Need to check two possible scenarios:
@@ -710,7 +714,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 
cpu_boot_status_reg,
u32 status;
int rc;
 
-   if (!(hdev->fw_loading & FW_TYPE_BOOT_CPU))
+   if (!(hdev->fw_components & FW_TYPE_BOOT_CPU))
return 0;
 
dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
@@ -801,7 +805,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 
cpu_boot_status_reg,
goto out;
}
 
-   if (!(hdev->fw_loading & FW_TYPE_LINUX)) {
+   if (!(hdev->fw_components & FW_TYPE_LINUX)) {
dev_info(hdev->dev, "Skip loading Linux F/W\n");
goto out;
}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index ee94953f9ea2..442eca71fd2a 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -171,15 +171,19 @@ enum hl_fw_component {
 };
 
 /**
- * enum hl_fw_types - F/W types to load
+ * enum hl_fw_types - F/W types present in the system
  * @FW_TYPE_LINUX: Linux image for device CPU
  * @FW_TYPE_BOOT_CPU: Boot image for device CPU
+ * @FW_TYPE_PREBOOT_CPU: Indicates pre-loaded CPUs are present in the system
+ *   (preboot, ppboot etc...)
  * @FW_TYPE_ALL_TYPES: Mask for all types
  */
 enum hl_fw_types {
FW_TYPE_LINUX = 0x1,
FW_TYPE_BOOT_CPU = 0x2,
-   FW_TYPE_ALL_TYPES = (FW_TYPE_LINUX | FW_TYPE_BOOT_CPU)
+   FW_TYPE_PREBOOT_CPU = 0x4,
+   FW_TYPE_ALL_TYPES =
+   (FW_TYPE_LINUX | FW_TYPE_BOOT_CPU | FW_TYPE_PREBOOT_CPU)
 };
 
 /**
@@ -2066,10 +2070,9 @@ struct hl_device {
 
/* Parameters for bring-up */
u64 nic_ports_mask;
-   u64 fw_loading;
+   u64 fw_components;
u8  mmu_enable;
u8  mmu_huge_page_opt;
-   u8  cpu_enable;
u8  reset_pcilink;
u8  cpu_queues_enable;
u8  pldm;
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c 
b/drivers/misc/habanalabs/common/habanalabs_drv.c
index f695ea6253c1..59896566dca1 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -234,8 +234,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file 
*filp)
 
 static void set_driver_behavior_per_device(struct hl_device *hdev)
 {
-   hdev->cpu_enable = 1;
-   hdev->fw_loading = FW_TYPE_ALL_TYPES;
+   hdev->fw_components = FW_TYPE_ALL_TYPES;
hdev->cpu_queues_enable = 1;
hdev->heartbeat = 1;
hdev->mmu_enable = 1;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index c1f00237273c..4f39737b4808 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -3701,7 +3701,7 @@ static int gaudi_init_cpu(struct hl_device *hdev)
struct gaudi_device *gaudi = hdev->asic_specific;
int rc;
 
-   if (!hdev->cpu_enable)
+   if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
return 0;
 
if (gaudi->hw_cap_initialized & HW_CAP_CPU)
diff --git a/drivers/misc/habanalabs/goya/goya.c 
b/drivers/misc/habanalabs/goya/goya.c
index a40c428fed94..20abed6ef7e6 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+

[PATCH 2/3] habanalabs: wait for interrupt support

2021-02-22 Thread Oded Gabbay
From: Ofir Bitton 

In order to support command submissions from user space, the driver
needs to add support for user interrupt completions. The driver will
allow multiple user threads to wait for an interrupt and perform
a comparison with a given user address once the interrupt expires.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 .../habanalabs/common/command_submission.c| 213 +-
 drivers/misc/habanalabs/common/device.c   |  44 ++--
 drivers/misc/habanalabs/common/habanalabs.h   |  28 ++-
 .../misc/habanalabs/common/habanalabs_ioctl.c |   2 +-
 drivers/misc/habanalabs/common/irq.c  |  22 +-
 include/uapi/misc/habanalabs.h|  42 +++-
 6 files changed, 322 insertions(+), 29 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c 
b/drivers/misc/habanalabs/common/command_submission.c
index 7bd4a03b3429..138cb77420bd 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -778,6 +778,44 @@ void hl_pending_cb_list_flush(struct hl_ctx *ctx)
}
 }
 
+static void
+wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
+{
+   struct hl_user_pending_interrupt *pend;
+
+   spin_lock(&interrupt->wait_list_lock);
+   list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
+   pend->fence.error = -EIO;
+   complete_all(&pend->fence.completion);
+   }
+   spin_unlock(&interrupt->wait_list_lock);
+}
+
+void hl_release_pending_user_interrupts(struct hl_device *hdev)
+{
+   struct asic_fixed_properties *prop = &hdev->asic_prop;
+   struct hl_user_interrupt *interrupt;
+   int i;
+
+   if (!prop->user_interrupt_count)
+   return;
+
+   /* We iterate through the user interrupt requests and waking up all
+* user threads waiting for interrupt completion. We iterate the
+* list under a lock, this is why all user threads, once awake,
+* will wait on the same lock and will release the waiting object upon
+* unlock.
+*/
+
+   for (i = 0 ; i < prop->user_interrupt_count ; i++) {
+   interrupt = &hdev->user_interrupt[i];
+   wake_pending_user_interrupt_threads(interrupt);
+   }
+
+   interrupt = &hdev->common_user_interrupt;
+   wake_pending_user_interrupt_threads(interrupt);
+}
+
 static void job_wq_completion(struct work_struct *work)
 {
struct hl_cs_job *job = container_of(work, struct hl_cs_job,
@@ -1818,7 +1856,7 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, 
struct hl_ctx *ctx,
return rc;
 }
 
-int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
+static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 {
struct hl_device *hdev = hpriv->hdev;
union hl_wait_cs_args *args = data;
@@ -1873,3 +1911,176 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 
return 0;
 }
+
+static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
+   u32 timeout_us, u64 user_address,
+   u32 target_value, u16 interrupt_offset,
+   enum hl_cs_wait_status *status)
+{
+   struct hl_user_pending_interrupt *pend;
+   struct hl_user_interrupt *interrupt;
+   unsigned long timeout;
+   long completion_rc;
+   u32 completion_value;
+   int rc = 0;
+
+   if (timeout_us == MAX_SCHEDULE_TIMEOUT)
+   timeout = timeout_us;
+   else
+   timeout = usecs_to_jiffies(timeout_us);
+
+   hl_ctx_get(hdev, ctx);
+
+   pend = kmalloc(sizeof(*pend), GFP_ATOMIC);
+   if (!pend) {
+   hl_ctx_put(ctx);
+   return -ENOMEM;
+   }
+
+   hl_fence_init(&pend->fence, ULONG_MAX);
+
+   if (interrupt_offset == HL_COMMON_USER_INTERRUPT_ID)
+   interrupt = &hdev->common_user_interrupt;
+   else
+   interrupt = &hdev->user_interrupt[interrupt_offset];
+
+   spin_lock(&interrupt->wait_list_lock);
+   if (!hl_device_operational(hdev, NULL)) {
+   rc = -EPERM;
+   goto unlock_and_free_fence;
+   }
+
+   if (copy_from_user(&completion_value, (void *) user_address, 4)) {
+   dev_err(hdev->dev,
+   "Failed to copy completion value from user\n");
+   rc = -EFAULT;
+   goto unlock_and_free_fence;
+   }
+
+   if (completion_value >= target_value)
+   *status = CS_WAIT_STATUS_COMPLETED;
+   else
+   *status = CS_WAIT_STATUS_BUSY;
+
+   if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
+   goto unlock_and_free_fence;
+
+   /* Add pending user in

[PATCH 3/3] habanalabs: use correct define for 32-bit max value

2021-02-22 Thread Oded Gabbay
The timeout in wait for interrupt is held in a 32-bit variable, so we
need to compare it against the correct maximum value.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/command_submission.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c 
b/drivers/misc/habanalabs/common/command_submission.c
index 138cb77420bd..98fdb98ff4e4 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -1924,7 +1924,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device 
*hdev, struct hl_ctx *ctx,
u32 completion_value;
int rc = 0;
 
-   if (timeout_us == MAX_SCHEDULE_TIMEOUT)
+   if (timeout_us == U32_MAX)
timeout = timeout_us;
else
timeout = usecs_to_jiffies(timeout_us);
-- 
2.25.1



[PATCH 1/3] habanalabs: enable all IRQs for user interrupt support

2021-02-22 Thread Oded Gabbay
From: Ofir Bitton 

In order to support user interrupts, the driver must enable all MSI-X
interrupts in case the user triggers them. We differentiate
between a valid user interrupt and an invalid one.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/device.c | 20 +--
 drivers/misc/habanalabs/common/habanalabs.h | 16 +
 drivers/misc/habanalabs/common/irq.c| 40 +
 3 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c 
b/drivers/misc/habanalabs/common/device.c
index 6948a1c54083..06395c79f07d 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1223,7 +1223,7 @@ int hl_device_reset(struct hl_device *hdev, bool 
hard_reset,
  */
 int hl_device_init(struct hl_device *hdev, struct class *hclass)
 {
-   int i, rc, cq_cnt, cq_ready_cnt;
+   int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
char *name;
bool add_cdev_sysfs_on_err = false;
 
@@ -1312,6 +1312,19 @@ int hl_device_init(struct hl_device *hdev, struct class 
*hclass)
hdev->completion_queue[i].cq_idx = i;
}
 
+   user_interrupt_cnt = hdev->asic_prop.user_interrupt_count;
+
+   if (user_interrupt_cnt) {
+   hdev->user_interrupt = kcalloc(user_interrupt_cnt,
+   sizeof(*hdev->user_interrupt),
+   GFP_KERNEL);
+
+   if (!hdev->user_interrupt) {
+   rc = -ENOMEM;
+   goto cq_fini;
+   }
+   }
+
/*
 * Initialize the event queue. Must be done before hw_init,
 * because there the address of the event queue is being
@@ -1320,7 +1333,7 @@ int hl_device_init(struct hl_device *hdev, struct class 
*hclass)
rc = hl_eq_init(hdev, &hdev->event_queue);
if (rc) {
dev_err(hdev->dev, "failed to initialize event queue\n");
-   goto cq_fini;
+   goto user_interrupts_fini;
}
 
/* MMU S/W must be initialized before kernel context is created */
@@ -1458,6 +1471,8 @@ int hl_device_init(struct hl_device *hdev, struct class 
*hclass)
hl_mmu_fini(hdev);
 eq_fini:
hl_eq_fini(hdev, &hdev->event_queue);
+user_interrupts_fini:
+   kfree(hdev->user_interrupt);
 cq_fini:
for (i = 0 ; i < cq_ready_cnt ; i++)
hl_cq_fini(hdev, &hdev->completion_queue[i]);
@@ -1595,6 +1610,7 @@ void hl_device_fini(struct hl_device *hdev)
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
hl_cq_fini(hdev, &hdev->completion_queue[i]);
kfree(hdev->completion_queue);
+   kfree(hdev->user_interrupt);
 
hl_hw_queues_destroy(hdev);
 
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 046bb44f70f9..bb6cb88d7257 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -412,6 +412,7 @@ struct hl_mmu_properties {
  * @first_available_user_msix_interrupt: first available msix interrupt
  *   reserved for the user
  * @first_available_cq: first available CQ for the user.
+ * @user_interrupt_count: number of user interrupts.
  * @tpc_enabled_mask: which TPCs are enabled.
  * @completion_queues_count: number of completion queues.
  * @fw_security_disabled: true if security measures are disabled in firmware,
@@ -475,6 +476,7 @@ struct asic_fixed_properties {
u16 first_available_user_mon[HL_MAX_DCORES];
u16 first_available_user_msix_interrupt;
u16 first_available_cq[HL_MAX_DCORES];
+   u16 user_interrupt_count;
u8  tpc_enabled_mask;
u8  completion_queues_count;
u8  fw_security_disabled;
@@ -689,6 +691,16 @@ struct hl_cq {
atomic_tfree_slots_cnt;
 };
 
+/**
+ * struct hl_user_interrupt - holds user interrupt information
+ * @hdev: pointer to the device structure
+ * @interrupt_id: msix interrupt id
+ */
+struct hl_user_interrupt {
+   struct hl_device*hdev;
+   u32 interrupt_id;
+};
+
 /**
  * struct hl_eq - describes the event queue (single one per device)
  * @hdev: pointer to the device structure
@@ -1821,6 +1833,7 @@ struct hl_mmu_funcs {
  * @asic_name: ASIC specific name.
  * @asic_type: ASIC specific type.
  * @completion_queue: array of hl_cq.
+ * @user_interrupt: array of hl_user_interrupt.
  * @cq_wq: work queues of completion queues for executing work in process
  * context.
  * @eq_wq: w

[PATCH 1/2] habanalabs: change default CS timeout to 30 seconds

2021-02-22 Thread Oded Gabbay
Because our graph contains network operations, we need to account
for delay in the network.

5 seconds timeout per CS is not enough to account for that.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/habanalabs_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c 
b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 032d114f01ea..f695ea6253c1 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -27,13 +27,13 @@ static struct class *hl_class;
 static DEFINE_IDR(hl_devs_idr);
 static DEFINE_MUTEX(hl_devs_idr_lock);
 
-static int timeout_locked = 5;
+static int timeout_locked = 30;
 static int reset_on_lockup = 1;
 static int memory_scrub = 1;
 
 module_param(timeout_locked, int, 0444);
 MODULE_PARM_DESC(timeout_locked,
-   "Device lockup timeout in seconds (0 = disabled, default 5s)");
+   "Device lockup timeout in seconds (0 = disabled, default 30s)");
 
 module_param(reset_on_lockup, int, 0444);
 MODULE_PARM_DESC(reset_on_lockup,
-- 
2.25.1



[PATCH 2/2] habanalabs: reset device in case of sync error

2021-02-22 Thread Oded Gabbay
From: Ohad Sharabi 

As the F/W is the first to detect an out-of-sync event, a new event is
added to notify the driver of such an event, in which case the driver
performs a hard reset.

Signed-off-by: Ohad Sharabi 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 18 +++
 drivers/misc/habanalabs/goya/goya.c   | 23 +++
 .../misc/habanalabs/include/common/cpucp_if.h |  9 
 .../include/gaudi/gaudi_async_events.h|  1 +
 .../include/goya/goya_async_events.h  |  1 +
 5 files changed, 52 insertions(+)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 9152242778f5..c1f00237273c 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7097,6 +7097,15 @@ static void gaudi_print_irq_info(struct hl_device *hdev, 
u16 event_type,
}
 }
 
+static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
+   struct cpucp_pkt_sync_err *sync_err)
+{
+   struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
+
+   dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, 
ci=%u\n",
+   sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
+}
+
 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
 {
struct gaudi_device *gaudi = hdev->asic_specific;
@@ -7552,6 +7561,15 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
event_type, cause);
break;
 
+   case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
+   gaudi_print_irq_info(hdev, event_type, false);
+   gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
+   if (hdev->hard_reset_on_fw_events)
+   hl_device_reset(hdev, true, false);
+   else
+   hl_fw_unmask_irq(hdev, event_type);
+   break;
+
default:
dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
event_type);
diff --git a/drivers/misc/habanalabs/goya/goya.c 
b/drivers/misc/habanalabs/goya/goya.c
index ed566c52ccaa..a40c428fed94 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -4401,6 +4401,8 @@ static const char *_goya_get_event_desc(u16 event_type)
return "THERMAL_ENV_S";
case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
return "THERMAL_ENV_E";
+   case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
+   return "QUEUE_OUT_OF_SYNC";
default:
return "N/A";
}
@@ -4483,6 +4485,9 @@ static void goya_get_event_desc(u16 event_type, char 
*desc, size_t size)
index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
snprintf(desc, size, _goya_get_event_desc(event_type), index);
break;
+   case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
+   snprintf(desc, size, _goya_get_event_desc(event_type));
+   break;
default:
snprintf(desc, size, _goya_get_event_desc(event_type));
break;
@@ -4534,6 +4539,15 @@ static void goya_print_mmu_error_info(struct hl_device 
*hdev)
}
 }
 
+static void goya_print_out_of_sync_info(struct hl_device *hdev,
+   struct cpucp_pkt_sync_err *sync_err)
+{
+   struct hl_hw_queue *q = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
+
+   dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, 
ci=%u\n",
+   sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
+}
+
 static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
bool razwi)
 {
@@ -4754,6 +4768,15 @@ void goya_handle_eqe(struct hl_device *hdev, struct 
hl_eq_entry *eq_entry)
goya_unmask_irq(hdev, event_type);
break;
 
+   case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
+   goya_print_irq_info(hdev, event_type, false);
+   goya_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
+   if (hdev->hard_reset_on_fw_events)
+   hl_device_reset(hdev, true, false);
+   else
+   hl_fw_unmask_irq(hdev, event_type);
+   break;
+
default:
dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
event_type);
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h 
b/drivers/misc/habanalabs/include/common/cpucp_if.h
index b77c1c16c32c..bf4e7900d8c8 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/

[PATCH 5/5] habanalabs: print if device is used on FD close

2021-02-22 Thread Oded Gabbay
Notify the user that although he closed the FD, the device is
still in use because there are live CS and/or memory mappings (mmaps).

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/device.c | 8 +---
 drivers/misc/habanalabs/common/habanalabs.h | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c 
b/drivers/misc/habanalabs/common/device.c
index 48d301a03d62..6948a1c54083 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -80,9 +80,9 @@ void hl_hpriv_get(struct hl_fpriv *hpriv)
kref_get(&hpriv->refcount);
 }
 
-void hl_hpriv_put(struct hl_fpriv *hpriv)
+int hl_hpriv_put(struct hl_fpriv *hpriv)
 {
-   kref_put(&hpriv->refcount, hpriv_release);
+   return kref_put(&hpriv->refcount, hpriv_release);
 }
 
 /*
@@ -103,7 +103,9 @@ static int hl_device_release(struct inode *inode, struct 
file *filp)
 
filp->private_data = NULL;
 
-   hl_hpriv_put(hpriv);
+   if (!hl_hpriv_put(hpriv))
+   dev_warn(hdev->dev,
+   "Device is still in use because there are live CS 
and/or memory mappings\n");
 
return 0;
 }
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 9ba48f322964..046bb44f70f9 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2182,7 +2182,7 @@ int hl_device_resume(struct hl_device *hdev);
 int hl_device_reset(struct hl_device *hdev, bool hard_reset,
bool from_hard_reset_thread);
 void hl_hpriv_get(struct hl_fpriv *hpriv);
-void hl_hpriv_put(struct hl_fpriv *hpriv);
+int hl_hpriv_put(struct hl_fpriv *hpriv);
 int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency 
freq);
 uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms);
 
-- 
2.25.1



[PATCH 3/5] habanalabs: fail reset if device is not idle

2021-02-22 Thread Oded Gabbay
After any reset (soft or hard) the device (the engines/QMANs) should
be idle. If they are not idle, fail the reset. If it is soft-reset,
the driver will try to do hard-reset automatically. If it is hard-reset,
the driver will make the device non-operational.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/device.c | 25 +++--
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c 
b/drivers/misc/habanalabs/common/device.c
index 8cc3264ae378..48d301a03d62 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -71,21 +71,8 @@ static void hpriv_release(struct kref *ref)
 
kfree(hpriv);
 
-   if (hdev->reset_upon_device_release) {
-   u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
-
-   /* We try soft reset first */
+   if (hdev->reset_upon_device_release)
hl_device_reset(hdev, false, false);
-
-   /* If device is not idle perform hard reset */
-   if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
-   HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
-   dev_info(hdev->dev,
-   "device is not idle (mask %#llx %#llx) after 
soft reset, performing hard reset",
-   idle_mask[0], idle_mask[1]);
-   hl_device_reset(hdev, true, false);
-   }
-   }
 }
 
 void hl_hpriv_get(struct hl_fpriv *hpriv)
@@ -921,6 +908,7 @@ static int device_kill_open_processes(struct hl_device 
*hdev, u32 timeout)
 int hl_device_reset(struct hl_device *hdev, bool hard_reset,
bool from_hard_reset_thread)
 {
+   u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
int i, rc;
 
if (!hdev->init_done) {
@@ -1140,6 +1128,15 @@ int hl_device_reset(struct hl_device *hdev, bool 
hard_reset,
goto out_err;
}
 
+   /* If device is not idle fail the reset process */
+   if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
+   HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
+   dev_err(hdev->dev,
+   "device is not idle (mask %#llx %#llx) after reset\n",
+   idle_mask[0], idle_mask[1]);
+   goto out_err;
+   }
+
/* Check that the communication with the device is working */
rc = hdev->asic_funcs->test_queues(hdev);
if (rc) {
-- 
2.25.1



[PATCH 4/5] habanalabs: reset_upon_device_release is for bring-up

2021-02-22 Thread Oded Gabbay
Move the field to correct location in structure and remove comment.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/habanalabs.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 706361a81410..9ba48f322964 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1920,7 +1920,6 @@ struct hl_mmu_funcs {
  * @device_fini_pending: true if device_fini was called and might be
  *   waiting for the reset thread to finish
  * @supports_staged_submission: true if staged submissions are supported
- * @reset_upon_device_release: true if reset is required upon device release
  */
 struct hl_device {
struct pci_dev  *pdev;
@@ -2027,7 +2026,6 @@ struct hl_device {
u8  process_kill_trial_cnt;
u8  device_fini_pending;
u8  supports_staged_submission;
-   u8  reset_upon_device_release;
 
/* Parameters for bring-up */
u64 nic_ports_mask;
@@ -2045,6 +2043,7 @@ struct hl_device {
u8  bmc_enable;
u8  rl_enable;
u8  reset_on_preboot_fail;
+   u8  reset_upon_device_release;
 };
 
 
-- 
2.25.1



[PATCH 2/5] habanalabs: reset after device is actually released

2021-02-22 Thread Oded Gabbay
The device is actually released only after the refcnt of the hpriv
structure is 0, which means all its contexts were closed.

If we reset the device while a context is still open, there are
possibilities for unexpected behavior and crashes. For example, if the
process has a mapping of a register block that is now currently being
reset, and the process writes/reads to that block during the reset,
the device can get stuck.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/device.c | 32 -
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c 
b/drivers/misc/habanalabs/common/device.c
index ed1838c15c78..8cc3264ae378 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -70,6 +70,22 @@ static void hpriv_release(struct kref *ref)
mutex_unlock(&hdev->fpriv_list_lock);
 
kfree(hpriv);
+
+   if (hdev->reset_upon_device_release) {
+   u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
+
+   /* We try soft reset first */
+   hl_device_reset(hdev, false, false);
+
+   /* If device is not idle perform hard reset */
+   if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
+   HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
+   dev_info(hdev->dev,
+   "device is not idle (mask %#llx %#llx) after 
soft reset, performing hard reset",
+   idle_mask[0], idle_mask[1]);
+   hl_device_reset(hdev, true, false);
+   }
+   }
 }
 
 void hl_hpriv_get(struct hl_fpriv *hpriv)
@@ -98,22 +114,6 @@ static int hl_device_release(struct inode *inode, struct 
file *filp)
hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
 
-   if (hdev->reset_upon_device_release) {
-   u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
-
-   /* We try soft reset first */
-   hl_device_reset(hdev, false, false);
-
-   /* If device is not idle perform hard reset */
-   if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
-   HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
-   dev_info(hdev->dev,
-   "device is not idle (mask %#llx %#llx) after 
soft reset, performing hard reset",
-   idle_mask[0], idle_mask[1]);
-   hl_device_reset(hdev, true, false);
-   }
-   }
-
filp->private_data = NULL;
 
hl_hpriv_put(hpriv);
-- 
2.25.1



[PATCH 1/5] habanalabs: add reset support when user closes FD

2021-02-22 Thread Oded Gabbay
From: Ofir Bitton 

In order to support command submissions that are done directly from
user space, the driver must perform soft reset once user closes its FD.
In case the soft reset fails or device is not idle, a hard reset should
be performed.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/device.c | 21 +++--
 drivers/misc/habanalabs/common/habanalabs.h |  2 ++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c 
b/drivers/misc/habanalabs/common/device.c
index 15fcb5c31c4b..ed1838c15c78 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -93,9 +93,26 @@ void hl_hpriv_put(struct hl_fpriv *hpriv)
 static int hl_device_release(struct inode *inode, struct file *filp)
 {
struct hl_fpriv *hpriv = filp->private_data;
+   struct hl_device *hdev = hpriv->hdev;
 
-   hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
-   hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
+   hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
+   hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
+
+   if (hdev->reset_upon_device_release) {
+   u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
+
+   /* We try soft reset first */
+   hl_device_reset(hdev, false, false);
+
+   /* If device is not idle perform hard reset */
+   if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
+   HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
+   dev_info(hdev->dev,
+   "device is not idle (mask %#llx %#llx) after 
soft reset, performing hard reset",
+   idle_mask[0], idle_mask[1]);
+   hl_device_reset(hdev, true, false);
+   }
+   }
 
filp->private_data = NULL;
 
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index dc9f5a83dfc9..706361a81410 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1920,6 +1920,7 @@ struct hl_mmu_funcs {
  * @device_fini_pending: true if device_fini was called and might be
  *   waiting for the reset thread to finish
  * @supports_staged_submission: true if staged submissions are supported
+ * @reset_upon_device_release: true if reset is required upon device release
  */
 struct hl_device {
struct pci_dev  *pdev;
@@ -2026,6 +2027,7 @@ struct hl_device {
u8  process_kill_trial_cnt;
u8  device_fini_pending;
u8  supports_staged_submission;
+   u8  reset_upon_device_release;
 
/* Parameters for bring-up */
u64 nic_ports_mask;
-- 
2.25.1



[PATCH 2/2] habanalabs: Disable file operations after device is removed

2021-02-22 Thread Oded Gabbay
From: Tomer Tayar 

A device can be removed from the PCI subsystem while a process holds the
file descriptor opened.
In such a case, the driver attempts to kill the process, but as it is
still possible that the process will be alive after this step, the
device removal will complete, and we will end up with a process object
that points to a device object which was already released.

To prevent the usage of this released device object, disable the
following file operations for this process object, and avoid the cleanup
steps when the file descriptor is eventually closed.
The latter is just a best effort, as memory leak will occur.

Signed-off-by: Tomer Tayar 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/device.c   | 40 ---
 .../misc/habanalabs/common/habanalabs_ioctl.c | 12 ++
 2 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c 
b/drivers/misc/habanalabs/common/device.c
index 9ecd805f0e88..334009e83823 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -93,12 +93,19 @@ void hl_hpriv_put(struct hl_fpriv *hpriv)
 static int hl_device_release(struct inode *inode, struct file *filp)
 {
struct hl_fpriv *hpriv = filp->private_data;
+   struct hl_device *hdev = hpriv->hdev;
+
+   filp->private_data = NULL;
+
+   if (!hdev) {
+   pr_crit("Closing FD after device was removed. Memory leak will 
occur and it is advised to reboot.\n");
+   put_pid(hpriv->taskpid);
+   return 0;
+   }
 
hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
 
-   filp->private_data = NULL;
-
hl_hpriv_put(hpriv);
 
return 0;
@@ -107,16 +114,19 @@ static int hl_device_release(struct inode *inode, struct 
file *filp)
 static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
 {
struct hl_fpriv *hpriv = filp->private_data;
-   struct hl_device *hdev;
+   struct hl_device *hdev = hpriv->hdev;
 
filp->private_data = NULL;
 
-   hdev = hpriv->hdev;
+   if (!hdev) {
+   pr_err("Closing FD after device was removed\n");
+   goto out;
+   }
 
mutex_lock(&hdev->fpriv_list_lock);
list_del(&hpriv->dev_node);
mutex_unlock(&hdev->fpriv_list_lock);
-
+out:
put_pid(hpriv->taskpid);
 
kfree(hpriv);
@@ -136,8 +146,14 @@ static int hl_device_release_ctrl(struct inode *inode, 
struct file *filp)
 static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
 {
struct hl_fpriv *hpriv = filp->private_data;
+   struct hl_device *hdev = hpriv->hdev;
unsigned long vm_pgoff;
 
+   if (!hdev) {
+   pr_err_ratelimited("Trying to mmap after device was removed! 
Please close FD\n");
+   return -ENODEV;
+   }
+
vm_pgoff = vma->vm_pgoff;
vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);
 
@@ -885,6 +901,16 @@ static int device_kill_open_processes(struct hl_device 
*hdev, u32 timeout)
return -EBUSY;
 }
 
+static void device_disable_open_processes(struct hl_device *hdev)
+{
+   struct hl_fpriv *hpriv;
+
+   mutex_lock(&hdev->fpriv_list_lock);
+   list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
+   hpriv->hdev = NULL;
+   mutex_unlock(&hdev->fpriv_list_lock);
+}
+
 /*
  * hl_device_reset - reset the device
  *
@@ -1558,8 +1584,10 @@ void hl_device_fini(struct hl_device *hdev)
HL_PENDING_RESET_LONG_SEC);
 
rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC);
-   if (rc)
+   if (rc) {
dev_crit(hdev->dev, "Failed to kill all open processes\n");
+   device_disable_open_processes(hdev);
+   }
 
hl_cb_pool_fini(hdev);
 
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c 
b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 03af61cecd37..083a30969c5f 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -5,6 +5,8 @@
  * All Rights Reserved.
  */
 
+#define pr_fmt(fmt)"habanalabs: " fmt
+
 #include 
 #include "habanalabs.h"
 
@@ -682,6 +684,11 @@ long hl_ioctl(struct file *filep, unsigned int cmd, 
unsigned long arg)
const struct hl_ioctl_desc *ioctl = NULL;
unsigned int nr = _IOC_NR(cmd);
 
+   if (!hdev) {
+   pr_err_ratelimited("Sending ioctl after device was removed! 
Please close FD\n");
+   return -ENODEV;
+   }
+
if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) {
ioctl = &hl_ioctls[nr];
   

[PATCH 1/2] habanalabs: Call put_pid() when releasing control device

2021-02-22 Thread Oded Gabbay
From: Tomer Tayar 

The refcount of the "hl_fpriv" structure is not used for the control
device, and thus hl_hpriv_put() is not called when releasing this
device.
This results in no call to put_pid(), so add it explicitly in
hl_device_release_ctrl().

Signed-off-by: Tomer Tayar 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/device.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/misc/habanalabs/common/device.c 
b/drivers/misc/habanalabs/common/device.c
index 15fcb5c31c4b..9ecd805f0e88 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -117,6 +117,8 @@ static int hl_device_release_ctrl(struct inode *inode, 
struct file *filp)
list_del(&hpriv->dev_node);
mutex_unlock(&hdev->fpriv_list_lock);
 
+   put_pid(hpriv->taskpid);
+
kfree(hpriv);
 
return 0;
-- 
2.25.1



[PATCH] habanalabs: set max asid to 2

2021-02-16 Thread Oded Gabbay
From: farah kassabri 

currently we support only 2 asids in all asics.
asid 0 for driver, and asid 1 for user.
no need to setup 1024 asids configurations at init phase.

Signed-off-by: farah kassabri 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/include/gaudi/gaudi.h | 2 +-
 drivers/misc/habanalabs/include/goya/goya.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi.h 
b/drivers/misc/habanalabs/include/gaudi/gaudi.h
index f9ea897ae42c..ffae107b1693 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi.h
@@ -38,7 +38,7 @@
 
 #define QMAN_PQ_ENTRY_SIZE 16  /* Bytes */
 
-#define MAX_ASID   1024
+#define MAX_ASID   2
 
 #define PROT_BITS_OFFS 0xF80
 
diff --git a/drivers/misc/habanalabs/include/goya/goya.h 
b/drivers/misc/habanalabs/include/goya/goya.h
index 43d241891e45..1b4ca435021d 100644
--- a/drivers/misc/habanalabs/include/goya/goya.h
+++ b/drivers/misc/habanalabs/include/goya/goya.h
@@ -30,7 +30,7 @@
 
 #define QMAN_PQ_ENTRY_SIZE 16  /* Bytes */
 
-#define MAX_ASID   1024
+#define MAX_ASID   2
 
 #define PROT_BITS_OFFS 0xF80
 
-- 
2.25.1



[PATCH] habanalabs: mark hl_eq_inc_ptr() as static

2021-02-16 Thread Oded Gabbay
hl_eq_inc_ptr() is not called from anywhere outside irq.c so mark
it as static

Reported-by: kernel test robot 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/irq.c 
b/drivers/misc/habanalabs/common/irq.c
index de53fb5f978a..44a0522b59b9 100644
--- a/drivers/misc/habanalabs/common/irq.c
+++ b/drivers/misc/habanalabs/common/irq.c
@@ -47,7 +47,7 @@ inline u32 hl_cq_inc_ptr(u32 ptr)
  * Increment ptr by 1. If it reaches the number of event queue
  * entries, set it to 0
  */
-inline u32 hl_eq_inc_ptr(u32 ptr)
+static inline u32 hl_eq_inc_ptr(u32 ptr)
 {
ptr++;
if (unlikely(ptr == HL_EQ_LENGTH))
-- 
2.25.1



Re: [PATCH] drivers: habanalabs: remove unused dentry pointer for debugfs files

2021-02-16 Thread Oded Gabbay
On Tue, Feb 16, 2021 at 5:08 PM Greg Kroah-Hartman
 wrote:
>
> The dentry for the created debugfs file was being saved, but never used
> anywhere.  As the pointer isn't needed for anything, and the debugfs
> files are being properly removed by removing the parent directory,
> remove the saved pointer as well, saving a tiny bit of memory and logic.
>
> Cc: Oded Gabbay 
> Cc: Arnd Bergmann 
> Cc: Tomer Tayar 
> Cc: Moti Haimovski 
> Cc: Omer Shpigelman 
> Cc: Ofir Bitton 
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Greg Kroah-Hartman 
> ---
>  drivers/misc/habanalabs/common/debugfs.c| 5 +
>  drivers/misc/habanalabs/common/habanalabs.h | 2 --
>  2 files changed, 1 insertion(+), 6 deletions(-)
>
> diff --git a/drivers/misc/habanalabs/common/debugfs.c 
> b/drivers/misc/habanalabs/common/debugfs.c
> index cef716643979..770b0131397d 100644
> --- a/drivers/misc/habanalabs/common/debugfs.c
> +++ b/drivers/misc/habanalabs/common/debugfs.c
> @@ -965,7 +965,6 @@ void hl_debugfs_add_device(struct hl_device *hdev)
> struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
> int count = ARRAY_SIZE(hl_debugfs_list);
> struct hl_debugfs_entry *entry;
> -   struct dentry *ent;
> int i;
>
> dev_entry->hdev = hdev;
> @@ -1072,13 +1071,11 @@ void hl_debugfs_add_device(struct hl_device *hdev)
> &hl_stop_on_err_fops);
>
> for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
> -
> -   ent = debugfs_create_file(hl_debugfs_list[i].name,
> +   debugfs_create_file(hl_debugfs_list[i].name,
> 0444,
> dev_entry->root,
> entry,
> &hl_debugfs_fops);
> -   entry->dent = ent;
> entry->info_ent = &hl_debugfs_list[i];
> entry->dev_entry = dev_entry;
> }
> diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
> b/drivers/misc/habanalabs/common/habanalabs.h
> index 60e16dc4bcac..48937e9eed83 100644
> --- a/drivers/misc/habanalabs/common/habanalabs.h
> +++ b/drivers/misc/habanalabs/common/habanalabs.h
> @@ -1393,12 +1393,10 @@ struct hl_info_list {
>
>  /**
>   * struct hl_debugfs_entry - debugfs dentry wrapper.
> - * @dent: base debugfs entry structure.
>   * @info_ent: dentry realted ops.
>   * @dev_entry: ASIC specific debugfs manager.
>   */
>  struct hl_debugfs_entry {
> -   struct dentry   *dent;
> const struct hl_info_list   *info_ent;
> struct hl_dbg_device_entry  *dev_entry;
>  };
> --
> 2.30.1
>

This patch is:
Reviewed-by: Oded Gabbay 

Thanks,
Oded


[git pull] habanalabs second pull request for kernel 5.12

2021-02-08 Thread Oded Gabbay
Hi Greg,

This is habanalabs second pull request for the merge window of
kernel 5.12.
It contains important fixes, especially in the firmware-related code.
Details are in the tag.

Thanks,
Oded

The following changes since commit 369aea84595189200a2e6b028f556a7efa0ec489:

  mei: implement client dma setup. (2021-02-06 15:48:11 +0100)

are available in the Git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git 
tags/misc-habanalabs-next-2021-02-08

for you to fetch changes up to da5dfbb97a82ff698e1dc7b229d4d4f5759dad2b:

  habanalabs/gaudi: don't enable clock gating on DMA5 (2021-02-08 18:20:08 
+0200)


This tag contains the following changes for 5.12-rc1:

- Improve communication protocol with device CPU CP application.
  The change prevents random (rare) out-of-sync errors.

- Notify F/W to start sending events only after initialization of
  device is done. This fixes the issue where fatal events were received
  but ignored.

- Fix integer handling (static analysis warning).

- Always fetch HBM ECC errors from F/W (if available).

- Minor fix in GAUDI-specific initialization code.


Oded Gabbay (4):
  habanalabs: fix integer handling issue
  habanalabs: enable F/W events after init done
  habanalabs: return block size + block ID
  habanalabs/gaudi: don't enable clock gating on DMA5

Ofir Bitton (2):
  habanalabs: improve communication protocol with cpucp
  habanalabs: support fetching first available user CQ

Ohad Sharabi (2):
  habanalabs/gaudi: use HBM_ECC_EN bit for ECC ERR
  habanalabs: update security map after init CPU Qs

 drivers/misc/habanalabs/common/device.c| 23 --
 drivers/misc/habanalabs/common/firmware_if.c   | 14 ++-
 drivers/misc/habanalabs/common/habanalabs.h| 15 +--
 drivers/misc/habanalabs/common/habanalabs_ioctl.c  |  3 +-
 drivers/misc/habanalabs/common/memory.c| 19 +
 drivers/misc/habanalabs/common/mmu/mmu.c   |  2 +-
 drivers/misc/habanalabs/gaudi/gaudi.c  | 49 ++
 drivers/misc/habanalabs/goya/goya.c| 27 +---
 .../misc/habanalabs/include/common/hl_boot_if.h|  5 +++
 include/uapi/misc/habanalabs.h | 30 ++---
 10 files changed, 148 insertions(+), 39 deletions(-)


[PATCH 3/3] habanalabs/gaudi: don't enable clock gating on DMA5

2021-02-06 Thread Oded Gabbay
Graph Compiler uses DMA5 in a non-standard way and it requires the
driver to disable clock gating on that DMA.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 35342edd4a02..9152242778f5 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -3460,6 +3460,12 @@ static void gaudi_set_clock_gating(struct hl_device 
*hdev)
enable = !!(hdev->clock_gating_mask &
(BIT_ULL(gaudi_dma_assignment[i])));
 
+   /* GC sends work to DMA engine through Upper CP in DMA5 so
+* we need to not enable clock gating in that DMA
+*/
+   if (i == GAUDI_HBM_DMA_4)
+   enable = 0;
+
qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
enable ? QMAN_CGM1_PWR_GATE_EN : 0);
-- 
2.25.1



[PATCH 1/3] habanalabs: update security map after init CPU Qs

2021-02-06 Thread Oded Gabbay
From: Ohad Sharabi 

when reading CPU_BOOT_DEV_STS0 reg after FW reports SRAM AVAILABLE the
value in the register might not yet be updated by FW.
to overcome this issue another "up-to-date" read of this register is
done at the end of CPU queues init.

Signed-off-by: Ohad Sharabi 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c | 7 ++-
 drivers/misc/habanalabs/common/habanalabs.h  | 3 ---
 drivers/misc/habanalabs/gaudi/gaudi.c| 6 +-
 drivers/misc/habanalabs/goya/goya.c  | 6 +-
 4 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index 31b52a223f02..09706c571e95 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -125,7 +125,8 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 
hw_queue_id, u32 *msg,
goto out;
}
 
-   if (hdev->asic_prop.fw_cpucp_ack_with_pi)
+   if (hdev->asic_prop.fw_app_security_map &
+   CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
expected_ack_val = queue->pi;
else
expected_ack_val = CPUCP_PACKET_FENCE_VAL;
@@ -786,10 +787,6 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 
cpu_boot_status_reg,
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
prop->hard_reset_done_by_fw = true;
 
-   if (prop->fw_boot_cpu_security_map &
-   CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
-   prop->fw_cpucp_ack_with_pi = true;
-
dev_dbg(hdev->dev,
"Firmware boot CPU security status %#x\n",
prop->fw_boot_cpu_security_map);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 18ed3a6000b0..46c37b0c704a 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -420,8 +420,6 @@ struct hl_mmu_properties {
  *from BOOT_DEV_STS0
  * @dram_supports_virtual_memory: is there an MMU towards the DRAM
  * @hard_reset_done_by_fw: true if firmware is handling hard reset flow
- * @fw_cpucp_ack_with_pi: true if cpucp is acking messages with the PQ PI
- *instead of a magic number
  * @num_functional_hbms: number of functional HBMs in each DCORE.
  */
 struct asic_fixed_properties {
@@ -483,7 +481,6 @@ struct asic_fixed_properties {
u8  fw_security_status_valid;
u8  dram_supports_virtual_memory;
u8  hard_reset_done_by_fw;
-   u8  fw_cpucp_ack_with_pi;
u8  num_functional_hbms;
 };
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 6905857b363b..f937d90786b2 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -536,7 +536,6 @@ static int gaudi_get_fixed_properties(struct hl_device 
*hdev)
prop->fw_security_disabled = true;
prop->fw_security_status_valid = false;
prop->hard_reset_done_by_fw = false;
-   prop->fw_cpucp_ack_with_pi = false;
 
return 0;
 }
@@ -3727,6 +3726,7 @@ static int gaudi_init_cpu(struct hl_device *hdev)
 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
 {
struct gaudi_device *gaudi = hdev->asic_specific;
+   struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_eq *eq;
u32 status;
struct hl_hw_queue *cpu_pq =
@@ -3783,6 +3783,10 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, 
u32 cpu_timeout)
return -EIO;
}
 
+   /* update FW application security bits */
+   if (prop->fw_security_status_valid)
+   prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0);
+
gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
return 0;
 }
diff --git a/drivers/misc/habanalabs/goya/goya.c 
b/drivers/misc/habanalabs/goya/goya.c
index af6a5760924c..c30524660761 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -464,7 +464,6 @@ int goya_get_fixed_properties(struct hl_device *hdev)
prop->fw_security_disabled = true;
prop->fw_security_status_valid = false;
prop->hard_reset_done_by_fw = false;
-   prop->fw_cpucp_ack_with_pi = false;
 
return 0;
 }
@@ -1189,6 +1188,7 @@ static int goya_stop_external_queues(struct hl_device 
*hdev)
 int goya_init_cpu_queues(struct hl_device *hdev)
 {
struct goya_device *goya = hdev->asic_specific;
+   struct asic_fixed_properties *prop = 

[PATCH 2/3] habanalabs: return block size + block ID

2021-02-06 Thread Oded Gabbay
When user gives us a block address to get its ID to mmap it, he also
needs to get from us the block size to pass to the driver in the mmap
function.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/habanalabs.h |  4 ++-
 drivers/misc/habanalabs/common/memory.c | 19 +--
 drivers/misc/habanalabs/gaudi/gaudi.c   |  2 +-
 drivers/misc/habanalabs/goya/goya.c |  2 +-
 include/uapi/misc/habanalabs.h  | 27 -
 5 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 46c37b0c704a..dc9f5a83dfc9 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -862,6 +862,8 @@ enum div_select_defs {
  *   showing it to users.
  * @ack_protection_bits_errors: ack and dump all security violations
  * @get_hw_block_id: retrieve a HW block id to be used by the user to mmap it.
+ *   also returns the size of the block if caller supplies
+ *   a valid pointer for it
  * @hw_block_mmap: mmap a HW block with a given id.
  * @enable_events_from_fw: send interrupt to firmware to notify them the
  * driver is ready to receive asynchronous events. This
@@ -980,7 +982,7 @@ struct hl_asic_funcs {
u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
void (*ack_protection_bits_errors)(struct hl_device *hdev);
int (*get_hw_block_id)(struct hl_device *hdev, u64 block_addr,
-   u32 *block_id);
+   u32 *block_size, u32 *block_id);
int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
u32 block_id, u32 block_size);
void (*enable_events_from_fw)(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/common/memory.c 
b/drivers/misc/habanalabs/common/memory.c
index 7171e8820a2d..7cadf75ebb81 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -1289,12 +1289,13 @@ static int unmap_device_va(struct hl_ctx *ctx, struct 
hl_mem_in *args,
return rc;
 }
 
-static int map_block(struct hl_device *hdev, u64 address, u64 *handle)
+static int map_block(struct hl_device *hdev, u64 address, u64 *handle,
+   u32 *size)
 {
u32 block_id = 0;
int rc;
 
-   rc = hdev->asic_funcs->get_hw_block_id(hdev, address, &block_id);
+   rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id);
 
*handle = block_id | HL_MMAP_TYPE_BLOCK;
*handle <<= PAGE_SHIFT;
@@ -1371,7 +1372,7 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union 
hl_mem_args *args)
struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx;
u64 block_handle, device_addr = 0;
-   u32 handle = 0;
+   u32 handle = 0, block_size;
int rc;
 
switch (args->in.op) {
@@ -1416,8 +1417,9 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union 
hl_mem_args *args)
 
case HL_MEM_OP_MAP_BLOCK:
rc = map_block(hdev, args->in.map_block.block_addr,
-   &block_handle);
-   args->out.handle = block_handle;
+   &block_handle, &block_size);
+   args->out.block_handle = block_handle;
+   args->out.block_size = block_size;
break;
 
default:
@@ -1437,7 +1439,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx;
u64 block_handle, device_addr = 0;
-   u32 handle = 0;
+   u32 handle = 0, block_size;
int rc;
 
if (!hl_device_operational(hdev, &status)) {
@@ -1524,8 +1526,9 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 
case HL_MEM_OP_MAP_BLOCK:
rc = map_block(hdev, args->in.map_block.block_addr,
-   &block_handle);
-   args->out.handle = block_handle;
+   &block_handle, &block_size);
+   args->out.block_handle = block_handle;
+   args->out.block_size = block_size;
break;
 
default:
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index f937d90786b2..35342edd4a02 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -8490,7 +8490,7 @@ static u64 gaudi_get_device_time(struct hl_device *hdev)
 }
 
 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
-   u32 *block_id)
+   u32 *block_size,

[PATCH 1/4] habanalabs: improve communication protocol with cpucp

2021-02-02 Thread Oded Gabbay
From: Ofir Bitton 

The current messaging communication protocol with cpucp can get out
of sync due to coherency issues. In order to improve the protocol
reliability, we modify the protocol to expect a different
acknowledgment for every packet sent to cpucp.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c| 17 +++--
 drivers/misc/habanalabs/common/habanalabs.h |  3 +++
 drivers/misc/habanalabs/gaudi/gaudi.c   |  6 +-
 drivers/misc/habanalabs/goya/goya.c |  6 +-
 .../misc/habanalabs/include/common/hl_boot_if.h |  5 +
 5 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index ba6920f2b4ab..31b52a223f02 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -90,9 +90,10 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 
opcode)
 int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
u16 len, u32 timeout, u64 *result)
 {
+   struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id];
struct cpucp_packet *pkt;
dma_addr_t pkt_dma_addr;
-   u32 tmp;
+   u32 tmp, expected_ack_val;
int rc = 0;
 
pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
@@ -115,14 +116,22 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 
hw_queue_id, u32 *msg,
goto out;
}
 
+   /* set fence to a non valid value */
+   pkt->fence = UINT_MAX;
+
rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
if (rc) {
dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
goto out;
}
 
+   if (hdev->asic_prop.fw_cpucp_ack_with_pi)
+   expected_ack_val = queue->pi;
+   else
+   expected_ack_val = CPUCP_PACKET_FENCE_VAL;
+
rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
-   (tmp == CPUCP_PACKET_FENCE_VAL), 1000,
+   (tmp == expected_ack_val), 1000,
timeout, true);
 
hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
@@ -777,6 +786,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 
cpu_boot_status_reg,
CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
prop->hard_reset_done_by_fw = true;
 
+   if (prop->fw_boot_cpu_security_map &
+   CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
+   prop->fw_cpucp_ack_with_pi = true;
+
dev_dbg(hdev->dev,
"Firmware boot CPU security status %#x\n",
prop->fw_boot_cpu_security_map);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 30f32f2edb8a..3c54010f7ab9 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -419,6 +419,8 @@ struct hl_mmu_properties {
  *from BOOT_DEV_STS0
  * @dram_supports_virtual_memory: is there an MMU towards the DRAM
  * @hard_reset_done_by_fw: true if firmware is handling hard reset flow
+ * @fw_cpucp_ack_with_pi: true if cpucp is acking messages with the PQ PI
+ *instead of a magic number
  * @num_functional_hbms: number of functional HBMs in each DCORE.
  */
 struct asic_fixed_properties {
@@ -479,6 +481,7 @@ struct asic_fixed_properties {
u8  fw_security_status_valid;
u8  dram_supports_virtual_memory;
u8  hard_reset_done_by_fw;
+   u8  fw_cpucp_ack_with_pi;
u8  num_functional_hbms;
 };
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 52fcaf25531a..006c34ae35c2 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -533,6 +533,7 @@ static int gaudi_get_fixed_properties(struct hl_device 
*hdev)
prop->fw_security_disabled = true;
prop->fw_security_status_valid = false;
prop->hard_reset_done_by_fw = false;
+   prop->fw_cpucp_ack_with_pi = false;
 
return 0;
 }
@@ -4438,9 +4439,12 @@ static void gaudi_ring_doorbell(struct hl_device *hdev, 
u32 hw_queue_id, u32 pi)
/* ring the doorbell */
WREG32(db_reg_offset, db_value);
 
-   if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
+   if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
+   /* make sure device CPU will read latest data from host *

[PATCH 2/4] habanalabs: support fetching first available user CQ

2021-02-02 Thread Oded Gabbay
From: Ofir Bitton 

User must be aware of the available CQs when it needs to use them.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/habanalabs.h   | 2 ++
 drivers/misc/habanalabs/common/habanalabs_ioctl.c | 3 ++-
 drivers/misc/habanalabs/gaudi/gaudi.c | 3 +++
 drivers/misc/habanalabs/goya/goya.c   | 3 +++
 include/uapi/misc/habanalabs.h| 3 +++
 5 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 3c54010f7ab9..98163317ec43 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -411,6 +411,7 @@ struct hl_mmu_properties {
  * @first_available_user_mon: first monitor available for the user
  * @first_available_user_msix_interrupt: first available msix interrupt
  *   reserved for the user
+ * @first_available_cq: first available CQ for the user.
  * @tpc_enabled_mask: which TPCs are enabled.
  * @completion_queues_count: number of completion queues.
  * @fw_security_disabled: true if security measures are disabled in firmware,
@@ -475,6 +476,7 @@ struct asic_fixed_properties {
u16 first_available_user_sob[HL_MAX_DCORES];
u16 first_available_user_mon[HL_MAX_DCORES];
u16 first_available_user_msix_interrupt;
+   u16 first_available_cq[HL_MAX_DCORES];
u8  tpc_enabled_mask;
u8  completion_queues_count;
u8  fw_security_disabled;
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c 
b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index e86f46d4b613..03af61cecd37 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -397,7 +397,8 @@ static int sync_manager_info(struct hl_fpriv *hpriv, struct 
hl_info_args *args)
prop->first_available_user_sob[args->dcore_id];
sm_info.first_available_monitor =
prop->first_available_user_mon[args->dcore_id];
-
+   sm_info.first_available_cq =
+   prop->first_available_cq[args->dcore_id];
 
return copy_to_user(out, &sm_info, min_t(size_t, (size_t) max_size,
sizeof(sm_info))) ? -EFAULT : 0;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 006c34ae35c2..8fc0de3cf3a9 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -529,6 +529,9 @@ static int gaudi_get_fixed_properties(struct hl_device 
*hdev)
 
prop->first_available_user_msix_interrupt = USHRT_MAX;
 
+   for (i = 0 ; i < HL_MAX_DCORES ; i++)
+   prop->first_available_cq[i] = USHRT_MAX;
+
/* disable fw security for now, set it in a later stage */
prop->fw_security_disabled = true;
prop->fw_security_status_valid = false;
diff --git a/drivers/misc/habanalabs/goya/goya.c 
b/drivers/misc/habanalabs/goya/goya.c
index 53db7e966866..d26b405f0c17 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -457,6 +457,9 @@ int goya_get_fixed_properties(struct hl_device *hdev)
 
prop->first_available_user_msix_interrupt = USHRT_MAX;
 
+   for (i = 0 ; i < HL_MAX_DCORES ; i++)
+   prop->first_available_cq[i] = USHRT_MAX;
+
/* disable fw security for now, set it in a later stage */
prop->fw_security_disabled = true;
prop->fw_security_status_valid = false;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index ebde42b37b43..64ae83b5f8e5 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -414,10 +414,13 @@ struct hl_pll_frequency_info {
  * struct hl_info_sync_manager - sync manager information
  * @first_available_sync_object: first available sob
  * @first_available_monitor: first available monitor
+ * @first_available_cq: first available cq
  */
 struct hl_info_sync_manager {
__u32 first_available_sync_object;
__u32 first_available_monitor;
+   __u32 first_available_cq;
+   __u32 reserved;
 };
 
 /**
-- 
2.25.1



[PATCH 4/4] habanalabs: enable F/W events after init done

2021-02-02 Thread Oded Gabbay
Only after the initialization of the device is done, the driver is
ready to receive events from the F/W. The driver can't handle events
before that because of races so it will ignore events. In case of
a fatal event, the driver won't know about it and the device will be
operational although it shouldn't be.

Same logic should be applied after hard-reset.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/device.c | 23 +
 drivers/misc/habanalabs/common/habanalabs.h |  9 ++--
 drivers/misc/habanalabs/gaudi/gaudi.c   | 10 ++---
 drivers/misc/habanalabs/goya/goya.c | 12 +++
 4 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c 
b/drivers/misc/habanalabs/common/device.c
index 59219c862ca0..15fcb5c31c4b 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1159,12 +1159,20 @@ int hl_device_reset(struct hl_device *hdev, bool 
hard_reset,
atomic_set(&hdev->in_reset, 0);
hdev->needs_reset = false;
 
-   if (hard_reset)
+   dev_notice(hdev->dev, "Successfully finished resetting the device\n");
+
+   if (hard_reset) {
hdev->hard_reset_cnt++;
-   else
-   hdev->soft_reset_cnt++;
 
-   dev_warn(hdev->dev, "Successfully finished resetting the device\n");
+   /* After reset is done, we are ready to receive events from
+* the F/W. We can't do it before because we will ignore events
+* and if those events are fatal, we won't know about it and
+* the device will be operational although it shouldn't be
+*/
+   hdev->asic_funcs->enable_events_from_fw(hdev);
+   } else {
+   hdev->soft_reset_cnt++;
+   }
 
return 0;
 
@@ -1415,6 +1423,13 @@ int hl_device_init(struct hl_device *hdev, struct class 
*hclass)
 
hdev->init_done = true;
 
+   /* After initialization is done, we are ready to receive events from
+* the F/W. We can't do it before because we will ignore events and if
+* those events are fatal, we won't know about it and the device will
+* be operational although it shouldn't be
+*/
+   hdev->asic_funcs->enable_events_from_fw(hdev);
+
return 0;
 
 release_ctx:
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 98163317ec43..18ed3a6000b0 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -860,12 +860,16 @@ enum div_select_defs {
  *   and place them in the relevant cs jobs
  * @collective_wait_create_jobs: allocate collective wait cs jobs
  * @scramble_addr: Routine to scramble the address prior of mapping it
- *  in the MMU.
+ * in the MMU.
  * @descramble_addr: Routine to de-scramble the address prior of
- *  showing it to users.
+ *   showing it to users.
  * @ack_protection_bits_errors: ack and dump all security violations
  * @get_hw_block_id: retrieve a HW block id to be used by the user to mmap it.
  * @hw_block_mmap: mmap a HW block with a given id.
+ * @enable_events_from_fw: send interrupt to firmware to notify them the
+ * driver is ready to receive asynchronous events. This
+ * function should be called during the first init and
+ * after every hard-reset of the device
  */
 struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
@@ -982,6 +986,7 @@ struct hl_asic_funcs {
u32 *block_id);
int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
u32 block_id, u32 block_size);
+   void (*enable_events_from_fw)(struct hl_device *hdev);
 };
 
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index b929e602fa3d..6905857b363b 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -1383,8 +1383,6 @@ static int gaudi_late_init(struct hl_device *hdev)
return rc;
}
 
-   WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
-
rc = gaudi_fetch_psoc_frequency(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
@@ -8500,6 +8498,11 @@ static int gaudi_block_mmap(struct hl_device *hdev,
return -EPERM;
 }
 
+static void gaudi_enable_events_from_fw(struct hl_device *hdev)
+{
+   WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
+}
+
 static const struct hl_asic_funcs gaudi_funcs = {
.early_init = gaudi_e

[PATCH 3/4] habanalabs/gaudi: use HBM_ECC_EN bit for ECC ERR

2021-02-02 Thread Oded Gabbay
From: Ohad Sharabi 

The driver should use ECC info from FW only if HBM ECC CAP is set.
Otherwise, try to fetch the data from MC regs only if security is
disabled.

Signed-off-by: Ohad Sharabi 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 18 +++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 8fc0de3cf3a9..b929e602fa3d 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7105,7 +7105,9 @@ static int gaudi_hbm_read_interrupts(struct hl_device 
*hdev, int device,
u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
int err = 0;
 
-   if (!hdev->asic_prop.fw_security_disabled) {
+   if (hdev->asic_prop.fw_security_status_valid &&
+   (hdev->asic_prop.fw_app_security_map &
+   CPU_BOOT_DEV_STS0_HBM_ECC_EN)) {
if (!hbm_ecc_data) {
dev_err(hdev->dev, "No FW ECC data");
return 0;
@@ -7127,14 +7129,24 @@ static int gaudi_hbm_read_interrupts(struct hl_device 
*hdev, int device,
le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
 
dev_err(hdev->dev,
-   "HBM%d pc%d ECC: TYPE=%d, WR_PAR=%d, RD_PAR=%d, 
CA_PAR=%d, SERR=%d, DERR=%d\n",
-   device, ch, type, wr_par, rd_par, ca_par, serr, derr);
+   "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, 
CA_PAR=%d, SERR=%d, DERR=%d\n",
+   device, ch, wr_par, rd_par, ca_par, serr, derr);
+   dev_err(hdev->dev,
+   "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 
1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
+   device, ch, hbm_ecc_data->first_addr, type,
+   hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
+   hbm_ecc_data->dec_cnt);
 
err = 1;
 
return 0;
}
 
+   if (!hdev->asic_prop.fw_security_disabled) {
+   dev_info(hdev->dev, "Cannot access MC regs for ECC data while 
security is enabled\n");
+   return 0;
+   }
+
base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x);
-- 
2.25.1



[git pull] habanalabs pull request for kernel 5.12

2021-01-27 Thread Oded Gabbay
Hi Greg,

This is habanalabs pull request for the merge window of kernel 5.12.
It contains changes, new features and support for new firmware.
Details are in the tag.

Thanks,
Oded

The following changes since commit 0fc99422bc034de018607ef6b70f92d4bc4a236d:

  firmware: xilinx: Remove PM_API_MAX value (2021-01-26 19:38:54 +0100)

are available in the Git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git 
tags/misc-habanalabs-next-2021-01-27

for you to fetch changes up to f1aebf5e3d606a2a910eed54bc8d17655f12b606:

  habanalabs: update to latest hl_boot_if.h spec from F/W (2021-01-27 21:03:51 
+0200)


This tag contains habanalabs driver changes for v5.12:

- Add feature called "staged command submissions". In this feature,
  the driver allows the user to submit multiple command submissions
  that describe a single pass on the deep learning graph. The driver
  tracks the completion of the entire pass by the last stage CS.

- Update code to support the latest firmware image

- Optimizations and improvements to MMU code:
  - Support page size that is not power-of-2
  - Make the locks scheme simpler
  - mmap areas in device configuration space to userspace

- Security fixes:
  - Make ETR non-secured
  - Remove access to kernel memory through debug-fs interface
  - Remove access through PCI bar to SyncManager register block
in Gaudi

- Many small bug fixes


Alon Mizrahi (2):
  habanalabs: replace WARN/WARN_ON with dev_crit in driver
  habanalabs: return dram virtual address in info ioctl

Christophe JAILLET (1):
  habanalabs: Use 'dma_set_mask_and_coherent()'

Moti Haimovski (2):
  habanalabs: report dram_page_size in hw_ip_info ioctl
  habanalabs: support non power-of-2 DRAM phys page sizes

Oded Gabbay (8):
  habanalabs: update firmware boot interface
  habanalabs: add ASIC property of functional HBMs
  habanalabs: update to latest hl_boot_if.h
  habanalabs: update email address in sysfs/debugfs docs
  CREDITS: update email address and home address
  habanalabs: update SyncManager interrupt handling
  habanalabs/gaudi: unmask HBM interrupts after handling
  habanalabs: update to latest hl_boot_if.h spec from F/W

Ofir Bitton (20):
  habanalabs: Init the VM module for kernel context
  habanalabs/gaudi: support CS with no completion
  habanalabs: allow user to pass a staged submission seq
  habanalabs/gaudi: remove duplicated gaudi packets masks
  habanalabs/gaudi: add debug prints for security status
  habanalabs/gaudi: set uninitialized symbol
  habanalabs: remove access to kernel memory using debugfs
  habanalabs: report correct dram size in info ioctl
  habanalabs: read device boot errors after cpucp is up
  habanalabs: separate common code to dedicated folders
  habanalabs: increment ctx ref from within a cs allocation
  habanalabs: add driver support for internal cb scheduling
  habanalabs/gaudi: remove PCI access to SM block
  habanalabs/gaudi: print sync manager SEI interrupt info
  habanalabs: ignore F/W BMC errors in case no BMC present
  habanalabs: add security violations dump to debugfs
  habanalabs: add user available interrupt to hw_ip
  habanalabs: add new mem ioctl op for mapping hw blocks
  habanalabs: add CS completion and timeout properties
  habanalabs: staged submission support

Ohad Sharabi (4):
  habanalabs: refactor MMU locks code
  habanalabs/goya: move mmu_prepare to context init
  habanalabs: modify device_idle interface
  habanalabs: fix ETR security issue

Omer Shpigelman (2):
  habanalabs: kernel doc format in memory functions
  habanalabs: modify memory functions signatures

farah kassabri (2):
  habanalabs: always try to use the hint address
  habanalabs: fix MMU debugfs related nodes

 CREDITS|   8 +-
 .../ABI/testing/debugfs-driver-habanalabs  |  50 +-
 Documentation/ABI/testing/sysfs-driver-habanalabs  |  58 +-
 drivers/misc/habanalabs/common/Makefile|  10 +-
 drivers/misc/habanalabs/common/asid.c  |   6 +-
 drivers/misc/habanalabs/common/command_buffer.c|   8 +-
 .../misc/habanalabs/common/command_submission.c| 473 ++--
 drivers/misc/habanalabs/common/context.c   |  33 +-
 drivers/misc/habanalabs/common/debugfs.c   |  43 +-
 drivers/misc/habanalabs/common/device.c|  23 +-
 drivers/misc/habanalabs/common/firmware_if.c   | 143 +++--
 drivers/misc/habanalabs/common/habanalabs.h| 100 +++-
 drivers/misc/habanalabs/common/habanalabs_ioctl.c  |  22 +-
 drivers/misc/habanalabs/common/hw_queue.c  |  51 +-
 drivers/misc/habanalabs/common/memory.c| 614 +
 drivers/mi

[PATCH 2/3] habanalabs/gaudi: unmask HBM interrupts after handling

2021-01-26 Thread Oded Gabbay
As the driver does with all interrupts, we need to tell F/W to unmask
the HBM interrupts after the driver handled them.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index a9bd5aef6c02..52fcaf25531a 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7377,6 +7377,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
gaudi_hbm_read_interrupts(hdev,
gaudi_hbm_event_to_dev(event_type),
&eq_entry->hbm_ecc_data);
+   hl_fw_unmask_irq(hdev, event_type);
break;
 
case GAUDI_EVENT_TPC0_DEC:
-- 
2.25.1



[PATCH 1/3] habanalabs: update SyncManager interrupt handling

2021-01-26 Thread Oded Gabbay
The firmware provides more information about SyncManager events.
Adjust the code to the latest firmware interface file.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 14 +++---
 drivers/misc/habanalabs/include/common/cpucp_if.h | 11 +--
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 1348016309e3..a9bd5aef6c02 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -6860,24 +6860,24 @@ static void gaudi_print_sm_sei_info(struct hl_device 
*hdev, u16 event_type,
u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
 
switch (sei_data->sei_cause) {
-   case GAUDI_SM_SEI_SO_OVERFLOW:
+   case SM_SEI_SO_OVERFLOW:
dev_err(hdev->dev,
"SM %u SEI Error: SO %u overflow/underflow",
-   index, le16_to_cpu(sei_data->sei_log));
+   index, le32_to_cpu(sei_data->sei_log));
break;
-   case GAUDI_SM_SEI_LBW_4B_UNALIGNED:
+   case SM_SEI_LBW_4B_UNALIGNED:
dev_err(hdev->dev,
"SM %u SEI Error: Unaligned 4B LBW access, monitor 
agent address low - %#x",
-   index, le16_to_cpu(sei_data->sei_log));
+   index, le32_to_cpu(sei_data->sei_log));
break;
-   case GAUDI_SM_SEI_AXI_RESPONSE_ERR:
+   case SM_SEI_AXI_RESPONSE_ERR:
dev_err(hdev->dev,
"SM %u SEI Error: AXI ID %u response error",
-   index, le16_to_cpu(sei_data->sei_log));
+   index, le32_to_cpu(sei_data->sei_log));
break;
default:
dev_err(hdev->dev, "Unknown SM SEI cause %u",
-   le16_to_cpu(sei_data->sei_log));
+   le32_to_cpu(sei_data->sei_log));
break;
}
 }
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h 
b/drivers/misc/habanalabs/include/common/cpucp_if.h
index d75d1077461b..b77c1c16c32c 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -58,10 +58,17 @@ struct hl_eq_ecc_data {
__u8 pad[7];
 };
 
+enum hl_sm_sei_cause {
+   SM_SEI_SO_OVERFLOW,
+   SM_SEI_LBW_4B_UNALIGNED,
+   SM_SEI_AXI_RESPONSE_ERR
+};
+
 struct hl_eq_sm_sei_data {
-   __le16 sei_log;
+   __le32 sei_log;
+   /* enum hl_sm_sei_cause */
__u8 sei_cause;
-   __u8 pad[5];
+   __u8 pad[3];
 };
 
 struct hl_eq_entry {
-- 
2.25.1



[PATCH 3/3] habanalabs: update to latest hl_boot_if.h spec from F/W

2021-01-26 Thread Oded Gabbay
It adds the definition for indication that the F/W handles HBM ECC
events.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/include/common/hl_boot_if.h | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h 
b/drivers/misc/habanalabs/include/common/hl_boot_if.h
index e29c77bdea07..57785478a4ef 100644
--- a/drivers/misc/habanalabs/include/common/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h
@@ -69,8 +69,9 @@
  * image has failed to match expected
  * checksum. Trying to program image again
  * might solve this.
+ *
  * CPU_BOOT_ERR0_PLL_FAIL  PLL settings failed, meaning that one
- * of the PLLs remained in REF_CLK
+ * of the PLLs remains in REF_CLK
  *
  * CPU_BOOT_ERR0_ENABLED   Error registers enabled.
  * This is a main indication that the
@@ -161,6 +162,10 @@
  * FW initialized Clock Gating.
  * Initialized in: preboot
  *
+ * CPU_BOOT_DEV_STS0_HBM_ECC_ENHBM ECC handling Enabled.
+ * FW handles HBM ECC indications.
+ * Initialized in: linux
+ *
  * CPU_BOOT_DEV_STS0_ENABLED   Device status register enabled.
  * This is a main indication that the
  * running FW populates the device status
@@ -184,6 +189,7 @@
 #define CPU_BOOT_DEV_STS0_PLL_INFO_EN  (1 << 11)
 #define CPU_BOOT_DEV_STS0_SP_SRAM_EN   (1 << 12)
 #define CPU_BOOT_DEV_STS0_CLK_GATE_EN  (1 << 13)
+#define CPU_BOOT_DEV_STS0_HBM_ECC_EN   (1 << 14)
 #define CPU_BOOT_DEV_STS0_ENABLED  (1 << 31)
 
 enum cpu_boot_status {
-- 
2.25.1



[PATCH 3/3] habanalabs: fix ETR security issue

2021-01-26 Thread Oded Gabbay
From: Ohad Sharabi 

ETR should always be non-secured as it is used by the users to record
profiling/trace data.
This patch fixes the configuration to match those requirements.

Signed-off-by: Ohad Sharabi 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 .../misc/habanalabs/gaudi/gaudi_coresight.c| 18 +++---
 drivers/misc/habanalabs/goya/goya_coresight.c  | 11 +--
 .../habanalabs/include/gaudi/gaudi_masks.h |  5 -
 .../include/goya/asic_reg/goya_masks.h |  5 -
 4 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c 
b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
index 88a09d42e111..6e56fa1c6c69 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c
@@ -634,9 +634,21 @@ static int gaudi_config_etr(struct hl_device *hdev,
WREG32(mmPSOC_ETR_BUFWM, 0x3FFC);
WREG32(mmPSOC_ETR_RSZ, input->buffer_size);
WREG32(mmPSOC_ETR_MODE, input->sink_mode);
-   /* Workaround for H3 #HW-2075 bug: use small data chunks */
-   WREG32(mmPSOC_ETR_AXICTL, (is_host ? 0 : 0x700) |
-   PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT);
+   if (hdev->asic_prop.fw_security_disabled) {
+   /* make ETR not privileged */
+   val = FIELD_PREP(
+   PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK, 0);
+   /* make ETR non-secured (inverted logic) */
+   val |= FIELD_PREP(
+   PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK, 1);
+   /*
+* Workaround for H3 #HW-2075 bug: use small data
+* chunks
+*/
+   val |= FIELD_PREP(PSOC_ETR_AXICTL_WRBURSTLEN_MASK,
+   is_host ? 0 : 7);
+   WREG32(mmPSOC_ETR_AXICTL, val);
+   }
WREG32(mmPSOC_ETR_DBALO,
lower_32_bits(input->buffer_address));
WREG32(mmPSOC_ETR_DBAHI,
diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c 
b/drivers/misc/habanalabs/goya/goya_coresight.c
index 6fa03933b438..6b7445cca580 100644
--- a/drivers/misc/habanalabs/goya/goya_coresight.c
+++ b/drivers/misc/habanalabs/goya/goya_coresight.c
@@ -434,8 +434,15 @@ static int goya_config_etr(struct hl_device *hdev,
WREG32(mmPSOC_ETR_BUFWM, 0x3FFC);
WREG32(mmPSOC_ETR_RSZ, input->buffer_size);
WREG32(mmPSOC_ETR_MODE, input->sink_mode);
-   WREG32(mmPSOC_ETR_AXICTL,
-   0x700 | PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT);
+   if (hdev->asic_prop.fw_security_disabled) {
+   /* make ETR not privileged */
+   val = FIELD_PREP(PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK, 0);
+   /* make ETR non-secured (inverted logic) */
+   val |= FIELD_PREP(PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK, 1);
+   /* burst size 8 */
+   val |= FIELD_PREP(PSOC_ETR_AXICTL_WRBURSTLEN_MASK, 7);
+   WREG32(mmPSOC_ETR_AXICTL, val);
+   }
WREG32(mmPSOC_ETR_DBALO,
lower_32_bits(input->buffer_address));
WREG32(mmPSOC_ETR_DBAHI,
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h 
b/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h
index b9b90d079e23..b53aeda9a982 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h
@@ -388,7 +388,10 @@ enum axi_id {
 #define RAZWI_INITIATOR_ID_X_Y_TPC6RAZWI_INITIATOR_ID_X_Y(7, 6)
 #define RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5  RAZWI_INITIATOR_ID_X_Y(8, 6)
 
-#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT   1
+#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT 1
+#define PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK  0x1
+#define PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK  0x2
+#define PSOC_ETR_AXICTL_WRBURSTLEN_MASK0xF00
 
 /* STLB_CACHE_INV */
 #define STLB_CACHE_INV_PRODUCER_INDEX_SHIFT  0
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h 
b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h
index 067489bd048e..9ff3cb245580 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h
@@ -259,6 +259,9 @@
 #define DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT
 #define DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT
 
-#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT 

[PATCH 2/3] habanalabs: staged submission support

2021-01-26 Thread Oded Gabbay
From: Ofir Bitton 

We introduce a new mechanism named Staged Submission.
This mechanism allows the user to send a whole CS in pieces.
Only the last CS in the group requires a completion; the other CSs
do not. The timeout timer will be triggered upon reception of the first
CS in the group.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 .../habanalabs/common/command_submission.c| 214 --
 drivers/misc/habanalabs/common/habanalabs.h   |   9 +
 drivers/misc/habanalabs/common/hw_queue.c |  27 +++
 drivers/misc/habanalabs/gaudi/gaudi.c |   1 +
 4 files changed, 227 insertions(+), 24 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c 
b/drivers/misc/habanalabs/common/command_submission.c
index 57daff0e59ae..7bd4a03b3429 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -334,6 +334,133 @@ static void complete_job(struct hl_device *hdev, struct 
hl_cs_job *job)
cs_job_put(job);
 }
 
+/*
+ * hl_staged_cs_find_first - locate the first CS in this staged submission
+ *
+ * @hdev: pointer to device structure
+ * @cs_seq: staged submission sequence number
+ *
+ * @note: This function must be called under 'hdev->cs_mirror_lock'
+ *
+ * Find and return a CS pointer with the given sequence
+ */
+struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
+{
+   struct hl_cs *cs;
+
+   list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
+   if (cs->staged_cs && cs->staged_first &&
+   cs->sequence == cs_seq)
+   return cs;
+
+   return NULL;
+}
+
+/*
+ * is_staged_cs_last_exists - returns true if the last CS in sequence exists
+ *
+ * @hdev: pointer to device structure
+ * @cs: staged submission member
+ *
+ */
+bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
+{
+   struct hl_cs *last_entry;
+
+   last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
+   staged_cs_node);
+
+   if (last_entry->staged_last)
+   return true;
+
+   return false;
+}
+
+/*
+ * staged_cs_get - get CS reference if this CS is a part of a staged CS
+ *
+ * @hdev: pointer to device structure
+ * @cs: current CS
+ * @cs_seq: staged submission sequence number
+ *
+ * Increment CS reference for every CS in this staged submission except for
+ * the CS which get completion.
+ */
+static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
+{
+   /* Only the last CS in this staged submission will get a completion.
+* We must increment the reference for all other CS's in this
+* staged submission.
+* Once we get a completion we will release the whole staged submission.
+*/
+   if (!cs->staged_last)
+   cs_get(cs);
+}
+
+/*
+ * staged_cs_put - put a CS in case it is part of staged submission
+ *
+ * @hdev: pointer to device structure
+ * @cs: CS to put
+ *
+ * This function decrements a CS reference (for a non completion CS)
+ */
+static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
+{
+   /* We release all CS's in a staged submission except the last
+* CS which we have never incremented its reference.
+*/
+   if (!cs_needs_completion(cs))
+   cs_put(cs);
+}
+
+static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
+{
+   bool next_entry_found = false;
+   struct hl_cs *next;
+
+   if (!cs_needs_timeout(cs))
+   return;
+
+   spin_lock(&hdev->cs_mirror_lock);
+
+   /* We need to handle tdr only once for the complete staged submission.
+* Hence, we choose the CS that reaches this function first which is
+* the CS marked as 'staged_last'.
+*/
+   if (cs->staged_cs && cs->staged_last)
+   cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
+
+   spin_unlock(&hdev->cs_mirror_lock);
+
+   /* Don't cancel TDR in case this CS was timedout because we might be
+* running from the TDR context
+*/
+   if (cs && (cs->timedout ||
+   hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT))
+   return;
+
+   if (cs && cs->tdr_active)
+   cancel_delayed_work_sync(&cs->work_tdr);
+
+   spin_lock(&hdev->cs_mirror_lock);
+
+   /* queue TDR for next CS */
+   list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
+   if (cs_needs_timeout(next)) {
+   next_entry_found = true;
+   break;
+   }
+
+   if (next_entry_found && !next->tdr_active) {
+   next->t

[PATCH 1/3] habanalabs: modify device_idle interface

2021-01-26 Thread Oded Gabbay
From: Ohad Sharabi 

Currently this API uses a single 64-bit mask for engines idle indication.
Recently, it was observed that more bits are needed for some ASICs.
This patch modifies the use of the idle mask and the idle_extensions
mask.

Signed-off-by: Ohad Sharabi 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/context.c  |  9 +++--
 drivers/misc/habanalabs/common/debugfs.c  |  2 +-
 drivers/misc/habanalabs/common/habanalabs.h   |  4 +-
 .../misc/habanalabs/common/habanalabs_ioctl.c |  5 ++-
 drivers/misc/habanalabs/gaudi/gaudi.c | 37 ---
 drivers/misc/habanalabs/goya/goya.c   | 21 +--
 include/uapi/misc/habanalabs.h|  4 +-
 7 files changed, 40 insertions(+), 42 deletions(-)

diff --git a/drivers/misc/habanalabs/common/context.c 
b/drivers/misc/habanalabs/common/context.c
index 829fe98eed61..cda871afb8f4 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -12,7 +12,7 @@
 static void hl_ctx_fini(struct hl_ctx *ctx)
 {
struct hl_device *hdev = ctx->hdev;
-   u64 idle_mask = 0;
+   u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
int i;
 
/* Release all allocated pending cb's, those cb's were never
@@ -55,10 +55,11 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 
if ((!hdev->pldm) && (hdev->pdev) &&
(!hdev->asic_funcs->is_device_idle(hdev,
-   &idle_mask, NULL)))
+   idle_mask,
+   HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)))
dev_notice(hdev->dev,
-   "device not idle after user context is closed 
(0x%llx)\n",
-   idle_mask);
+   "device not idle after user context is 
closed (0x%llx, 0x%llx)\n",
+   idle_mask[0], idle_mask[1]);
} else {
dev_dbg(hdev->dev, "closing kernel context\n");
hdev->asic_funcs->ctx_fini(ctx);
diff --git a/drivers/misc/habanalabs/common/debugfs.c 
b/drivers/misc/habanalabs/common/debugfs.c
index 9e3c1efe56ba..df847a6d19f4 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -414,7 +414,7 @@ static int engines_show(struct seq_file *s, void *data)
return 0;
}
 
-   hdev->asic_funcs->is_device_idle(hdev, NULL, s);
+   hdev->asic_funcs->is_device_idle(hdev, NULL, 0, s);
 
return 0;
 }
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index fd2fffd20ba1..be7947d69dfa 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -933,8 +933,8 @@ struct hl_asic_funcs {
void (*set_clock_gating)(struct hl_device *hdev);
void (*disable_clock_gating)(struct hl_device *hdev);
int (*debug_coresight)(struct hl_device *hdev, void *data);
-   bool (*is_device_idle)(struct hl_device *hdev, u64 *mask,
-   struct seq_file *s);
+   bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr,
+   u8 mask_len, struct seq_file *s);
int (*soft_reset_late_init)(struct hl_device *hdev);
void (*hw_queues_lock)(struct hl_device *hdev);
void (*hw_queues_unlock)(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c 
b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 628bdc56dca3..e86f46d4b613 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -145,9 +145,10 @@ static int hw_idle(struct hl_device *hdev, struct 
hl_info_args *args)
return -EINVAL;
 
hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
-   &hw_idle.busy_engines_mask_ext, NULL);
+   hw_idle.busy_engines_mask_ext,
+   HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
hw_idle.busy_engines_mask =
-   lower_32_bits(hw_idle.busy_engines_mask_ext);
+   lower_32_bits(hw_idle.busy_engines_mask_ext[0]);
 
return copy_to_user(out, &hw_idle,
min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index e1c6072e5fb3..9a3d2fb477a8 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -4538,7 +4538,6 @@ static int gaudi_scrub_device_mem(struct hl_de

[PATCH] habanalabs: add CS completion and timeout properties

2021-01-26 Thread Oded Gabbay
From: Ofir Bitton 

In order to support staged submission feature, we need to
distinguish on which command submission we want to receive
timeout and for which we want to receive completion.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 .../habanalabs/common/command_submission.c| 104 ++
 drivers/misc/habanalabs/common/habanalabs.h   |  15 ++-
 drivers/misc/habanalabs/common/hw_queue.c |  24 +++-
 3 files changed, 117 insertions(+), 26 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c 
b/drivers/misc/habanalabs/common/command_submission.c
index a5e9bb0a4855..57daff0e59ae 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -149,9 +149,10 @@ void hl_fence_get(struct hl_fence *fence)
kref_get(&fence->refcount);
 }
 
-static void hl_fence_init(struct hl_fence *fence)
+static void hl_fence_init(struct hl_fence *fence, u64 sequence)
 {
kref_init(&fence->refcount);
+   fence->cs_sequence = sequence;
fence->error = 0;
fence->timestamp = ktime_set(0, 0);
init_completion(&fence->completion);
@@ -184,6 +185,28 @@ static void cs_job_put(struct hl_cs_job *job)
kref_put(&job->refcount, cs_job_do_release);
 }
 
+bool cs_needs_completion(struct hl_cs *cs)
+{
+   /* In case this is a staged CS, only the last CS in sequence should
+* get a completion, any non staged CS will always get a completion
+*/
+   if (cs->staged_cs && !cs->staged_last)
+   return false;
+
+   return true;
+}
+
+bool cs_needs_timeout(struct hl_cs *cs)
+{
+   /* In case this is a staged CS, only the first CS in sequence should
+* get a timeout, any non staged CS will always get a timeout
+*/
+   if (cs->staged_cs && !cs->staged_first)
+   return false;
+
+   return true;
+}
+
 static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
 {
/*
@@ -225,7 +248,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct 
hl_cs_job *job)
parser.queue_type = job->queue_type;
parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
job->patched_cb = NULL;
-   parser.completion = true;
+   parser.completion = cs_needs_completion(job->cs);
 
rc = hdev->asic_funcs->cs_parser(hdev, &parser);
 
@@ -291,8 +314,21 @@ static void complete_job(struct hl_device *hdev, struct 
hl_cs_job *job)
 
hl_debugfs_remove_job(hdev, job);
 
-   if (job->queue_type == QUEUE_TYPE_EXT ||
-   job->queue_type == QUEUE_TYPE_HW)
+   /* We decrement reference only for a CS that gets completion
+* because the reference was incremented only for this kind of CS
+* right before it was scheduled.
+*
+* In staged submission, only the last CS marked as 'staged_last'
+* gets completion, hence its release function will be called from here.
+* As for all the rest CS's in the staged submission which do not get
+* completion, their CS reference will be decremented by the
+* 'staged_last' CS during the CS release flow.
+* All relevant PQ CI counters will be incremented during the CS release
+* flow by calling 'hl_hw_queue_update_ci'.
+*/
+   if (cs_needs_completion(cs) &&
+   (job->queue_type == QUEUE_TYPE_EXT ||
+   job->queue_type == QUEUE_TYPE_HW))
cs_put(cs);
 
cs_job_put(job);
@@ -347,8 +383,8 @@ static void cs_do_release(struct kref *ref)
 
hdev->asic_funcs->hw_queues_unlock(hdev);
 
-   /* Need to update CI for internal queues */
-   hl_int_hw_queue_update_ci(cs);
+   /* Need to update CI for all queue jobs that does not get completion */
+   hl_hw_queue_update_ci(cs);
 
/* remove CS from CS mirror list */
spin_lock(&hdev->cs_mirror_lock);
@@ -359,6 +395,7 @@ static void cs_do_release(struct kref *ref)
 * running from the TDR context
 */
if (!cs->timedout && hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
+   bool next_entry_found = false;
struct hl_cs *next;
 
if (cs->tdr_active)
@@ -367,10 +404,13 @@ static void cs_do_release(struct kref *ref)
spin_lock(&hdev->cs_mirror_lock);
 
/* queue TDR for next CS */
-   next = list_first_entry_or_null(&hdev->cs_mirror_list,
-   struct hl_cs, mirror_node);
+   list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
+   if (cs_needs_timeout(next)) {
+   next_

[PATCH] habanalabs: add new mem ioctl op for mapping hw blocks

2021-01-24 Thread Oded Gabbay
From: Ofir Bitton 

For future ASIC support the driver allows user to map certain regions
in the device's configuration space for direct access from userspace.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/device.c |  3 +
 drivers/misc/habanalabs/common/habanalabs.h | 16 +++-
 drivers/misc/habanalabs/common/memory.c | 93 -
 drivers/misc/habanalabs/gaudi/gaudi.c   | 15 +++-
 drivers/misc/habanalabs/goya/goya.c | 15 +++-
 include/uapi/misc/habanalabs.h  | 18 +++-
 6 files changed, 150 insertions(+), 10 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c 
b/drivers/misc/habanalabs/common/device.c
index a438279b8691..0926cfb256ef 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -142,6 +142,9 @@ static int hl_mmap(struct file *filp, struct vm_area_struct 
*vma)
switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
case HL_MMAP_TYPE_CB:
return hl_cb_mmap(hpriv, vma);
+
+   case HL_MMAP_TYPE_BLOCK:
+   return hl_hw_block_mmap(hpriv, vma);
}
 
return -EINVAL;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 4129e4e6a7b5..e105612ed577 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -28,17 +28,18 @@
 #define HL_NAME "habanalabs"
 
 /* Use upper bits of mmap offset to store habana driver specific information.
- * bits[63:62] - Encode mmap type
+ * bits[63:61] - Encode mmap type
  * bits[45:0]  - mmap offset value
  *
  * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
  *  defines are w.r.t to PAGE_SIZE
  */
-#define HL_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT)
-#define HL_MMAP_TYPE_MASK  (0x3ull << HL_MMAP_TYPE_SHIFT)
+#define HL_MMAP_TYPE_SHIFT (61 - PAGE_SHIFT)
+#define HL_MMAP_TYPE_MASK  (0x7ull << HL_MMAP_TYPE_SHIFT)
+#define HL_MMAP_TYPE_BLOCK (0x4ull << HL_MMAP_TYPE_SHIFT)
 #define HL_MMAP_TYPE_CB(0x2ull << HL_MMAP_TYPE_SHIFT)
 
-#define HL_MMAP_OFFSET_VALUE_MASK  (0x3FFFull >> PAGE_SHIFT)
+#define HL_MMAP_OFFSET_VALUE_MASK  (0x1FFFull >> PAGE_SHIFT)
 #define HL_MMAP_OFFSET_VALUE_GET(off)  (off & HL_MMAP_OFFSET_VALUE_MASK)
 
 #define HL_PENDING_RESET_PER_SEC   10
@@ -856,6 +857,8 @@ enum div_select_defs {
  * @descramble_addr: Routine to de-scramble the address prior of
  *  showing it to users.
  * @ack_protection_bits_errors: ack and dump all security violations
+ * @get_hw_block_id: retrieve a HW block id to be used by the user to mmap it.
+ * @hw_block_mmap: mmap a HW block with a given id.
  */
 struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
@@ -968,6 +971,10 @@ struct hl_asic_funcs {
u64 (*scramble_addr)(struct hl_device *hdev, u64 addr);
u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
void (*ack_protection_bits_errors)(struct hl_device *hdev);
+   int (*get_hw_block_id)(struct hl_device *hdev, u64 block_addr,
+   u32 *block_id);
+   int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
+   u32 block_id, u32 block_size);
 };
 
 
@@ -2167,6 +2174,7 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr 
*mgr,
bool map_cb, u64 *handle);
 int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 
cb_handle);
 int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
+int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
 struct hl_cb *hl_cb_get(struct hl_device *hdev,struct hl_cb_mgr *mgr,
u32 handle);
 void hl_cb_put(struct hl_cb *cb);
diff --git a/drivers/misc/habanalabs/common/memory.c 
b/drivers/misc/habanalabs/common/memory.c
index 36de0d05d3a2..7171e8820a2d 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -1289,11 +1289,88 @@ static int unmap_device_va(struct hl_ctx *ctx, struct 
hl_mem_in *args,
return rc;
 }
 
+static int map_block(struct hl_device *hdev, u64 address, u64 *handle)
+{
+   u32 block_id = 0;
+   int rc;
+
+   rc = hdev->asic_funcs->get_hw_block_id(hdev, address, &block_id);
+
+   *handle = block_id | HL_MMAP_TYPE_BLOCK;
+   *handle <<= PAGE_SHIFT;
+
+   return rc;
+}
+
+static void hw_block_vm_close(struct vm_area_struct *vma)
+{
+   struct hl_ctx *ctx = (struct hl_ctx *) vma->vm_private_data;
+
+   hl_ctx_put(ctx);
+   vma->vm_private_data = NULL;
+}
+
+static const struct vm_operations_struct hw_block_vm_ops = {
+   .close = hw_block_vm_close
+}

[git pull] habanalabs fixes for 5.11-rc5

2021-01-21 Thread Oded Gabbay
Hi Greg,

This pull request contains three important bug fixes for 5.11-rc5.
One of the fixes prevent a possible host machine crash, another
one prevents random card reset and the third adds a missing
backward compatibility to a uapi. More details are in the tag.

Thanks,
Oded

The following changes since commit cb5c681ab9037e25fcca20689c82cf034566d610:

  intel_th: pci: Add Alder Lake-P support (2021-01-21 18:54:43 +0100)

are available in the Git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git 
tags/misc-habanalabs-fixes-2021-01-21

for you to fetch changes up to 2dc4a6d79168e7e426e8ddf8e7219c9ffd13b2b1:

  habanalabs: disable FW events on device removal (2021-01-21 20:30:22 +0200)


This tag contains the following bug fixes for 5.11-rc5/6:

- Clear the fence field in the PCI counters packet before sending
  the packet to the F/W. Not clearing it might cause the driver
  and F/W to get out-of-sync

- Fix backward compatibility in the uapi of IDLE check that is
  part of the INFO IOCTL.

- Tell the F/W to not access the Host (device outbound) while
  the driver removes the device. If that happens, the server
  might crash.


Oded Gabbay (2):
  habanalabs: fix backward compatibility of idle check
  habanalabs: disable FW events on device removal

Ofir Bitton (1):
  habanalabs: zero pci counters packet before submit to FW

 drivers/misc/habanalabs/common/device.c   | 9 +
 drivers/misc/habanalabs/common/firmware_if.c  | 5 +
 drivers/misc/habanalabs/common/habanalabs_ioctl.c | 2 ++
 3 files changed, 16 insertions(+)


[PATCH 3/3] habanalabs: fix MMU debugfs related nodes

2021-01-18 Thread Oded Gabbay
From: farah kassabri 

In the mmu debugfs node, show un-scrambled physical addresses.
When reading/writing through the data nodes, the physical address
needs to be unscrambled before it is used for the pci transaction.

Signed-off-by: farah kassabri 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/debugfs.c| 21 --
 drivers/misc/habanalabs/common/habanalabs.h | 14 +++-
 drivers/misc/habanalabs/common/mmu/mmu.c| 74 ++---
 drivers/misc/habanalabs/gaudi/gaudi.c   |  3 +-
 drivers/misc/habanalabs/goya/goya.c |  3 +-
 5 files changed, 93 insertions(+), 22 deletions(-)

diff --git a/drivers/misc/habanalabs/common/debugfs.c 
b/drivers/misc/habanalabs/common/debugfs.c
index 323d0381a60a..9e3c1efe56ba 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -310,8 +310,8 @@ static int mmu_show(struct seq_file *s, void *data)
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
struct hl_ctx *ctx;
-   struct hl_mmu_hop_info hops_info;
-   u64 virt_addr = dev_entry->mmu_addr;
+   struct hl_mmu_hop_info hops_info = {0};
+   u64 virt_addr = dev_entry->mmu_addr, phys_addr;
int i;
 
if (!hdev->mmu_enable)
@@ -333,10 +333,19 @@ static int mmu_show(struct seq_file *s, void *data)
return 0;
}
 
-   seq_printf(s,
-   "asid: %u, virt_addr: 0x%llx, scrambled virt_addr: 0x%llx\n",
-   dev_entry->mmu_asid, dev_entry->mmu_addr,
-   hops_info.scrambled_vaddr);
+   phys_addr = hops_info.hop_info[hops_info.used_hops - 1].hop_pte_val;
+
+   if (hops_info.scrambled_vaddr &&
+   (dev_entry->mmu_addr != hops_info.scrambled_vaddr))
+   seq_printf(s,
+   "asid: %u, virt_addr: 0x%llx, scrambled virt_addr: 
0x%llx,\nphys_addr: 0x%llx, scrambled_phys_addr: 0x%llx\n",
+   dev_entry->mmu_asid, dev_entry->mmu_addr,
+   hops_info.scrambled_vaddr,
+   hops_info.unscrambled_paddr, phys_addr);
+   else
+   seq_printf(s,
+   "asid: %u, virt_addr: 0x%llx, phys_addr: 0x%llx\n",
+   dev_entry->mmu_asid, dev_entry->mmu_addr, phys_addr);
 
for (i = 0 ; i < hops_info.used_hops ; i++) {
seq_printf(s, "hop%d_addr: 0x%llx\n",
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index ce1a1e70a6d5..4129e4e6a7b5 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -851,8 +851,10 @@ enum div_select_defs {
  * @collective_wait_init_cs: Generate collective master/slave packets
  *   and place them in the relevant cs jobs
  * @collective_wait_create_jobs: allocate collective wait cs jobs
- * @scramble_vaddr: Routine to scramble the virtual address prior of mapping it
+ * @scramble_addr: Routine to scramble the address prior of mapping it
  *  in the MMU.
+ * @descramble_addr: Routine to de-scramble the address prior of
+ *  showing it to users.
  * @ack_protection_bits_errors: ack and dump all security violations
  */
 struct hl_asic_funcs {
@@ -963,7 +965,8 @@ struct hl_asic_funcs {
int (*collective_wait_create_jobs)(struct hl_device *hdev,
struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
u32 collective_engine_id);
-   u64 (*scramble_vaddr)(struct hl_device *hdev, u64 virt_addr);
+   u64 (*scramble_addr)(struct hl_device *hdev, u64 addr);
+   u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
void (*ack_protection_bits_errors)(struct hl_device *hdev);
 };
 
@@ -1726,13 +1729,17 @@ struct hl_mmu_per_hop_info {
  * @scrambled_vaddr: The value of the virtual address after scrambling. This
  *   address replaces the original virtual-address when mapped
  *   in the MMU tables.
+ * @unscrambled_paddr: The un-scrambled physical address.
  * @hop_info: Array holding the per-hop information used for the translation.
  * @used_hops: The number of hops used for the translation.
+ * @range_type: virtual address range type.
  */
 struct hl_mmu_hop_info {
u64 scrambled_vaddr;
+   u64 unscrambled_paddr;
struct hl_mmu_per_hop_info hop_info[MMU_ARCH_5_HOPS];
u32 used_hops;
+   enum hl_va_range_type range_type;
 };
 
 /**
@@ -2222,7 +2229,8 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct 
hl_mmu_funcs *mmu);
 int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr);
 int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
struct hl_mmu_hop_info *hops);
-u64 hl_mmu_scramble_v

[PATCH 1/3] habanalabs: always try to use the hint address

2021-01-18 Thread Oded Gabbay
From: farah kassabri 

Currently hint address is ignored in case va block page size
is not a power of 2. We need to support the user hint address also in this
case, but only if the hint address is aligned to page size.

Signed-off-by: farah kassabri 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/memory.c | 133 ++--
 1 file changed, 33 insertions(+), 100 deletions(-)

diff --git a/drivers/misc/habanalabs/common/memory.c 
b/drivers/misc/habanalabs/common/memory.c
index ada977eb136c..585425514cad 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -520,8 +520,8 @@ static inline int add_va_block(struct hl_device *hdev,
 }
 
 /**
- * get_va_block_pow2() - get a virtual block for the given size and alignment
- *   where alignment is a power of 2.
+ * get_va_block() - get a virtual block for the given size and alignment.
+ *
  * @hdev: pointer to the habanalabs device structure.
  * @va_range: pointer to the virtual addresses range.
  * @size: requested block size.
@@ -530,11 +530,11 @@ static inline int add_va_block(struct hl_device *hdev,
  *
  * This function does the following:
  * - Iterate on the virtual block list to find a suitable virtual block for the
- *   given size and alignment.
+ *   given size, hint address and alignment.
  * - Reserve the requested block and update the list.
  * - Return the start address of the virtual block.
  */
-static u64 get_va_block_pow2(struct hl_device *hdev,
+static u64 get_va_block(struct hl_device *hdev,
struct hl_va_range *va_range,
u64 size, u64 hint_addr, u32 va_block_align)
 {
@@ -542,22 +542,37 @@ static u64 get_va_block_pow2(struct hl_device *hdev,
u64 valid_start, valid_size, prev_start, prev_end, align_mask,
reserved_valid_start = 0, reserved_valid_size = 0;
bool add_prev = false;
+   bool is_align_pow_2  = is_power_of_2(va_range->page_size);
 
-   align_mask = ~((u64)va_block_align - 1);
+   if (is_align_pow_2)
+   align_mask = ~((u64)va_block_align - 1);
+   else {
+   /*
+* with non-power-of-2 range we work only with page granularity
+* and the start address is page aligned,
+* so no need for alignment checking.
+*/
+   size = DIV_ROUND_UP_ULL(size, va_range->page_size) *
+   va_range->page_size;
+   }
 
-   /* check if hint_addr is aligned */
-   if (hint_addr & (va_block_align - 1))
+   /* Check if we need to ignore hint address */
+   if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) ||
+   (!is_align_pow_2 && (hint_addr % va_range->page_size))) {
+   dev_info(hdev->dev, "Hint address 0x%llx will be ignored\n",
+   hint_addr);
hint_addr = 0;
+   }
 
mutex_lock(&va_range->lock);
 
print_va_list_locked(hdev, &va_range->list);
 
list_for_each_entry(va_block, &va_range->list, node) {
-   /* calc the first possible aligned addr */
+   /* Calc the first possible aligned addr */
valid_start = va_block->start;
 
-   if (valid_start & (va_block_align - 1)) {
+   if (is_align_pow_2 && (valid_start & (va_block_align - 1))) {
valid_start &= align_mask;
valid_start += va_block_align;
if (valid_start > va_block->end)
@@ -565,9 +580,11 @@ static u64 get_va_block_pow2(struct hl_device *hdev,
}
 
valid_size = va_block->end - valid_start;
+   if (valid_size < size)
+   continue;
 
-   if (valid_size >= size && (!new_va_block ||
-   valid_size < reserved_valid_size)) {
+   /* Pick the minimal length block which has the required size */
+   if (!new_va_block || (valid_size < reserved_valid_size)) {
new_va_block = va_block;
reserved_valid_start = valid_start;
reserved_valid_size = valid_size;
@@ -584,10 +601,14 @@ static u64 get_va_block_pow2(struct hl_device *hdev,
 
if (!new_va_block) {
dev_err(hdev->dev, "no available va block for size %llu\n",
-   size);
+   size);
goto out;
}
 
+   /*
+* Check if there is some leftover range due to reserving the new
+* va block, then return it to the main virtual addresses list.
+*/
if (reserved_val

[PATCH 2/3] habanalabs: add user available interrupt to hw_ip

2021-01-18 Thread Oded Gabbay
From: Ofir Bitton 

In order to support completions that arrive directly to the user,
the driver needs to supply the user with the first msix interrupt
that is available.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/habanalabs.h   | 3 +++
 drivers/misc/habanalabs/common/habanalabs_ioctl.c | 2 ++
 drivers/misc/habanalabs/gaudi/gaudi.c | 2 ++
 drivers/misc/habanalabs/goya/goya.c   | 2 ++
 include/uapi/misc/habanalabs.h| 6 --
 5 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index 3923b03e99aa..ce1a1e70a6d5 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -408,6 +408,8 @@ struct hl_mmu_properties {
  * @sync_stream_first_mon: first monitor available for sync stream use
  * @first_available_user_sob: first sob available for the user
  * @first_available_user_mon: first monitor available for the user
+ * @first_available_user_msix_interrupt: first available msix interrupt
+ *   reserved for the user
  * @tpc_enabled_mask: which TPCs are enabled.
  * @completion_queues_count: number of completion queues.
  * @fw_security_disabled: true if security measures are disabled in firmware,
@@ -469,6 +471,7 @@ struct asic_fixed_properties {
u16 sync_stream_first_mon;
u16 first_available_user_sob[HL_MAX_DCORES];
u16 first_available_user_mon[HL_MAX_DCORES];
+   u16 first_available_user_msix_interrupt;
u8  tpc_enabled_mask;
u8  completion_queues_count;
u8  fw_security_disabled;
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c 
b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index e216865b8102..d94825be029c 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -90,6 +90,8 @@ static int hw_ip_info(struct hl_device *hdev, struct 
hl_info_args *args)
hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od;
hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor;
 
+   hw_ip.first_available_interrupt_id =
+   prop->first_available_user_msix_interrupt;
return copy_to_user(out, &hw_ip,
min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0;
 }
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 2b01c081404a..69b3867bc151 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -527,6 +527,8 @@ static int gaudi_get_fixed_properties(struct hl_device 
*hdev)
prop->sync_stream_first_mon +
(num_sync_stream_queues * HL_RSVD_MONS);
 
+   prop->first_available_user_msix_interrupt = USHRT_MAX;
+
/* disable fw security for now, set it in a later stage */
prop->fw_security_disabled = true;
prop->fw_security_status_valid = false;
diff --git a/drivers/misc/habanalabs/goya/goya.c 
b/drivers/misc/habanalabs/goya/goya.c
index 50dcefc02cdd..82f69274def7 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -455,6 +455,8 @@ int goya_get_fixed_properties(struct hl_device *hdev)
 
prop->max_pending_cs = GOYA_MAX_PENDING_CS;
 
+   prop->first_available_user_msix_interrupt = USHRT_MAX;
+
/* disable fw security for now, set it in a later stage */
prop->fw_security_disabled = true;
prop->fw_security_status_valid = false;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index b431a70e1b8b..866355a53188 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -309,7 +309,9 @@ struct hl_info_hw_ip_info {
__u32 num_of_events;
__u32 device_id; /* PCI Device ID */
__u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */
-   __u32 reserved[2];
+   __u32 reserved;
+   __u16 first_available_interrupt_id;
+   __u16 reserved2;
__u32 cpld_version;
__u32 psoc_pci_pll_nr;
__u32 psoc_pci_pll_nf;
@@ -320,7 +322,7 @@ struct hl_info_hw_ip_info {
__u8 pad[2];
__u8 cpucp_version[HL_INFO_VERSION_MAX_LEN];
__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
-   __u64 reserved2;
+   __u64 reserved3;
__u64 dram_page_size;
 };
 
-- 
2.25.1



[PATCH 1/3] habanalabs: zero pci counters packet before submit to FW

2021-01-18 Thread Oded Gabbay
From: Ofir Bitton 

Driver does not zero some pci counters packets before sending
to FW. This causes an out of sync PI/CI between driver and FW.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index 20f77f58edef..c9a12980218a 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -402,6 +402,10 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
}
counters->rx_throughput = result;
 
+   memset(&pkt, 0, sizeof(pkt));
+   pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
+   CPUCP_PKT_CTL_OPCODE_SHIFT);
+
/* Fetch PCI tx counter */
pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
@@ -414,6 +418,7 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
counters->tx_throughput = result;
 
/* Fetch PCI replay counter */
+   memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
 
-- 
2.25.1



[PATCH 3/3] habanalabs: disable FW events on device removal

2021-01-18 Thread Oded Gabbay
When device is removed, we need to make sure the F/W won't send us
any more events because during the remove process we disable the
interrupts.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/device.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/misc/habanalabs/common/device.c 
b/drivers/misc/habanalabs/common/device.c
index 1ea57d86caa3..69d04eca767f 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1487,6 +1487,15 @@ void hl_device_fini(struct hl_device *hdev)
}
}
 
+   /* Disable PCI access from device F/W so it won't send us additional
+* interrupts. We disable MSI/MSI-X at the halt_engines function and we
+* can't have the F/W sending us interrupts after that. We need to
+* disable the access here because if the device is marked disable, the
+* message won't be send. Also, in case of heartbeat, the device CPU is
+* marked as disable so this message won't be sent
+*/
+   hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
+
/* Mark device as disabled */
hdev->disabled = true;
 
-- 
2.25.1



[PATCH 2/3] habanalabs: fix backward compatibility of idle check

2021-01-18 Thread Oded Gabbay
Need to take the lower 32 bits of the driver's 64-bit idle mask and put
it in the legacy 32-bit variable that the userspace reads to know the
idle mask.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/habanalabs_ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c 
b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 12efbd9d2e3a..d25892d61ec9 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -133,6 +133,8 @@ static int hw_idle(struct hl_device *hdev, struct 
hl_info_args *args)
 
hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
&hw_idle.busy_engines_mask_ext, NULL);
+   hw_idle.busy_engines_mask =
+   lower_32_bits(hw_idle.busy_engines_mask_ext);
 
return copy_to_user(out, &hw_idle,
min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
-- 
2.25.1



[git pull] habanalabs fixes for 5.11-rc4

2021-01-12 Thread Oded Gabbay
Hi Greg,

This pull request contains three important bug fixes for 5.11-rc4.
Basically the driver won't work without the dma address fix, and 
IMO the two other fixes are also important enough to be included 
at this stage.

Thanks,
Oded

The following changes since commit f970d1d01af8606233f47901c1cf39f3ae21fd74:

  Merge tag 'phy-fixes-5.11' of 
git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy into char-misc-next 
(2021-01-11 15:37:40 +0100)

are available in the Git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git 
tags/misc-habanalabs-fixes-2021-01-13

for you to fetch changes up to 9488307a5559255f2fc9a3ab61e1c31e243ca7c6:

  habanalabs: prevent soft lockup during unmap (2021-01-12 15:00:10 +0200)


This tag contains the following bug fixes:

- Fix the dma address that is passed to dma_mmap_coherent. We passed
  an address that includes an offset that is needed by our device and
  that caused dma_mmap_coherent to do an erroneous mapping.

- Fix the reset process in case failures happen during the reset process.
  Without this fix, if the user would have asked to perform reset after
  the previous reset failed he would get a kernel panic

- WA to prevent soft lockup BUG during unmap of host memory. In case of
  tens of thousands of mappings, the unmapping can take a long time that
  exceeds the soft lockup timeout. This WA adds a small sleep every 32K
  page unmappings to prevent that.

--------
Oded Gabbay (3):
  habanalabs: fix dma_addr passed to dma_mmap_coherent
  habanalabs: fix reset process in case of failures
  habanalabs: prevent soft lockup during unmap

 drivers/misc/habanalabs/common/device.c |  2 +-
 drivers/misc/habanalabs/common/habanalabs.h |  1 +
 drivers/misc/habanalabs/common/memory.c | 10 --
 drivers/misc/habanalabs/common/mmu.c|  6 +++---
 drivers/misc/habanalabs/common/mmu_v1.c | 12 ++--
 drivers/misc/habanalabs/gaudi/gaudi.c   |  3 ++-
 drivers/misc/habanalabs/goya/goya.c |  3 ++-
 7 files changed, 27 insertions(+), 10 deletions(-)


[PATCH 2/2] CREDITS: update email address and home address

2021-01-12 Thread Oded Gabbay
Update my email address to kernel.org account and my home address
to my new house.

Signed-off-by: Oded Gabbay 
---
 CREDITS | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/CREDITS b/CREDITS
index 090ed4b004a5..ebd00ca9515b 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1240,10 +1240,10 @@ S: 80050-430 - Curitiba - Paraná
 S: Brazil
 
 N: Oded Gabbay
-E: oded.gab...@gmail.com
-D: HabanaLabs and AMD KFD maintainer
-S: 12 Shraga Raphaeli
-S: Petah-Tikva, 4906418
+E: ogab...@kernel.org
+D: HabanaLabs maintainer
+S: 29 Duchifat St.
+S: Ra'anana 4372029
 S: Israel
 
 N: Kumar Gala
-- 
2.25.1



[PATCH 1/2] habanalabs: update email address in sysfs/debugfs docs

2021-01-12 Thread Oded Gabbay
Use my kernel.org address for contact point instead of my private email
address.

Signed-off-by: Oded Gabbay 
---
 .../ABI/testing/debugfs-driver-habanalabs | 44 +++---
 .../ABI/testing/sysfs-driver-habanalabs   | 58 +--
 2 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs 
b/Documentation/ABI/testing/debugfs-driver-habanalabs
index 3979bfdaa080..d447a611c41b 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -1,7 +1,7 @@
 What:   /sys/kernel/debug/habanalabs/hl/addr
 Date:   Jan 2019
 KernelVersion:  5.1
-Contact:oded.gab...@gmail.com
+Contact:ogab...@kernel.org
 Description:Sets the device address to be used for read or write through
 PCI bar, or the device VA of a host mapped memory to be read or
 written directly from the host. The latter option is allowed
@@ -11,7 +11,7 @@ Description:Sets the device address to be used for read 
or write through
 What:   /sys/kernel/debug/habanalabs/hl/clk_gate
 Date:   May 2020
 KernelVersion:  5.8
-Contact:oded.gab...@gmail.com
+Contact:ogab...@kernel.org
 Description:Allow the root user to disable/enable in runtime the clock
 gating mechanism in Gaudi. Due to how Gaudi is built, the
 clock gating needs to be disabled in order to access the
@@ -34,28 +34,28 @@ Description:Allow the root user to disable/enable in 
runtime the clock
 What:   /sys/kernel/debug/habanalabs/hl/command_buffers
 Date:   Jan 2019
 KernelVersion:  5.1
-Contact:oded.gab...@gmail.com
+Contact:ogab...@kernel.org
 Description:Displays a list with information about the currently allocated
 command buffers
 
 What:   /sys/kernel/debug/habanalabs/hl/command_submission
 Date:   Jan 2019
 KernelVersion:  5.1
-Contact:oded.gab...@gmail.com
+Contact:ogab...@kernel.org
 Description:Displays a list with information about the currently active
 command submissions
 
 What:   /sys/kernel/debug/habanalabs/hl/command_submission_jobs
 Date:   Jan 2019
 KernelVersion:  5.1
-Contact:oded.gab...@gmail.com
+Contact:ogab...@kernel.org
 Description:Displays a list with detailed information about each JOB (CB) 
of
 each active command submission
 
 What:   /sys/kernel/debug/habanalabs/hl/data32
 Date:   Jan 2019
 KernelVersion:  5.1
-Contact:oded.gab...@gmail.com
+Contact:ogab...@kernel.org
 Description:Allows the root user to read or write directly through the
 device's PCI bar. Writing to this file generates a write
 transaction while reading from the file generates a read
@@ -70,7 +70,7 @@ Description:Allows the root user to read or write 
directly through the
 What:   /sys/kernel/debug/habanalabs/hl/data64
 Date:   Jan 2020
 KernelVersion:  5.6
-Contact:oded.gab...@gmail.com
+Contact:ogab...@kernel.org
 Description:Allows the root user to read or write 64 bit data directly
 through the device's PCI bar. Writing to this file generates a
 write transaction while reading from the file generates a read
@@ -85,7 +85,7 @@ Description:Allows the root user to read or write 64 bit 
data directly
 What:   /sys/kernel/debug/habanalabs/hl/device
 Date:   Jan 2019
 KernelVersion:  5.1
-Contact:oded.gab...@gmail.com
+Contact:ogab...@kernel.org
 Description:Enables the root user to set the device to specific state.
 Valid values are "disable", "enable", "suspend", "resume".
 User can read this property to see the valid values
@@ -93,28 +93,28 @@ Description:Enables the root user to set the device to 
specific state.
 What:   /sys/kernel/debug/habanalabs/hl/engines
 Date:   Jul 2019
 KernelVersion:  5.3
-Contact:oded.gab...@gmail.com
+Contact:ogab...@kernel.org
 Description:Displays the status registers values of the device engines and
 their derived idle status
 
 What:   /sys/kernel/debug/habanalabs/hl/i2c_addr
 Date:   Jan 2019
 KernelVersion:  5.1
-Contact:oded.gab...@gmail.com
+Contact:ogab...@kernel.org
 Description:Sets I2C device address for I2C transaction that is generated
 by the device's CPU
 
 What:   /sys/kernel/debug/habanalabs/hl/i2c_bus
 Date:   Jan 2019
 KernelVersion:  5.1
-Contact:oded.gab...@gmail.com
+Contact:ogab...@kernel.org
 Description:Sets I2C bus address for I2C transaction that is generated by
 the device's CPU

[PATCH 3/4] habanalabs: ignore F/W BMC errors in case no BMC present

2021-01-12 Thread Oded Gabbay
From: Ofir Bitton 

In order to support operation mode in which BMC is not active,
driver must not take BMC errors into consideration.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/firmware_if.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c 
b/drivers/misc/habanalabs/common/firmware_if.c
index a6ea5bbeb699..dcd6c3614044 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -308,9 +308,15 @@ static int fw_read_errors(struct hl_device *hdev, u32 
boot_err0_reg,
if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
dev_warn(hdev->dev,
"Device boot warning - Skipped DRAM initialization\n");
-   if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED)
-   dev_warn(hdev->dev,
-   "Device boot error - Skipped waiting for BMC\n");
+
+   if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
+   if (hdev->bmc_enable)
+   dev_warn(hdev->dev,
+   "Device boot error - Skipped waiting for 
BMC\n");
+   else
+   err_val &= ~CPU_BOOT_ERR0_BMC_WAIT_SKIPPED;
+   }
+
if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
dev_err(hdev->dev,
"Device boot error - Serdes data from BMC not 
available\n");
-- 
2.25.1



[PATCH 2/4] habanalabs/gaudi: print sync manager SEI interrupt info

2021-01-12 Thread Oded Gabbay
From: Ofir Bitton 

Driver must print sync manager SEI information upon receiving
interrupt from FW.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 41 +++
 .../misc/habanalabs/include/common/cpucp_if.h |  7 
 .../include/gaudi/gaudi_async_events.h|  4 ++
 3 files changed, 52 insertions(+)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 4b602aa7a6a3..126650e3a9ad 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -225,6 +225,12 @@ gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] 
= {
"MSG AXI LBW returned with error"
 };
 
+enum gaudi_sm_sei_cause {
+   GAUDI_SM_SEI_SO_OVERFLOW,
+   GAUDI_SM_SEI_LBW_4B_UNALIGNED,
+   GAUDI_SM_SEI_AXI_RESPONSE_ERR
+};
+
 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
@@ -6845,6 +6851,34 @@ static void gaudi_handle_qman_err_generic(struct 
hl_device *hdev,
}
 }
 
+static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
+   struct hl_eq_sm_sei_data *sei_data)
+{
+   u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
+
+   switch (sei_data->sei_cause) {
+   case GAUDI_SM_SEI_SO_OVERFLOW:
+   dev_err(hdev->dev,
+   "SM %u SEI Error: SO %u overflow/underflow",
+   index, le16_to_cpu(sei_data->sei_log));
+   break;
+   case GAUDI_SM_SEI_LBW_4B_UNALIGNED:
+   dev_err(hdev->dev,
+   "SM %u SEI Error: Unaligned 4B LBW access, monitor 
agent address low - %#x",
+   index, le16_to_cpu(sei_data->sei_log));
+   break;
+   case GAUDI_SM_SEI_AXI_RESPONSE_ERR:
+   dev_err(hdev->dev,
+   "SM %u SEI Error: AXI ID %u response error",
+   index, le16_to_cpu(sei_data->sei_log));
+   break;
+   default:
+   dev_err(hdev->dev, "Unknown SM SEI cause %u",
+   le16_to_cpu(sei_data->sei_log));
+   break;
+   }
+}
+
 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
struct hl_eq_ecc_data *ecc_data)
 {
@@ -7468,6 +7502,13 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
hl_fw_unmask_irq(hdev, event_type);
break;
 
+   case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
+   gaudi_print_irq_info(hdev, event_type, false);
+   gaudi_print_sm_sei_info(hdev, event_type,
+   &eq_entry->sm_sei_data);
+   hl_fw_unmask_irq(hdev, event_type);
+   break;
+
case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
gaudi_print_clk_change_info(hdev, event_type);
hl_fw_unmask_irq(hdev, event_type);
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h 
b/drivers/misc/habanalabs/include/common/cpucp_if.h
index 00bd9b392f93..d75d1077461b 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -58,11 +58,18 @@ struct hl_eq_ecc_data {
__u8 pad[7];
 };
 
+struct hl_eq_sm_sei_data {
+   __le16 sei_log;
+   __u8 sei_cause;
+   __u8 pad[5];
+};
+
 struct hl_eq_entry {
struct hl_eq_header hdr;
union {
struct hl_eq_ecc_data ecc_data;
struct hl_eq_hbm_ecc_data hbm_ecc_data;
+   struct hl_eq_sm_sei_data sm_sei_data;
__le64 data[7];
};
 };
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h 
b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
index 9ccba8437ec9..49335e8334b4 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
@@ -212,6 +212,10 @@ enum gaudi_async_event_id {
GAUDI_EVENT_NIC_SEI_2 = 266,
GAUDI_EVENT_NIC_SEI_3 = 267,
GAUDI_EVENT_NIC_SEI_4 = 268,
+   GAUDI_EVENT_DMA_IF_SEI_0 = 277,
+   GAUDI_EVENT_DMA_IF_SEI_1 = 278,
+   GAUDI_EVENT_DMA_IF_SEI_2 = 279,
+   GAUDI_EVENT_DMA_IF_SEI_3 = 280,
GAUDI_EVENT_PCIE_FLR = 290,
GAUDI_EVENT_TPC0_BMON_SPMU = 300,
GAUDI_EVENT_TPC0_KRN_ERR = 301,
-- 
2.25.1



[PATCH 4/4] habanalabs: add security violations dump to debugfs

2021-01-12 Thread Oded Gabbay
From: Ofir Bitton 

In order to improve driver security debuggability, we add
security violations dump to debugfs.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 .../ABI/testing/debugfs-driver-habanalabs |  8 +++
 drivers/misc/habanalabs/common/debugfs.c  | 22 +++
 drivers/misc/habanalabs/common/habanalabs.h   |  2 ++
 drivers/misc/habanalabs/gaudi/gaudi.c |  3 ++-
 drivers/misc/habanalabs/gaudi/gaudiP.h|  1 +
 .../misc/habanalabs/gaudi/gaudi_security.c|  5 +
 drivers/misc/habanalabs/goya/goya.c   |  3 ++-
 drivers/misc/habanalabs/goya/goyaP.h  |  1 +
 drivers/misc/habanalabs/goya/goya_security.c  |  5 +
 9 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs 
b/Documentation/ABI/testing/debugfs-driver-habanalabs
index c5d678d39144..3979bfdaa080 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -182,3 +182,11 @@ KernelVersion:  5.6
 Contact:oded.gab...@gmail.com
 Description:Sets the stop-on_error option for the device engines. Value of
 "0" is for disable, otherwise enable.
+
+What:   /sys/kernel/debug/habanalabs/hl/dump_security_violations
+Date:   Jan 2021
+KernelVersion:  5.12
+Contact:oded.gab...@gmail.com
+Description:Dumps all security violations to dmesg. This will also ack
+all security violations meanings those violations will not be
+dumped next time user calls this API
diff --git a/drivers/misc/habanalabs/common/debugfs.c 
b/drivers/misc/habanalabs/common/debugfs.c
index 50ca8eea6648..323d0381a60a 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -867,6 +867,17 @@ static ssize_t hl_stop_on_err_write(struct file *f, const 
char __user *buf,
return count;
 }
 
+static ssize_t hl_security_violations_read(struct file *f, char __user *buf,
+   size_t count, loff_t *ppos)
+{
+   struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+   struct hl_device *hdev = entry->hdev;
+
+   hdev->asic_funcs->ack_protection_bits_errors(hdev);
+
+   return 0;
+}
+
 static const struct file_operations hl_data32b_fops = {
.owner = THIS_MODULE,
.read = hl_data_read32,
@@ -924,6 +935,11 @@ static const struct file_operations hl_stop_on_err_fops = {
.write = hl_stop_on_err_write
 };
 
+static const struct file_operations hl_security_violations_fops = {
+   .owner = THIS_MODULE,
+   .read = hl_security_violations_read
+};
+
 static const struct hl_info_list hl_debugfs_list[] = {
{"command_buffers", command_buffers_show, NULL},
{"command_submission", command_submission_show, NULL},
@@ -1073,6 +1089,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry,
&hl_stop_on_err_fops);
 
+   debugfs_create_file("dump_security_violations",
+   0644,
+   dev_entry->root,
+   dev_entry,
+   &hl_security_violations_fops);
+
for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
 
ent = debugfs_create_file(hl_debugfs_list[i].name,
diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index d3c73fc7baf7..454ef3bfe2e7 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -850,6 +850,7 @@ enum div_select_defs {
  * @collective_wait_create_jobs: allocate collective wait cs jobs
  * @scramble_vaddr: Routine to scramble the virtual address prior of mapping it
  *  in the MMU.
+ * @ack_protection_bits_errors: ack and dump all security violations
  */
 struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
@@ -960,6 +961,7 @@ struct hl_asic_funcs {
struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
u32 collective_engine_id);
u64 (*scramble_vaddr)(struct hl_device *hdev, u64 virt_addr);
+   void (*ack_protection_bits_errors)(struct hl_device *hdev);
 };
 
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 126650e3a9ad..36e2cc22d108 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -8545,7 +8545,8 @@ static const struct hl_asic_funcs gaudi_funcs = {
.get_device_time = gaudi_get_device_time,
.collective_wait_init_cs = gaudi_collective_wait_init_cs,
.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
- 

[PATCH 1/4] habanalabs: Use 'dma_set_mask_and_coherent()'

2021-01-12 Thread Oded Gabbay
From: Christophe JAILLET 

Axe 'hl_pci_set_dma_mask()' and replace it with an equivalent
'dma_set_mask_and_coherent()' call.

This makes the code a bit less verbose.

It also removes an erroneous comment, because 'hl_pci_set_dma_mask()'
does not try to use a fall-back value.

Signed-off-by: Christophe JAILLET 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/pci/pci.c | 43 
 1 file changed, 7 insertions(+), 36 deletions(-)

diff --git a/drivers/misc/habanalabs/common/pci/pci.c 
b/drivers/misc/habanalabs/common/pci/pci.c
index c56ec1574127..b799f9258fb0 100644
--- a/drivers/misc/habanalabs/common/pci/pci.c
+++ b/drivers/misc/habanalabs/common/pci/pci.c
@@ -307,40 +307,6 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
return rc;
 }
 
-/**
- * hl_pci_set_dma_mask() - Set DMA masks for the device.
- * @hdev: Pointer to hl_device structure.
- *
- * This function sets the DMA masks (regular and consistent) for a specified
- * value. If it doesn't succeed, it tries to set it to a fall-back value
- *
- * Return: 0 on success, non-zero for failure.
- */
-static int hl_pci_set_dma_mask(struct hl_device *hdev)
-{
-   struct pci_dev *pdev = hdev->pdev;
-   int rc;
-
-   /* set DMA mask */
-   rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
-   if (rc) {
-   dev_err(hdev->dev,
-   "Failed to set pci dma mask to %d bits, error %d\n",
-   hdev->dma_mask, rc);
-   return rc;
-   }
-
-   rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
-   if (rc) {
-   dev_err(hdev->dev,
-   "Failed to set pci consistent dma mask to %d bits, 
error %d\n",
-   hdev->dma_mask, rc);
-   return rc;
-   }
-
-   return 0;
-}
-
 /**
  * hl_pci_init() - PCI initialization code.
  * @hdev: Pointer to hl_device structure.
@@ -377,9 +343,14 @@ int hl_pci_init(struct hl_device *hdev)
goto unmap_pci_bars;
}
 
-   rc = hl_pci_set_dma_mask(hdev);
-   if (rc)
+   rc = dma_set_mask_and_coherent(&pdev->dev,
+   DMA_BIT_MASK(hdev->dma_mask));
+   if (rc) {
+   dev_err(hdev->dev,
+   "Failed to set dma mask to %d bits, error %d\n",
+   hdev->dma_mask, rc);
goto unmap_pci_bars;
+   }
 
return 0;
 
-- 
2.25.1



[PATCH 3/3] habanalabs: prevent soft lockup during unmap

2021-01-12 Thread Oded Gabbay
When using a deep learning framework such as TensorFlow or PyTorch, there
are tens of thousands of host memory mappings. When the user frees
all those mappings at the same time, the process of unmapping and
unpinning them can take a long time, which may cause a soft lockup
bug.

To prevent this, we need to free the core to do other things during
the unmapping process. For now, we chose to do it every 32K unmappings
(each unmap is a single 4K page).

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/habanalabs.h |  1 +
 drivers/misc/habanalabs/common/memory.c | 10 --
 drivers/misc/habanalabs/common/mmu.c|  6 +++---
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h 
b/drivers/misc/habanalabs/common/habanalabs.h
index e0d7f5fbaa5c..60e16dc4bcac 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2182,6 +2182,7 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct 
hl_mmu_funcs *mmu);
 int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr);
 int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
struct hl_mmu_hop_info *hops);
+bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr);
 
 int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
void __iomem *dst, u32 src_offset, u32 size);
diff --git a/drivers/misc/habanalabs/common/memory.c 
b/drivers/misc/habanalabs/common/memory.c
index cbe9da4e0211..5d4fbdcaefe3 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -886,8 +886,10 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 
vaddr,
 {
struct hl_device *hdev = ctx->hdev;
u64 next_vaddr, i;
+   bool is_host_addr;
u32 page_size;
 
+   is_host_addr = !hl_is_dram_va(hdev, vaddr);
page_size = phys_pg_pack->page_size;
next_vaddr = vaddr;
 
@@ -900,9 +902,13 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 
vaddr,
/*
 * unmapping on Palladium can be really long, so avoid a CPU
 * soft lockup bug by sleeping a little between unmapping pages
+*
+* In addition, when unmapping host memory we pass through
+* the Linux kernel to unpin the pages and that takes a long
+* time. Therefore, sleep every 32K pages to avoid soft lockup
 */
-   if (hdev->pldm)
-   usleep_range(500, 1000);
+   if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0))
+   usleep_range(50, 200);
}
 }
 
diff --git a/drivers/misc/habanalabs/common/mmu.c 
b/drivers/misc/habanalabs/common/mmu.c
index 33ae953d3a36..28a4638741d8 100644
--- a/drivers/misc/habanalabs/common/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu.c
@@ -9,7 +9,7 @@
 
 #include "habanalabs.h"
 
-static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
+bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr)
 {
struct asic_fixed_properties *prop = &hdev->asic_prop;
 
@@ -156,7 +156,7 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, 
u32 page_size,
if (!hdev->mmu_enable)
return 0;
 
-   is_dram_addr = is_dram_va(hdev, virt_addr);
+   is_dram_addr = hl_is_dram_va(hdev, virt_addr);
 
if (is_dram_addr)
mmu_prop = &prop->dmmu;
@@ -236,7 +236,7 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 
phys_addr,
if (!hdev->mmu_enable)
return 0;
 
-   is_dram_addr = is_dram_va(hdev, virt_addr);
+   is_dram_addr = hl_is_dram_va(hdev, virt_addr);
 
if (is_dram_addr)
mmu_prop = &prop->dmmu;
-- 
2.25.1



[PATCH 2/3] habanalabs: fix reset process in case of failures

2021-01-12 Thread Oded Gabbay
There are some points in the reset process where if the code fails
for some reason, and the system admin tries to initiate the reset
process again we will get a kernel panic.

This is because there aren't any protections in different fini
functions that are called during the reset process.

The protections that are added in this patch make sure that if the fini
functions are called multiple times, without calling init functions
between them, there won't be double release of already released
resources.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/common/device.c |  2 +-
 drivers/misc/habanalabs/common/mmu_v1.c | 12 ++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c 
b/drivers/misc/habanalabs/common/device.c
index 1456eabf9601..1ea57d86caa3 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1037,7 +1037,7 @@ int hl_device_reset(struct hl_device *hdev, bool 
hard_reset,
 
if (hard_reset) {
/* Release kernel context */
-   if (hl_ctx_put(hdev->kernel_ctx) == 1)
+   if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
hdev->kernel_ctx = NULL;
hl_vm_fini(hdev);
hl_mmu_fini(hdev);
diff --git a/drivers/misc/habanalabs/common/mmu_v1.c 
b/drivers/misc/habanalabs/common/mmu_v1.c
index 2ce6ea89d4fa..06d8a44dd5d4 100644
--- a/drivers/misc/habanalabs/common/mmu_v1.c
+++ b/drivers/misc/habanalabs/common/mmu_v1.c
@@ -467,8 +467,16 @@ static void hl_mmu_v1_fini(struct hl_device *hdev)
 {
/* MMU H/W fini was already done in device hw_fini() */
 
-   kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
-   gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
+   if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) {
+   kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
+   gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
+   }
+
+   /* Make sure that if we arrive here again without init was called we
+* won't cause kernel panic. This can happen for example if we fail
+* during hard reset code at certain points
+*/
+   hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
 }
 
 /**
-- 
2.25.1



[PATCH 1/3] habanalabs: fix dma_addr passed to dma_mmap_coherent

2021-01-12 Thread Oded Gabbay
When doing dma_alloc_coherent in the driver, we add a certain hard-coded
offset to the DMA address before returning to the caller. This
offset is needed when our device uses this DMA address to perform
outbound transactions to the host.

However, if we want to map the DMA'able memory to the user via
dma_mmap_coherent(), we need to pass the original dma address, without
this offset. Otherwise, we will get an erroneous mapping.

Signed-off-by: Oded Gabbay 
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 3 ++-
 drivers/misc/habanalabs/goya/goya.c   | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c 
b/drivers/misc/habanalabs/gaudi/gaudi.c
index 8c09e4466af8..b328ddaa64ee 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -4002,7 +4002,8 @@ static int gaudi_cb_mmap(struct hl_device *hdev, struct 
vm_area_struct *vma,
vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
VM_DONTCOPY | VM_NORESERVE;
 
-   rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
+   rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
+   (dma_addr - HOST_PHYS_BASE), size);
if (rc)
dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
 
diff --git a/drivers/misc/habanalabs/goya/goya.c 
b/drivers/misc/habanalabs/goya/goya.c
index b8b4aa636b7c..63679a747d2c 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -2719,7 +2719,8 @@ static int goya_cb_mmap(struct hl_device *hdev, struct 
vm_area_struct *vma,
vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
VM_DONTCOPY | VM_NORESERVE;
 
-   rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
+   rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
+   (dma_addr - HOST_PHYS_BASE), size);
if (rc)
dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
 
-- 
2.25.1



Re: [PATCH] habanalabs: Use 'dma_set_mask_and_coherent()' instead of hand-writing it

2021-01-05 Thread Oded Gabbay
On Tue, Dec 29, 2020 at 1:17 PM Christophe JAILLET
 wrote:
>
> Axe 'hl_pci_set_dma_mask()' and replace it with an equivalent
> 'dma_set_mask_and_coherent()' call.
>
> This makes the code a bit less verbose.
>
> It also removes an erroneous comment, because 'hl_pci_set_dma_mask()' does
> not try to use a fall-back value.
>
> Signed-off-by: Christophe JAILLET 
> ---
>  drivers/misc/habanalabs/common/pci.c | 42 
>  1 file changed, 6 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/misc/habanalabs/common/pci.c 
> b/drivers/misc/habanalabs/common/pci.c
> index 923b2606e29f..eb8a784ba863 100644
> --- a/drivers/misc/habanalabs/common/pci.c
> +++ b/drivers/misc/habanalabs/common/pci.c
> @@ -301,40 +301,6 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
> return rc;
>  }
>
> -/**
> - * hl_pci_set_dma_mask() - Set DMA masks for the device.
> - * @hdev: Pointer to hl_device structure.
> - *
> - * This function sets the DMA masks (regular and consistent) for a specified
> - * value. If it doesn't succeed, it tries to set it to a fall-back value
> - *
> - * Return: 0 on success, non-zero for failure.
> - */
> -static int hl_pci_set_dma_mask(struct hl_device *hdev)
> -{
> -   struct pci_dev *pdev = hdev->pdev;
> -   int rc;
> -
> -   /* set DMA mask */
> -   rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
> -   if (rc) {
> -   dev_err(hdev->dev,
> -   "Failed to set pci dma mask to %d bits, error %d\n",
> -   hdev->dma_mask, rc);
> -   return rc;
> -   }
> -
> -   rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
> -   if (rc) {
> -   dev_err(hdev->dev,
> -   "Failed to set pci consistent dma mask to %d bits, 
> error %d\n",
> -   hdev->dma_mask, rc);
> -   return rc;
> -   }
> -
> -   return 0;
> -}
> -
>  /**
>   * hl_pci_init() - PCI initialization code.
>   * @hdev: Pointer to hl_device structure.
> @@ -371,9 +337,13 @@ int hl_pci_init(struct hl_device *hdev)
> goto unmap_pci_bars;
> }
>
> -   rc = hl_pci_set_dma_mask(hdev);
> -   if (rc)
> +   rc = dma_set_mask_and_coherent(&pdev->dev, 
> DMA_BIT_MASK(hdev->dma_mask));
> +   if (rc) {
> +   dev_err(hdev->dev,
> +   "Failed to set dma mask to %d bits, error %d\n",
> +   hdev->dma_mask, rc);
> goto unmap_pci_bars;
> +   }
>
> return 0;
>
> --
> 2.27.0
>

Reviewed-by: Oded Gabbay 
Applied to -next
Oded


[PATCH 2/4] habanalabs: increment ctx ref from within a cs allocation

2021-01-03 Thread Oded Gabbay
From: Ofir Bitton 

A CS must increment the relevant context reference count.
We want to increment the reference inside the CS allocation function
as opposed to today, where we increment it outside.
This is logical since we want to avoid explicitly incrementing
the context every time we call the CS allocate function.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 .../misc/habanalabs/common/command_submission.c   | 15 +--
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c 
b/drivers/misc/habanalabs/common/command_submission.c
index f7fac82ac41d..3affb350070c 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -479,6 +479,9 @@ static int allocate_cs(struct hl_device *hdev, struct 
hl_ctx *ctx,
return -ENOMEM;
}
 
+   /* increment refcnt for context */
+   hl_ctx_get(hdev, ctx);
+
cs->ctx = ctx;
cs->submitted = false;
cs->completed = false;
@@ -550,6 +553,7 @@ static int allocate_cs(struct hl_device *hdev, struct 
hl_ctx *ctx,
kfree(cs_cmpl);
 free_cs:
kfree(cs);
+   hl_ctx_put(ctx);
return rc;
 }
 
@@ -827,14 +831,9 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void 
__user *chunks,
if (rc)
goto out;
 
-   /* increment refcnt for context */
-   hl_ctx_get(hdev, hpriv->ctx);
-
rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs);
-   if (rc) {
-   hl_ctx_put(hpriv->ctx);
+   if (rc)
goto free_cs_chunk_array;
-   }
 
cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
*cs_seq = cs->sequence;
@@ -1276,15 +1275,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, 
enum hl_cs_type cs_type,
}
}
 
-   /* increment refcnt for context */
-   hl_ctx_get(hdev, ctx);
-
rc = allocate_cs(hdev, ctx, cs_type, &cs);
if (rc) {
if (cs_type == CS_TYPE_WAIT ||
cs_type == CS_TYPE_COLLECTIVE_WAIT)
hl_fence_put(sig_fence);
-   hl_ctx_put(ctx);
goto free_cs_chunk_array;
}
 
-- 
2.25.1



[PATCH 3/4] habanalabs: add driver support for internal cb scheduling

2021-01-03 Thread Oded Gabbay
From: Ofir Bitton 

In order to support scenarios in which the driver needs access to
HW components but cannot access them directly, we add support for
scheduling command buffers internally.
These command buffers will be transmitted upon the next user command
submission context.

Signed-off-by: Ofir Bitton 
Reviewed-by: Oded Gabbay 
Signed-off-by: Oded Gabbay 
---
 .../habanalabs/common/command_submission.c| 141 ++
 drivers/misc/habanalabs/common/context.c  |   3 +
 drivers/misc/habanalabs/common/device.c   |   2 +
 drivers/misc/habanalabs/common/habanalabs.h   |  26 
 4 files changed, 172 insertions(+)

diff --git a/drivers/misc/habanalabs/common/command_submission.c 
b/drivers/misc/habanalabs/common/command_submission.c
index 3affb350070c..2dd69d6d4782 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -585,6 +585,18 @@ void hl_cs_rollback_all(struct hl_device *hdev)
}
 }
 
+void hl_pending_cb_rollback_all(struct hl_device *hdev)
+{
+   struct hl_pending_cb *pending_cb, *tmp;
+
+   list_for_each_entry_safe(pending_cb, tmp,
+   &hdev->kernel_ctx->pending_cb_list, cb_node) {
+   list_del(&pending_cb->cb_node);
+   hl_cb_put(pending_cb->cb);
+   kfree(pending_cb);
+   }
+}
+
 static void job_wq_completion(struct work_struct *work)
 {
struct hl_cs_job *job = container_of(work, struct hl_cs_job,
@@ -954,6 +966,131 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void 
__user *chunks,
return rc;
 }
 
+static int pending_cb_create_job(struct hl_device *hdev, struct hl_ctx *ctx,
+   struct hl_cs *cs, struct hl_cb *cb, u32 size, u32 hw_queue_id)
+{
+   struct hw_queue_properties *hw_queue_prop;
+   struct hl_cs_counters_atomic *cntr;
+   struct hl_cs_job *job;
+
+   hw_queue_prop = &hdev->asic_prop.hw_queues_props[hw_queue_id];
+   cntr = &hdev->aggregated_cs_counters;
+
+   job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
+   if (!job) {
+   atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+   atomic64_inc(&cntr->out_of_mem_drop_cnt);
+   dev_err(hdev->dev, "Failed to allocate a new job\n");
+   return -ENOMEM;
+   }
+
+   job->id = 0;
+   job->cs = cs;
+   job->user_cb = cb;
+   atomic_inc(&job->user_cb->cs_cnt);
+   job->user_cb_size = size;
+   job->hw_queue_id = hw_queue_id;
+   job->patched_cb = job->user_cb;
+   job->job_cb_size = job->user_cb_size;
+
+   /* increment refcount as for external queues we get completion */
+   cs_get(cs);
+
+   cs->jobs_in_queue_cnt[job->hw_queue_id]++;
+
+   list_add_tail(&job->cs_node, &cs->job_list);
+
+   hl_debugfs_add_job(hdev, job);
+
+   return 0;
+}
+
+static int hl_submit_pending_cb(struct hl_fpriv *hpriv)
+{
+   struct hw_queue_properties *hw_queue_prop;
+   struct hl_device *hdev = hpriv->hdev;
+   struct hl_ctx *ctx = hpriv->ctx;
+   struct hl_pending_cb *pending_cb, *tmp;
+   struct list_head local_cb_list;
+   struct hl_cs *cs;
+   struct hl_cb *cb;
+   u32 hw_queue_id;
+   u32 cb_size;
+   int process_list, rc = 0;
+
+   if (list_empty(&ctx->pending_cb_list))
+   return 0;
+
+   process_list = atomic_cmpxchg(&ctx->thread_pending_cb_token, 1, 0);
+
+   /* Only a single thread is allowed to process the list */
+   if (!process_list)
+   return 0;
+
+   if (list_empty(&ctx->pending_cb_list))
+   goto free_pending_cb_token;
+
+   /* move all list elements to a local list */
+   INIT_LIST_HEAD(&local_cb_list);
+   spin_lock(&ctx->pending_cb_lock);
+   list_for_each_entry_safe(pending_cb, tmp, &ctx->pending_cb_list,
+   cb_node)
+   list_move_tail(&pending_cb->cb_node, &local_cb_list);
+   spin_unlock(&ctx->pending_cb_lock);
+
+   rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, &cs);
+   if (rc)
+   goto add_list_elements;
+
+   hl_debugfs_add_cs(cs);
+
+   /* Iterate through pending cb list, create jobs and add to CS */
+   list_for_each_entry(pending_cb, &local_cb_list, cb_node) {
+   cb = pending_cb->cb;
+   cb_size = pending_cb->cb_size;
+   hw_queue_id = pending_cb->hw_queue_id;
+   hw_queue_prop = &hdev->asic_prop.hw_queues_props[hw_queue_id];
+
+   rc = pending_cb_create_job(hdev, ctx, cs, cb, cb_size,
+   hw_queue_id);
+

  1   2   3   4   5   6   7   8   9   10   >