[PATCH v3 2/4] drm/v3d: fix up register addresses for V3D 7.x

2023-10-31 Thread Iago Toral Quiroga
This patch updates a number of register addresses that have
been changed in Raspberry Pi 5 (V3D 7.1) and updates the
code to use the corresponding registers and addresses based
on the actual V3D version.

Signed-off-by: Iago Toral Quiroga 
Reviewed-by: Maíra Canal 
---
 drivers/gpu/drm/v3d/v3d_debugfs.c | 178 +-
 drivers/gpu/drm/v3d/v3d_gem.c |   4 +-
 drivers/gpu/drm/v3d/v3d_irq.c |  46 
 drivers/gpu/drm/v3d/v3d_regs.h|  94 +---
 drivers/gpu/drm/v3d/v3d_sched.c   |  38 ---
 5 files changed, 204 insertions(+), 156 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c 
b/drivers/gpu/drm/v3d/v3d_debugfs.c
index 330669f51fa7..f843a50d5dce 100644
--- a/drivers/gpu/drm/v3d/v3d_debugfs.c
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -12,69 +12,83 @@
 #include "v3d_drv.h"
 #include "v3d_regs.h"
 
-#define REGDEF(reg) { reg, #reg }
+#define REGDEF(min_ver, max_ver, reg) { min_ver, max_ver, reg, #reg }
 struct v3d_reg_def {
+   u32 min_ver;
+   u32 max_ver;
u32 reg;
const char *name;
 };
 
 static const struct v3d_reg_def v3d_hub_reg_defs[] = {
-   REGDEF(V3D_HUB_AXICFG),
-   REGDEF(V3D_HUB_UIFCFG),
-   REGDEF(V3D_HUB_IDENT0),
-   REGDEF(V3D_HUB_IDENT1),
-   REGDEF(V3D_HUB_IDENT2),
-   REGDEF(V3D_HUB_IDENT3),
-   REGDEF(V3D_HUB_INT_STS),
-   REGDEF(V3D_HUB_INT_MSK_STS),
-
-   REGDEF(V3D_MMU_CTL),
-   REGDEF(V3D_MMU_VIO_ADDR),
-   REGDEF(V3D_MMU_VIO_ID),
-   REGDEF(V3D_MMU_DEBUG_INFO),
+   REGDEF(33, 42, V3D_HUB_AXICFG),
+   REGDEF(33, 71, V3D_HUB_UIFCFG),
+   REGDEF(33, 71, V3D_HUB_IDENT0),
+   REGDEF(33, 71, V3D_HUB_IDENT1),
+   REGDEF(33, 71, V3D_HUB_IDENT2),
+   REGDEF(33, 71, V3D_HUB_IDENT3),
+   REGDEF(33, 71, V3D_HUB_INT_STS),
+   REGDEF(33, 71, V3D_HUB_INT_MSK_STS),
+
+   REGDEF(33, 71, V3D_MMU_CTL),
+   REGDEF(33, 71, V3D_MMU_VIO_ADDR),
+   REGDEF(33, 71, V3D_MMU_VIO_ID),
+   REGDEF(33, 71, V3D_MMU_DEBUG_INFO),
+
+   REGDEF(71, 71, V3D_GMP_STATUS(71)),
+   REGDEF(71, 71, V3D_GMP_CFG(71)),
+   REGDEF(71, 71, V3D_GMP_VIO_ADDR(71)),
 };
 
 static const struct v3d_reg_def v3d_gca_reg_defs[] = {
-   REGDEF(V3D_GCA_SAFE_SHUTDOWN),
-   REGDEF(V3D_GCA_SAFE_SHUTDOWN_ACK),
+   REGDEF(33, 33, V3D_GCA_SAFE_SHUTDOWN),
+   REGDEF(33, 33, V3D_GCA_SAFE_SHUTDOWN_ACK),
 };
 
 static const struct v3d_reg_def v3d_core_reg_defs[] = {
-   REGDEF(V3D_CTL_IDENT0),
-   REGDEF(V3D_CTL_IDENT1),
-   REGDEF(V3D_CTL_IDENT2),
-   REGDEF(V3D_CTL_MISCCFG),
-   REGDEF(V3D_CTL_INT_STS),
-   REGDEF(V3D_CTL_INT_MSK_STS),
-   REGDEF(V3D_CLE_CT0CS),
-   REGDEF(V3D_CLE_CT0CA),
-   REGDEF(V3D_CLE_CT0EA),
-   REGDEF(V3D_CLE_CT1CS),
-   REGDEF(V3D_CLE_CT1CA),
-   REGDEF(V3D_CLE_CT1EA),
-
-   REGDEF(V3D_PTB_BPCA),
-   REGDEF(V3D_PTB_BPCS),
-
-   REGDEF(V3D_GMP_STATUS),
-   REGDEF(V3D_GMP_CFG),
-   REGDEF(V3D_GMP_VIO_ADDR),
-
-   REGDEF(V3D_ERR_FDBGO),
-   REGDEF(V3D_ERR_FDBGB),
-   REGDEF(V3D_ERR_FDBGS),
-   REGDEF(V3D_ERR_STAT),
+   REGDEF(33, 71, V3D_CTL_IDENT0),
+   REGDEF(33, 71, V3D_CTL_IDENT1),
+   REGDEF(33, 71, V3D_CTL_IDENT2),
+   REGDEF(33, 71, V3D_CTL_MISCCFG),
+   REGDEF(33, 71, V3D_CTL_INT_STS),
+   REGDEF(33, 71, V3D_CTL_INT_MSK_STS),
+   REGDEF(33, 71, V3D_CLE_CT0CS),
+   REGDEF(33, 71, V3D_CLE_CT0CA),
+   REGDEF(33, 71, V3D_CLE_CT0EA),
+   REGDEF(33, 71, V3D_CLE_CT1CS),
+   REGDEF(33, 71, V3D_CLE_CT1CA),
+   REGDEF(33, 71, V3D_CLE_CT1EA),
+
+   REGDEF(33, 71, V3D_PTB_BPCA),
+   REGDEF(33, 71, V3D_PTB_BPCS),
+
+   REGDEF(33, 41, V3D_GMP_STATUS(33)),
+   REGDEF(33, 41, V3D_GMP_CFG(33)),
+   REGDEF(33, 41, V3D_GMP_VIO_ADDR(33)),
+
+   REGDEF(33, 71, V3D_ERR_FDBGO),
+   REGDEF(33, 71, V3D_ERR_FDBGB),
+   REGDEF(33, 71, V3D_ERR_FDBGS),
+   REGDEF(33, 71, V3D_ERR_STAT),
 };
 
 static const struct v3d_reg_def v3d_csd_reg_defs[] = {
-   REGDEF(V3D_CSD_STATUS),
-   REGDEF(V3D_CSD_CURRENT_CFG0),
-   REGDEF(V3D_CSD_CURRENT_CFG1),
-   REGDEF(V3D_CSD_CURRENT_CFG2),
-   REGDEF(V3D_CSD_CURRENT_CFG3),
-   REGDEF(V3D_CSD_CURRENT_CFG4),
-   REGDEF(V3D_CSD_CURRENT_CFG5),
-   REGDEF(V3D_CSD_CURRENT_CFG6),
+   REGDEF(41, 71, V3D_CSD_STATUS),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG0(41)),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG1(41)),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG2(41)),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG3(41)),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG4(41)),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG5(41)),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG6(41)),
+   REGDEF(71, 71, V3D_CSD_CURRENT_CFG0(71)),
+   REGDEF(71, 71, V3D_CSD_CURRENT_CFG1(71)),
+   REGDEF(71, 71, V3D_CSD_CURRENT_CFG2(71)),
+   REGDEF(71, 71, V3D_CSD_CURRENT_CFG3(71)),
+   REGDEF(71, 71, V3D

[PATCH v3 0/4] V3D module changes for Pi5

2023-10-31 Thread Iago Toral Quiroga
This series includes patches to update the V3D kernel module
that drives the VideoCore VI GPU in Raspberry Pi 4 to also support
the VideoCore VII iteration present in Raspberry Pi 5.

The first patch in the series adds a small uAPI update required for
TFU jobs, the second patch addresses the bulk of the work and
involves mostly updates to register addresses, the third and fourth
patches match the 'brcm,2712-v3d' device string from Pi5 with the
V3D driver.

The changes for the user-space driver can be found in the
corresponding Mesa MR here:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25450

Changes in v2:
  - Added s-o-b to patches (Maíra Canal)
  - patch 2: fixed style warnings (Maíra Canal)
  - patch 2: Use macro with version param to get reg addresses (Maíra Canal)
  - new patch: Update the device tree binding (Stefan Wahren)

Changes in v3:
  - Moved changelog entries in patches to cover letter (Stefan Wahren)
  - Added DT maintainers (Stefan Wahren, Krzysztof Kozlowski)

Iago Toral Quiroga (4):
  drm/v3d: update UAPI to match user-space for V3D 7.x
  drm/v3d: fix up register addresses for V3D 7.x
  dt-bindings: gpu: v3d: Add BCM2712's compatible
  drm/v3d: add brcm,2712-v3d as a compatible V3D device

 .../devicetree/bindings/gpu/brcm,bcm-v3d.yaml |   1 +
 drivers/gpu/drm/v3d/v3d_debugfs.c | 178 ++
 drivers/gpu/drm/v3d/v3d_drv.c |   1 +
 drivers/gpu/drm/v3d/v3d_gem.c |   4 +-
 drivers/gpu/drm/v3d/v3d_irq.c |  46 +++--
 drivers/gpu/drm/v3d/v3d_regs.h|  94 +
 drivers/gpu/drm/v3d/v3d_sched.c   |  38 ++--
 include/uapi/drm/v3d_drm.h|   5 +
 8 files changed, 211 insertions(+), 156 deletions(-)

-- 
2.39.2



[PATCH v3 1/4] drm/v3d: update UAPI to match user-space for V3D 7.x

2023-10-31 Thread Iago Toral Quiroga
V3D 7.x takes a new parameter to configure TFU jobs that needs
to be provided by user space.

Signed-off-by: Iago Toral Quiroga 
Reviewed-by: Maíra Canal 
---
v2: added s-o-b, fixed typo in commit message (Maíra Canal)

 include/uapi/drm/v3d_drm.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 3dfc0af8756a..1a7d7a689de3 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -319,6 +319,11 @@ struct drm_v3d_submit_tfu {
 
/* Pointer to an array of ioctl extensions*/
__u64 extensions;
+
+   struct {
+   __u32 ioc;
+   __u32 pad;
+   } v71;
 };
 
 /* Submits a compute shader for dispatch.  This job will block on any
-- 
2.39.2



[PATCH v3 3/4] dt-bindings: gpu: v3d: Add BCM2712's compatible

2023-10-31 Thread Iago Toral Quiroga
BCM2712, Raspberry Pi 5's SoC, contains a V3D core. So add its specific
compatible to the bindings.

Signed-off-by: Iago Toral Quiroga 
Reviewed-by: Maíra Canal 
---
 Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml 
b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml
index dae55b8a267b..dc078ceeca9a 100644
--- a/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml
+++ b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml
@@ -17,6 +17,7 @@ properties:
   compatible:
 enum:
   - brcm,2711-v3d
+  - brcm,2712-v3d
   - brcm,7268-v3d
   - brcm,7278-v3d
 
-- 
2.39.2



[PATCH v3 4/4] drm/v3d: add brcm,2712-v3d as a compatible V3D device

2023-10-31 Thread Iago Toral Quiroga
This is required to get the V3D module to load with Raspberry Pi 5.

Signed-off-by: Iago Toral Quiroga 
Reviewed-by: Stefan Wahren 
Reviewed-by: Maíra Canal 
---
 drivers/gpu/drm/v3d/v3d_drv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index ffbbe9d527d3..1ab46bdf8ad7 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -187,6 +187,7 @@ static const struct drm_driver v3d_drm_driver = {
 
 static const struct of_device_id v3d_of_match[] = {
{ .compatible = "brcm,2711-v3d" },
+   { .compatible = "brcm,2712-v3d" },
{ .compatible = "brcm,7268-v3d" },
{ .compatible = "brcm,7278-v3d" },
{},
-- 
2.39.2



[PATCH v2 4/4] drm/v3d: add brcm,2712-v3d as a compatible V3D device

2023-10-30 Thread Iago Toral Quiroga
This is required to get the V3D module to load with Raspberry Pi 5.

v2:
 - added s-o-b and commit message. (Maíra)
 - keep order of compatible strings. (Stefan Wahren)

Signed-off-by: Iago Toral Quiroga 
---
 drivers/gpu/drm/v3d/v3d_drv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index ffbbe9d527d3..1ab46bdf8ad7 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -187,6 +187,7 @@ static const struct drm_driver v3d_drm_driver = {
 
 static const struct of_device_id v3d_of_match[] = {
{ .compatible = "brcm,2711-v3d" },
+   { .compatible = "brcm,2712-v3d" },
{ .compatible = "brcm,7268-v3d" },
{ .compatible = "brcm,7278-v3d" },
{},
-- 
2.39.2



[PATCH v2 2/4] drm/v3d: fix up register addresses for V3D 7.x

2023-10-30 Thread Iago Toral Quiroga
This patch updates a number of register addresses that have
been changed in Raspberry Pi 5 (V3D 7.1) and updates the
code to use the corresponding registers and addresses based
on the actual V3D version.

v2:
 - added s-o-b and commit message. (Maíra Canal)
 - Used macro that takes version as argument and returns
   appropriate values instead of two different definitions
   for post-v71 and pre-v71 hardware when possible. (Maíra Canal)
 - fixed style warnings from checkpatch.pl. (Maíra Canal)

Signed-off-by: Iago Toral Quiroga 
---
 drivers/gpu/drm/v3d/v3d_debugfs.c | 178 +-
 drivers/gpu/drm/v3d/v3d_gem.c |   4 +-
 drivers/gpu/drm/v3d/v3d_irq.c |  46 
 drivers/gpu/drm/v3d/v3d_regs.h|  94 +---
 drivers/gpu/drm/v3d/v3d_sched.c   |  38 ---
 5 files changed, 204 insertions(+), 156 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c 
b/drivers/gpu/drm/v3d/v3d_debugfs.c
index 330669f51fa7..f843a50d5dce 100644
--- a/drivers/gpu/drm/v3d/v3d_debugfs.c
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -12,69 +12,83 @@
 #include "v3d_drv.h"
 #include "v3d_regs.h"
 
-#define REGDEF(reg) { reg, #reg }
+#define REGDEF(min_ver, max_ver, reg) { min_ver, max_ver, reg, #reg }
 struct v3d_reg_def {
+   u32 min_ver;
+   u32 max_ver;
u32 reg;
const char *name;
 };
 
 static const struct v3d_reg_def v3d_hub_reg_defs[] = {
-   REGDEF(V3D_HUB_AXICFG),
-   REGDEF(V3D_HUB_UIFCFG),
-   REGDEF(V3D_HUB_IDENT0),
-   REGDEF(V3D_HUB_IDENT1),
-   REGDEF(V3D_HUB_IDENT2),
-   REGDEF(V3D_HUB_IDENT3),
-   REGDEF(V3D_HUB_INT_STS),
-   REGDEF(V3D_HUB_INT_MSK_STS),
-
-   REGDEF(V3D_MMU_CTL),
-   REGDEF(V3D_MMU_VIO_ADDR),
-   REGDEF(V3D_MMU_VIO_ID),
-   REGDEF(V3D_MMU_DEBUG_INFO),
+   REGDEF(33, 42, V3D_HUB_AXICFG),
+   REGDEF(33, 71, V3D_HUB_UIFCFG),
+   REGDEF(33, 71, V3D_HUB_IDENT0),
+   REGDEF(33, 71, V3D_HUB_IDENT1),
+   REGDEF(33, 71, V3D_HUB_IDENT2),
+   REGDEF(33, 71, V3D_HUB_IDENT3),
+   REGDEF(33, 71, V3D_HUB_INT_STS),
+   REGDEF(33, 71, V3D_HUB_INT_MSK_STS),
+
+   REGDEF(33, 71, V3D_MMU_CTL),
+   REGDEF(33, 71, V3D_MMU_VIO_ADDR),
+   REGDEF(33, 71, V3D_MMU_VIO_ID),
+   REGDEF(33, 71, V3D_MMU_DEBUG_INFO),
+
+   REGDEF(71, 71, V3D_GMP_STATUS(71)),
+   REGDEF(71, 71, V3D_GMP_CFG(71)),
+   REGDEF(71, 71, V3D_GMP_VIO_ADDR(71)),
 };
 
 static const struct v3d_reg_def v3d_gca_reg_defs[] = {
-   REGDEF(V3D_GCA_SAFE_SHUTDOWN),
-   REGDEF(V3D_GCA_SAFE_SHUTDOWN_ACK),
+   REGDEF(33, 33, V3D_GCA_SAFE_SHUTDOWN),
+   REGDEF(33, 33, V3D_GCA_SAFE_SHUTDOWN_ACK),
 };
 
 static const struct v3d_reg_def v3d_core_reg_defs[] = {
-   REGDEF(V3D_CTL_IDENT0),
-   REGDEF(V3D_CTL_IDENT1),
-   REGDEF(V3D_CTL_IDENT2),
-   REGDEF(V3D_CTL_MISCCFG),
-   REGDEF(V3D_CTL_INT_STS),
-   REGDEF(V3D_CTL_INT_MSK_STS),
-   REGDEF(V3D_CLE_CT0CS),
-   REGDEF(V3D_CLE_CT0CA),
-   REGDEF(V3D_CLE_CT0EA),
-   REGDEF(V3D_CLE_CT1CS),
-   REGDEF(V3D_CLE_CT1CA),
-   REGDEF(V3D_CLE_CT1EA),
-
-   REGDEF(V3D_PTB_BPCA),
-   REGDEF(V3D_PTB_BPCS),
-
-   REGDEF(V3D_GMP_STATUS),
-   REGDEF(V3D_GMP_CFG),
-   REGDEF(V3D_GMP_VIO_ADDR),
-
-   REGDEF(V3D_ERR_FDBGO),
-   REGDEF(V3D_ERR_FDBGB),
-   REGDEF(V3D_ERR_FDBGS),
-   REGDEF(V3D_ERR_STAT),
+   REGDEF(33, 71, V3D_CTL_IDENT0),
+   REGDEF(33, 71, V3D_CTL_IDENT1),
+   REGDEF(33, 71, V3D_CTL_IDENT2),
+   REGDEF(33, 71, V3D_CTL_MISCCFG),
+   REGDEF(33, 71, V3D_CTL_INT_STS),
+   REGDEF(33, 71, V3D_CTL_INT_MSK_STS),
+   REGDEF(33, 71, V3D_CLE_CT0CS),
+   REGDEF(33, 71, V3D_CLE_CT0CA),
+   REGDEF(33, 71, V3D_CLE_CT0EA),
+   REGDEF(33, 71, V3D_CLE_CT1CS),
+   REGDEF(33, 71, V3D_CLE_CT1CA),
+   REGDEF(33, 71, V3D_CLE_CT1EA),
+
+   REGDEF(33, 71, V3D_PTB_BPCA),
+   REGDEF(33, 71, V3D_PTB_BPCS),
+
+   REGDEF(33, 41, V3D_GMP_STATUS(33)),
+   REGDEF(33, 41, V3D_GMP_CFG(33)),
+   REGDEF(33, 41, V3D_GMP_VIO_ADDR(33)),
+
+   REGDEF(33, 71, V3D_ERR_FDBGO),
+   REGDEF(33, 71, V3D_ERR_FDBGB),
+   REGDEF(33, 71, V3D_ERR_FDBGS),
+   REGDEF(33, 71, V3D_ERR_STAT),
 };
 
 static const struct v3d_reg_def v3d_csd_reg_defs[] = {
-   REGDEF(V3D_CSD_STATUS),
-   REGDEF(V3D_CSD_CURRENT_CFG0),
-   REGDEF(V3D_CSD_CURRENT_CFG1),
-   REGDEF(V3D_CSD_CURRENT_CFG2),
-   REGDEF(V3D_CSD_CURRENT_CFG3),
-   REGDEF(V3D_CSD_CURRENT_CFG4),
-   REGDEF(V3D_CSD_CURRENT_CFG5),
-   REGDEF(V3D_CSD_CURRENT_CFG6),
+   REGDEF(41, 71, V3D_CSD_STATUS),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG0(41)),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG1(41)),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG2(41)),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG3(41)),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG4(41)),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG5(41)),
+   REGDEF(41, 41, 

[PATCH v2 3/4] dt-bindings: gpu: v3d: Add BCM2712's compatible

2023-10-30 Thread Iago Toral Quiroga
BCM2712, Raspberry Pi 5's SoC, contains a V3D core. So add its specific
compatible to the bindings.

v2: new, requested by Stefan Wahren.

Signed-off-by: Iago Toral Quiroga 
---
 Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml 
b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml
index dae55b8a267b..dc078ceeca9a 100644
--- a/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml
+++ b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml
@@ -17,6 +17,7 @@ properties:
   compatible:
 enum:
   - brcm,2711-v3d
+  - brcm,2712-v3d
   - brcm,7268-v3d
   - brcm,7278-v3d
 
-- 
2.39.2



[PATCH v2 1/4] drm/v3d: update UAPI to match user-space for V3D 7.x

2023-10-30 Thread Iago Toral Quiroga
V3D 7.x takes a new parameter to configure TFU jobs that needs
to be provided by user space.

v2: added s-o-b, fixed typo in commit message (Maíra Canal)

Signed-off-by: Iago Toral Quiroga 
---
 include/uapi/drm/v3d_drm.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 3dfc0af8756a..1a7d7a689de3 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -319,6 +319,11 @@ struct drm_v3d_submit_tfu {
 
/* Pointer to an array of ioctl extensions*/
__u64 extensions;
+
+   struct {
+   __u32 ioc;
+   __u32 pad;
+   } v71;
 };
 
 /* Submits a compute shader for dispatch.  This job will block on any
-- 
2.39.2



[PATCH v2 0/4] V3D module changes for Pi5

2023-10-30 Thread Iago Toral Quiroga
This series includes patches to update the V3D kernel module
that drives the VideoCore VI GPU in Raspberry Pi 4 to also support
the VideoCore VII iteration present in Raspberry Pi 5.

The first patch in the series adds a small uAPI update required for
TFU jobs, the second patch addresses the bulk of the work and
involves mostly updates to register addresses, the third and fourth
patches match the 'brcm,2712-v3d' device string from Pi5 with the
V3D driver.

The changes for the user-space driver can be found in the
corresponding Mesa MR here:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25450

Iago Toral Quiroga (4):
  drm/v3d: update UAPI to match user-space for V3D 7.x
  drm/v3d: fix up register addresses for V3D 7.x
  dt-bindings: gpu: v3d: Add BCM2712's compatible
  drm/v3d: add brcm,2712-v3d as a compatible V3D device

 .../devicetree/bindings/gpu/brcm,bcm-v3d.yaml |   1 +
 drivers/gpu/drm/v3d/v3d_debugfs.c | 178 ++
 drivers/gpu/drm/v3d/v3d_drv.c |   1 +
 drivers/gpu/drm/v3d/v3d_gem.c |   4 +-
 drivers/gpu/drm/v3d/v3d_irq.c |  46 +++--
 drivers/gpu/drm/v3d/v3d_regs.h|  94 +
 drivers/gpu/drm/v3d/v3d_sched.c   |  38 ++--
 include/uapi/drm/v3d_drm.h|   5 +
 8 files changed, 211 insertions(+), 156 deletions(-)

-- 
2.39.2



[PATCH 2/3] drm/v3d: update UAPI to match user-space for V3D 7.x

2023-09-28 Thread Iago Toral Quiroga
V3D 7.x takes a new parameter to configure TFU jobs that needs
to be provided by user space.
---
 include/uapi/drm/v3d_drm.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 3dfc0af8756a..1a7d7a689de3 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -319,6 +319,11 @@ struct drm_v3d_submit_tfu {
 
/* Pointer to an array of ioctl extensions*/
__u64 extensions;
+
+   struct {
+   __u32 ioc;
+   __u32 pad;
+   } v71;
 };
 
 /* Submits a compute shader for dispatch.  This job will block on any
-- 
2.39.2



[PATCH 1/3] drm/v3d: fix up register addresses for V3D 7.x

2023-09-28 Thread Iago Toral Quiroga
---
 drivers/gpu/drm/v3d/v3d_debugfs.c | 173 +-
 drivers/gpu/drm/v3d/v3d_gem.c |   3 +
 drivers/gpu/drm/v3d/v3d_irq.c |  47 
 drivers/gpu/drm/v3d/v3d_regs.h|  51 -
 drivers/gpu/drm/v3d/v3d_sched.c   |  41 ---
 5 files changed, 200 insertions(+), 115 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c 
b/drivers/gpu/drm/v3d/v3d_debugfs.c
index 330669f51fa7..90b2b5b2710c 100644
--- a/drivers/gpu/drm/v3d/v3d_debugfs.c
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -12,69 +12,83 @@
 #include "v3d_drv.h"
 #include "v3d_regs.h"
 
-#define REGDEF(reg) { reg, #reg }
+#define REGDEF(min_ver, max_ver, reg) { min_ver, max_ver, reg, #reg }
 struct v3d_reg_def {
+   u32 min_ver;
+   u32 max_ver;
u32 reg;
const char *name;
 };
 
 static const struct v3d_reg_def v3d_hub_reg_defs[] = {
-   REGDEF(V3D_HUB_AXICFG),
-   REGDEF(V3D_HUB_UIFCFG),
-   REGDEF(V3D_HUB_IDENT0),
-   REGDEF(V3D_HUB_IDENT1),
-   REGDEF(V3D_HUB_IDENT2),
-   REGDEF(V3D_HUB_IDENT3),
-   REGDEF(V3D_HUB_INT_STS),
-   REGDEF(V3D_HUB_INT_MSK_STS),
-
-   REGDEF(V3D_MMU_CTL),
-   REGDEF(V3D_MMU_VIO_ADDR),
-   REGDEF(V3D_MMU_VIO_ID),
-   REGDEF(V3D_MMU_DEBUG_INFO),
+   REGDEF(33, 42, V3D_HUB_AXICFG),
+   REGDEF(33, 71, V3D_HUB_UIFCFG),
+   REGDEF(33, 71, V3D_HUB_IDENT0),
+   REGDEF(33, 71, V3D_HUB_IDENT1),
+   REGDEF(33, 71, V3D_HUB_IDENT2),
+   REGDEF(33, 71, V3D_HUB_IDENT3),
+   REGDEF(33, 71, V3D_HUB_INT_STS),
+   REGDEF(33, 71, V3D_HUB_INT_MSK_STS),
+
+   REGDEF(33, 71, V3D_MMU_CTL),
+   REGDEF(33, 71, V3D_MMU_VIO_ADDR),
+   REGDEF(33, 71, V3D_MMU_VIO_ID),
+   REGDEF(33, 71, V3D_MMU_DEBUG_INFO),
+
+   REGDEF(71, 71, V3D_V7_GMP_STATUS),
+   REGDEF(71, 71, V3D_V7_GMP_CFG),
+   REGDEF(71, 71, V3D_V7_GMP_VIO_ADDR),
 };
 
 static const struct v3d_reg_def v3d_gca_reg_defs[] = {
-   REGDEF(V3D_GCA_SAFE_SHUTDOWN),
-   REGDEF(V3D_GCA_SAFE_SHUTDOWN_ACK),
+   REGDEF(33, 33, V3D_GCA_SAFE_SHUTDOWN),
+   REGDEF(33, 33, V3D_GCA_SAFE_SHUTDOWN_ACK),
 };
 
 static const struct v3d_reg_def v3d_core_reg_defs[] = {
-   REGDEF(V3D_CTL_IDENT0),
-   REGDEF(V3D_CTL_IDENT1),
-   REGDEF(V3D_CTL_IDENT2),
-   REGDEF(V3D_CTL_MISCCFG),
-   REGDEF(V3D_CTL_INT_STS),
-   REGDEF(V3D_CTL_INT_MSK_STS),
-   REGDEF(V3D_CLE_CT0CS),
-   REGDEF(V3D_CLE_CT0CA),
-   REGDEF(V3D_CLE_CT0EA),
-   REGDEF(V3D_CLE_CT1CS),
-   REGDEF(V3D_CLE_CT1CA),
-   REGDEF(V3D_CLE_CT1EA),
-
-   REGDEF(V3D_PTB_BPCA),
-   REGDEF(V3D_PTB_BPCS),
-
-   REGDEF(V3D_GMP_STATUS),
-   REGDEF(V3D_GMP_CFG),
-   REGDEF(V3D_GMP_VIO_ADDR),
-
-   REGDEF(V3D_ERR_FDBGO),
-   REGDEF(V3D_ERR_FDBGB),
-   REGDEF(V3D_ERR_FDBGS),
-   REGDEF(V3D_ERR_STAT),
+   REGDEF(33, 71, V3D_CTL_IDENT0),
+   REGDEF(33, 71, V3D_CTL_IDENT1),
+   REGDEF(33, 71, V3D_CTL_IDENT2),
+   REGDEF(33, 71, V3D_CTL_MISCCFG),
+   REGDEF(33, 71, V3D_CTL_INT_STS),
+   REGDEF(33, 71, V3D_CTL_INT_MSK_STS),
+   REGDEF(33, 71, V3D_CLE_CT0CS),
+   REGDEF(33, 71, V3D_CLE_CT0CA),
+   REGDEF(33, 71, V3D_CLE_CT0EA),
+   REGDEF(33, 71, V3D_CLE_CT1CS),
+   REGDEF(33, 71, V3D_CLE_CT1CA),
+   REGDEF(33, 71, V3D_CLE_CT1EA),
+
+   REGDEF(33, 71, V3D_PTB_BPCA),
+   REGDEF(33, 71, V3D_PTB_BPCS),
+
+   REGDEF(33, 41, V3D_GMP_STATUS),
+   REGDEF(33, 41, V3D_GMP_CFG),
+   REGDEF(33, 41, V3D_GMP_VIO_ADDR),
+
+   REGDEF(33, 71, V3D_ERR_FDBGO),
+   REGDEF(33, 71, V3D_ERR_FDBGB),
+   REGDEF(33, 71, V3D_ERR_FDBGS),
+   REGDEF(33, 71, V3D_ERR_STAT),
 };
 
 static const struct v3d_reg_def v3d_csd_reg_defs[] = {
-   REGDEF(V3D_CSD_STATUS),
-   REGDEF(V3D_CSD_CURRENT_CFG0),
-   REGDEF(V3D_CSD_CURRENT_CFG1),
-   REGDEF(V3D_CSD_CURRENT_CFG2),
-   REGDEF(V3D_CSD_CURRENT_CFG3),
-   REGDEF(V3D_CSD_CURRENT_CFG4),
-   REGDEF(V3D_CSD_CURRENT_CFG5),
-   REGDEF(V3D_CSD_CURRENT_CFG6),
+   REGDEF(41, 71, V3D_CSD_STATUS),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG0),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG1),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG2),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG3),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG4),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG5),
+   REGDEF(41, 41, V3D_CSD_CURRENT_CFG6),
+   REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG0),
+   REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG1),
+   REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG2),
+   REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG3),
+   REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG4),
+   REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG5),
+   REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG6),
+   REGDEF(71, 71, V3D_V7_CSD_CURRENT_CFG7),
 };
 
 static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused)
@@ -85,38 +99,37 @@ static int v3d_v3d_debugfs_regs(struct seq_file *m, void 
*unused)
int i, core;
 
 

[PATCH 0/3] V3D module changes for Pi5

2023-09-28 Thread Iago Toral Quiroga
This series includes patches to update the V3D kernel module
that drives the VideoCore VI GPU in Raspberry Pi 4 to also support
the VideoCore VII iteration present in Raspberry Pi 5.

The first patch in the series addresses the bulk of the work and
involves mostly updates to register addresses. The second patch
adds a small uAPI update required for TFU jobs and the third and
final patch matches the 'brcm,2712-v3d' device string from Pi5
with the V3D driver.

The changes for the user-space driver can be found in the
corresponding Mesa MR here:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25450

Iago Toral Quiroga (3):
  drm/v3d: fix up register addresses for V3D 7.x
  drm/v3d: update UAPI to match user-space for V3D 7.x
  drm/v3d: add brcm,2712-v3d as a compatible V3D device

 drivers/gpu/drm/v3d/v3d_debugfs.c | 173 +-
 drivers/gpu/drm/v3d/v3d_drv.c |   1 +
 drivers/gpu/drm/v3d/v3d_gem.c |   3 +
 drivers/gpu/drm/v3d/v3d_irq.c |  47 
 drivers/gpu/drm/v3d/v3d_regs.h|  51 -
 drivers/gpu/drm/v3d/v3d_sched.c   |  41 ---
 include/uapi/drm/v3d_drm.h|   5 +
 7 files changed, 206 insertions(+), 115 deletions(-)

-- 
2.39.2



[PATCH 3/3] drm/v3d: add brcm,2712-v3d as a compatible V3D device

2023-09-28 Thread Iago Toral Quiroga
---
 drivers/gpu/drm/v3d/v3d_drv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index ffbbe9d527d3..0ed2e7ba8b33 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -186,6 +186,7 @@ static const struct drm_driver v3d_drm_driver = {
 };
 
 static const struct of_device_id v3d_of_match[] = {
+   { .compatible = "brcm,2712-v3d" },
{ .compatible = "brcm,2711-v3d" },
{ .compatible = "brcm,7268-v3d" },
{ .compatible = "brcm,7278-v3d" },
-- 
2.39.2



[PATCH v2] drm/v3d: fix wait for TMU write combiner flush

2021-09-15 Thread Iago Toral Quiroga
The hardware sets the TMUWCF bit back to 0 when the TMU write
combiner flush completes so we should be checking for that instead
of the L2TFLS bit.

v2 (Melissa Wen):
  - Add Signed-off-by and Fixes tags.
  - Change the error message for the timeout to be more clear.

Fixes spurious Vulkan CTS failures in:
dEQP-VK.binding_model.descriptorset_random.*

Fixes: d223f98f02099 ("drm/v3d: Add support for compute shader dispatch")
Signed-off-by: Iago Toral Quiroga 
Reviewed-by: Melissa Wen 
---
 drivers/gpu/drm/v3d/v3d_gem.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index a3529809d547..1953706bdaeb 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -197,8 +197,8 @@ v3d_clean_caches(struct v3d_dev *v3d)
 
V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
-  V3D_L2TCACTL_L2TFLS), 100)) {
-   DRM_ERROR("Timeout waiting for L1T write combiner flush\n");
+  V3D_L2TCACTL_TMUWCF), 100)) {
+   DRM_ERROR("Timeout waiting for TMU write combiner flush\n");
}
 
mutex_lock(>cache_clean_lock);
-- 
2.25.1



[PATCH] drm/v3d: fix wait for TMU write combiner flush

2021-09-13 Thread Iago Toral Quiroga
The hardware sets the TMUWCF bit back to 0 when the TMU write
combiner flush completes so we should be checking for that instead
of the L2TFLS bit.

Fixes spurious Vulkan CTS failures in:
dEQP-VK.binding_model.descriptorset_random.*
---
 drivers/gpu/drm/v3d/v3d_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index a3529809d547..5159f544bc16 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -197,7 +197,7 @@ v3d_clean_caches(struct v3d_dev *v3d)
 
V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
-  V3D_L2TCACTL_L2TFLS), 100)) {
+  V3D_L2TCACTL_TMUWCF), 100)) {
DRM_ERROR("Timeout waiting for L1T write combiner flush\n");
}
 
-- 
2.25.1



[PATCH v3] drm/v3d: clean caches at the end of render jobs on request from user space

2019-09-19 Thread Iago Toral Quiroga
Extends the user space ioctl for CL submissions so it can include a request
to flush the cache once the CL execution has completed. Fixes memory
write violation messages reported by the kernel in workloads involving
shader memory writes (SSBOs, shader images, scratch, etc) which sometimes
also lead to GPU resets during Piglit and CTS workloads.

v2: if v3d_job_init() fails we need to kfree() the job instead of
v3d_job_put() it (Eric Anholt).

v3 (Eric Anholt):
  - Drop _FLAG suffix from the new flag name.
  - Add a new param so userspace can tell whether cache flushing is
implemented in the kernel.

Signed-off-by: Iago Toral Quiroga 
Reviewed-by: Eric Anholt 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20190912083516.13797-1-ito...@igalia.com
---
 drivers/gpu/drm/v3d/v3d_drv.c |  3 ++
 drivers/gpu/drm/v3d/v3d_gem.c | 54 +--
 include/uapi/drm/v3d_drm.h|  8 --
 3 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index 3506ae2723ae..e94bf75368be 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -126,6 +126,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void 
*data,
case DRM_V3D_PARAM_SUPPORTS_CSD:
args->value = v3d_has_csd(v3d);
return 0;
+   case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
+   args->value = 1;
+   return 0;
default:
DRM_DEBUG("Unknown parameter %d\n", args->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index fb32cda18ffe..4c4b59ae2c81 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -530,13 +530,16 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
struct drm_v3d_submit_cl *args = data;
struct v3d_bin_job *bin = NULL;
struct v3d_render_job *render;
+   struct v3d_job *clean_job = NULL;
+   struct v3d_job *last_job;
struct ww_acquire_ctx acquire_ctx;
int ret = 0;
 
trace_v3d_submit_cl_ioctl(>drm, args->rcl_start, args->rcl_end);
 
-   if (args->pad != 0) {
-   DRM_INFO("pad must be zero: %d\n", args->pad);
+   if (args->flags != 0 &&
+   args->flags != DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
+   DRM_INFO("invalid flags: %d\n", args->flags);
return -EINVAL;
}
 
@@ -576,12 +579,31 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
bin->render = render;
}
 
-   ret = v3d_lookup_bos(dev, file_priv, >base,
+   if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
+   clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL);
+   if (!clean_job) {
+   ret = -ENOMEM;
+   goto fail;
+   }
+
+   ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0);
+   if (ret) {
+   kfree(clean_job);
+   clean_job = NULL;
+   goto fail;
+   }
+
+   last_job = clean_job;
+   } else {
+   last_job = >base;
+   }
+
+   ret = v3d_lookup_bos(dev, file_priv, last_job,
 args->bo_handles, args->bo_handle_count);
if (ret)
goto fail;
 
-   ret = v3d_lock_bo_reservations(>base, _ctx);
+   ret = v3d_lock_bo_reservations(last_job, _ctx);
if (ret)
goto fail;
 
@@ -600,28 +622,44 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
ret = v3d_push_job(v3d_priv, >base, V3D_RENDER);
if (ret)
goto fail_unreserve;
+
+   if (clean_job) {
+   struct dma_fence *render_fence =
+   dma_fence_get(render->base.done_fence);
+   ret = drm_gem_fence_array_add(_job->deps, render_fence);
+   if (ret)
+   goto fail_unreserve;
+   ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN);
+   if (ret)
+   goto fail_unreserve;
+   }
+
mutex_unlock(>sched_lock);
 
v3d_attach_fences_and_unlock_reservation(file_priv,
->base,
+last_job,
 _ctx,
 args->out_sync,
-render->base.done_fence);
+last_job->done_fence);
 
if (bin)
v3d_job_put(>base);
v3d_job_put(>base);
+   if (clean_job)
+   v3d_job_put(clean_job);

[PATCH v2] drm/v3d: clean caches at the end of render jobs on request from user space

2019-09-18 Thread Iago Toral Quiroga
Extends the user space ioctl for CL submissions so it can include a request
to flush the cache once the CL execution has completed. Fixes memory
write violation messages reported by the kernel in workloads involving
shader memory writes (SSBOs, shader images, scratch, etc) which sometimes
also lead to GPU resets during Piglit and CTS workloads.

v2: if v3d_job_init() fails we need to kfree() the job instead of
v3d_job_put() it (Eric Anholt).

Signed-off-by: Iago Toral Quiroga 
Reviewed-by: Eric Anholt 
Link: 
https://patchwork.freedesktop.org/patch/msgid/20190912083516.13797-1-ito...@igalia.com
---
 drivers/gpu/drm/v3d/v3d_gem.c | 54 +--
 include/uapi/drm/v3d_drm.h|  7 +++--
 2 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 5d80507b539b..d46d91346d09 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -530,13 +530,16 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
struct drm_v3d_submit_cl *args = data;
struct v3d_bin_job *bin = NULL;
struct v3d_render_job *render;
+   struct v3d_job *clean_job = NULL;
+   struct v3d_job *last_job;
struct ww_acquire_ctx acquire_ctx;
int ret = 0;
 
trace_v3d_submit_cl_ioctl(>drm, args->rcl_start, args->rcl_end);
 
-   if (args->pad != 0) {
-   DRM_INFO("pad must be zero: %d\n", args->pad);
+   if (args->flags != 0 &&
+   args->flags != DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG) {
+   DRM_INFO("invalid flags: %d\n", args->flags);
return -EINVAL;
}
 
@@ -575,12 +578,31 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
bin->render = render;
}
 
-   ret = v3d_lookup_bos(dev, file_priv, &render->base,
+   if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG) {
+   clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL);
+   if (!clean_job) {
+   ret = -ENOMEM;
+   goto fail;
+   }
+
+   ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0);
+   if (ret) {
+   kfree(clean_job);
+   clean_job = NULL;
+   goto fail;
+   }
+
+   last_job = clean_job;
+   } else {
+   last_job = &render->base;
+   }
+
+   ret = v3d_lookup_bos(dev, file_priv, last_job,
 args->bo_handles, args->bo_handle_count);
if (ret)
goto fail;
 
-   ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx);
+   ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
if (ret)
goto fail;
 
@@ -599,28 +621,44 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
if (ret)
goto fail_unreserve;
+
+   if (clean_job) {
+   struct dma_fence *render_fence =
+   dma_fence_get(render->base.done_fence);
+   ret = drm_gem_fence_array_add(&clean_job->deps, render_fence);
+   if (ret)
+   goto fail_unreserve;
+   ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN);
+   if (ret)
+   goto fail_unreserve;
+   }
+
mutex_unlock(&v3d->sched_lock);
 
v3d_attach_fences_and_unlock_reservation(file_priv,
-&render->base,
+last_job,
 &acquire_ctx,
 args->out_sync,
-render->base.done_fence);
+last_job->done_fence);
 
if (bin)
v3d_job_put(&bin->base);
v3d_job_put(&render->base);
+   if (clean_job)
+   v3d_job_put(clean_job);
 
return 0;
 
 fail_unreserve:
mutex_unlock(&v3d->sched_lock);
-   drm_gem_unlock_reservations(render->base.bo,
-   render->base.bo_count, &acquire_ctx);
+   drm_gem_unlock_reservations(last_job->bo,
+   last_job->bo_count, &acquire_ctx);
 fail:
if (bin)
v3d_job_put(&bin->base);
v3d_job_put(&render->base);
+   if (clean_job)
+   v3d_job_put(clean_job);
 
return ret;
 }
diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 58fbe48c91e9..58d2040ea48c 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -48,6 +48,8 @@ extern "C" {
 #define DRM_IOCTL_V3D_SUBMIT_TFU  DRM_IOW(DRM_COMMAND_BASE + 
DRM_V3D_SUBMIT_TFU, struct drm_v3d_su

[PATCH] drm/v3d: don't leak bin job if v3d_job_init fails.

2019-09-16 Thread Iago Toral Quiroga
If the initialization of the job fails we need to kfree() it
before returning.

Signed-off-by: Iago Toral Quiroga 
---
 drivers/gpu/drm/v3d/v3d_gem.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index d46d91346d09..ed68731404a7 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -566,6 +566,7 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
ret = v3d_job_init(v3d, file_priv, &bin->base,
   v3d_job_free, args->in_sync_bcl);
if (ret) {
+   kfree(bin);
v3d_job_put(&render->base);
return ret;
}
-- 
2.17.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

[PATCH] drm/v3d: clean caches at the end of render jobs on request from user space

2019-09-12 Thread Iago Toral Quiroga
Extends the user space ioctl for CL submissions so it can include a request
to flush the cache once the CL execution has completed. Fixes memory
write violation messages reported by the kernel in workloads involving
shader memory writes (SSBOs, shader images, scratch, etc) which sometimes
also lead to GPU resets during Piglit and CTS workloads.

Signed-off-by: Iago Toral Quiroga 
---
 drivers/gpu/drm/v3d/v3d_gem.c | 51 +--
 include/uapi/drm/v3d_drm.h|  7 ++---
 2 files changed, 47 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 5d80507b539b..530fe9d9d5bd 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -530,13 +530,16 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
struct drm_v3d_submit_cl *args = data;
struct v3d_bin_job *bin = NULL;
struct v3d_render_job *render;
+   struct v3d_job *clean_job = NULL;
+   struct v3d_job *last_job;
struct ww_acquire_ctx acquire_ctx;
int ret = 0;
 
trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
 
-   if (args->pad != 0) {
-   DRM_INFO("pad must be zero: %d\n", args->pad);
+   if (args->flags != 0 &&
+   args->flags != DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG) {
+   DRM_INFO("invalid flags: %d\n", args->flags);
return -EINVAL;
}
 
@@ -575,12 +578,28 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
bin->render = render;
}
 
-   ret = v3d_lookup_bos(dev, file_priv, &render->base,
+   if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG) {
+   clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL);
+   if (!clean_job) {
+   ret = -ENOMEM;
+   goto fail;
+   }
+
+   ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0);
+   if (ret)
+   goto fail;
+
+   last_job = clean_job;
+   } else {
+   last_job = &render->base;
+   }
+
+   ret = v3d_lookup_bos(dev, file_priv, last_job,
 args->bo_handles, args->bo_handle_count);
if (ret)
goto fail;
 
-   ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx);
+   ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
if (ret)
goto fail;
 
@@ -599,28 +618,44 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
if (ret)
goto fail_unreserve;
+
+   if (clean_job) {
+   struct dma_fence *render_fence =
+   dma_fence_get(render->base.done_fence);
+   ret = drm_gem_fence_array_add(&clean_job->deps, render_fence);
+   if (ret)
+   goto fail_unreserve;
+   ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN);
+   if (ret)
+   goto fail_unreserve;
+   }
+
mutex_unlock(&v3d->sched_lock);
 
v3d_attach_fences_and_unlock_reservation(file_priv,
-&render->base,
+last_job,
 &acquire_ctx,
 args->out_sync,
-render->base.done_fence);
+last_job->done_fence);
 
if (bin)
v3d_job_put(&bin->base);
v3d_job_put(&render->base);
+   if (clean_job)
+   v3d_job_put(clean_job);
 
return 0;
 
 fail_unreserve:
mutex_unlock(&v3d->sched_lock);
-   drm_gem_unlock_reservations(render->base.bo,
-   render->base.bo_count, &acquire_ctx);
+   drm_gem_unlock_reservations(last_job->bo,
+   last_job->bo_count, &acquire_ctx);
 fail:
if (bin)
v3d_job_put(&bin->base);
v3d_job_put(&render->base);
+   if (clean_job)
+   v3d_job_put(clean_job);
 
return ret;
 }
diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 58fbe48c91e9..58d2040ea48c 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -48,6 +48,8 @@ extern "C" {
 #define DRM_IOCTL_V3D_SUBMIT_TFU  DRM_IOW(DRM_COMMAND_BASE + 
DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
 #define DRM_IOCTL_V3D_SUBMIT_CSD  DRM_IOW(DRM_COMMAND_BASE + 
DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd)
 
+#define DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG 0x01
+
 /**
  * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
  * engine.
@@ -61,7 +63,7 @@ extern "C" {
  * flushed